diff --git a/presto-accumulo/src/main/java/com/facebook/presto/accumulo/AccumuloMetadata.java b/presto-accumulo/src/main/java/com/facebook/presto/accumulo/AccumuloMetadata.java index bfc0fc57c8d13..b38ce59b145ca 100644 --- a/presto-accumulo/src/main/java/com/facebook/presto/accumulo/AccumuloMetadata.java +++ b/presto-accumulo/src/main/java/com/facebook/presto/accumulo/AccumuloMetadata.java @@ -37,6 +37,7 @@ import com.facebook.presto.spi.TableNotFoundException; import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorOutputMetadata; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -102,7 +103,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con } @Override - public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { clearRollback(); return Optional.empty(); @@ -212,7 +213,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto } @Override - public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments) + public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { clearRollback(); return Optional.empty(); diff --git a/presto-base-jdbc/src/main/java/com/facebook/presto/plugin/jdbc/JdbcMetadata.java b/presto-base-jdbc/src/main/java/com/facebook/presto/plugin/jdbc/JdbcMetadata.java index 2b1bccfbc90f6..f20ddb2231668 100644 --- a/presto-base-jdbc/src/main/java/com/facebook/presto/plugin/jdbc/JdbcMetadata.java +++ 
b/presto-base-jdbc/src/main/java/com/facebook/presto/plugin/jdbc/JdbcMetadata.java @@ -31,6 +31,7 @@ import com.facebook.presto.spi.TableNotFoundException; import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorOutputMetadata; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.slice.Slice; @@ -173,7 +174,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con } @Override - public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { JdbcOutputTableHandle handle = (JdbcOutputTableHandle) tableHandle; jdbcClient.commitCreateTable(handle); @@ -203,7 +204,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto } @Override - public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle tableHandle, Collection fragments) + public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle tableHandle, Collection fragments, List computedStatistics) { JdbcOutputTableHandle jdbcInsertHandle = (JdbcOutputTableHandle) tableHandle; jdbcClient.finishInsertTable(jdbcInsertHandle); diff --git a/presto-blackhole/src/main/java/com/facebook/presto/plugin/blackhole/BlackHoleMetadata.java b/presto-blackhole/src/main/java/com/facebook/presto/plugin/blackhole/BlackHoleMetadata.java index ff0df52c19d22..9b07496caf634 100644 --- a/presto-blackhole/src/main/java/com/facebook/presto/plugin/blackhole/BlackHoleMetadata.java +++ b/presto-blackhole/src/main/java/com/facebook/presto/plugin/blackhole/BlackHoleMetadata.java @@ -31,6 +31,7 @@ import com.facebook.presto.spi.SchemaTablePrefix; import 
com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorOutputMetadata; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; @@ -159,7 +160,7 @@ public void renameTable(ConnectorSession session, ConnectorTableHandle tableHand public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) { ConnectorOutputTableHandle outputTableHandle = beginCreateTable(session, tableMetadata, Optional.empty()); - finishCreateTable(session, outputTableHandle, ImmutableList.of()); + finishCreateTable(session, outputTableHandle, ImmutableList.of(), ImmutableList.of()); } @Override @@ -220,7 +221,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con } @Override - public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { BlackHoleOutputTableHandle blackHoleOutputTableHandle = (BlackHoleOutputTableHandle) tableHandle; BlackHoleTableHandle table = blackHoleOutputTableHandle.getTable(); @@ -236,7 +237,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto } @Override - public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments) + public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { return Optional.empty(); } diff --git a/presto-blackhole/src/test/java/com/facebook/presto/plugin/blackhole/TestBlackHoleMetadata.java b/presto-blackhole/src/test/java/com/facebook/presto/plugin/blackhole/TestBlackHoleMetadata.java index 
adc0e7fd0167e..ef98415607a08 100644 --- a/presto-blackhole/src/test/java/com/facebook/presto/plugin/blackhole/TestBlackHoleMetadata.java +++ b/presto-blackhole/src/test/java/com/facebook/presto/plugin/blackhole/TestBlackHoleMetadata.java @@ -65,7 +65,7 @@ public void tableIsCreatedAfterCommits() assertThatNoTableIsCreated(); - metadata.finishCreateTable(SESSION, table, ImmutableList.of()); + metadata.finishCreateTable(SESSION, table, ImmutableList.of(), ImmutableList.of()); List tables = metadata.listTables(SESSION, null); assertTrue(tables.size() == 1, "Expected only one table."); diff --git a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraMetadata.java b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraMetadata.java index 24aa9572411d3..2d98de4d9432f 100644 --- a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraMetadata.java +++ b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraMetadata.java @@ -35,6 +35,7 @@ import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorOutputMetadata; import com.facebook.presto.spi.predicate.TupleDomain; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.type.Type; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -312,7 +313,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con } @Override - public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { return Optional.empty(); } @@ -339,7 +340,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto } @Override - public Optional finishInsert(ConnectorSession session, 
ConnectorInsertTableHandle insertHandle, Collection fragments) + public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { return Optional.empty(); } diff --git a/presto-docs/src/main/sphinx/connector/hive.rst b/presto-docs/src/main/sphinx/connector/hive.rst index ebb395c84170b..584d2f1862978 100644 --- a/presto-docs/src/main/sphinx/connector/hive.rst +++ b/presto-docs/src/main/sphinx/connector/hive.rst @@ -111,9 +111,9 @@ security options in the Hive connector. Hive Configuration Properties ----------------------------- -================================================== ============================================================ ========== +================================================== ============================================================ ============================= Property Name Description Default -================================================== ============================================================ ========== +================================================== ============================================================ ============================= ``hive.metastore.uri`` The URI(s) of the Hive metastore to connect to using the Thrift protocol. If multiple URIs are provided, the first URI is used by default and the rest of the URIs are @@ -175,7 +175,11 @@ Property Name Description ``hive.non-managed-table-writes-enabled`` Enable writes to non-managed (external) Hive tables. ``false`` ``hive.non-managed-table-creates-enabled`` Enable creating non-managed (external) Hive tables. ``true`` -================================================== ============================================================ ========== + +``hive.collect-column-statistics-on-write`` Enables automatic column level statistics collection ``ENABLED_FOR_MARKED_TABLES`` + on write. 
Possible values are ``ENABLED``, + ``ENABLED_FOR_MARKED_TABLES`` or ``DISABLED`` +================================================== ============================================================ ============================= Amazon S3 Configuration ----------------------- @@ -334,6 +338,59 @@ the ``org.apache.hadoop.conf.Configurable`` interface from the Hadoop Java API, will be passed in after the object instance is created and before it is asked to provision or retrieve any encryption keys. +Table Statistics +---------------- + +The Hive connector collects ``numRows``, ``rawDataSize``, ``totalSize``, ``numFiles`` statistics +automatically on ``INSERT INTO`` and ``CREATE TABLE AS SELECT`` operations. + +The Hive connector can also collect the column level statistics: + +============= ================================================================================================================ +Column Type Collectible Statistics +============= ================================================================================================================ +``TINYINT`` ``NUMBER_OF_NULLS``, ``MIN``, ``MAX``, ``NUMBER_OF_DISTINCT_VALUES`` + +``SMALLINT`` ``NUMBER_OF_NULLS``, ``MIN``, ``MAX``, ``NUMBER_OF_DISTINCT_VALUES`` + +``INTEGER`` ``NUMBER_OF_NULLS``, ``MIN``, ``MAX``, ``NUMBER_OF_DISTINCT_VALUES`` + +``BIGINT`` ``NUMBER_OF_NULLS``, ``MIN``, ``MAX``, ``NUMBER_OF_DISTINCT_VALUES`` + +``DOUBLE`` ``NUMBER_OF_NULLS``, ``MIN``, ``MAX``, ``NUMBER_OF_DISTINCT_VALUES`` + +``REAL`` ``NUMBER_OF_NULLS``, ``MIN``, ``MAX``, ``NUMBER_OF_DISTINCT_VALUES`` + +``BOOLEAN`` ``NUMBER_OF_NULLS``, ``NUMBER_OF_FALSE``, ``NUMBER_OF_TRUE`` + +``VARCHAR`` ``NUMBER_OF_NULLS``, ``NUMBER_OF_DISTINCT_VALUES``, ``MAX_VALUE_SIZE_IN_BYTES``, ``AVERAGE_VALUE_SIZE_IN_BYTES`` + +``CHAR`` ``NUMBER_OF_NULLS``, ``NUMBER_OF_DISTINCT_VALUES`` + +``VARBINARY`` ``NUMBER_OF_NULLS``, ``MAX_VALUE_SIZE_IN_BYTES``, ``AVERAGE_VALUE_SIZE_IN_BYTES`` + +``DATE`` ``NUMBER_OF_NULLS``, ``MIN``, ``MAX``, 
``NUMBER_OF_DISTINCT_VALUES`` + +``TIMESTAMP`` ``NUMBER_OF_NULLS``, ``MIN``, ``MAX``, ``NUMBER_OF_DISTINCT_VALUES`` + +``DECIMAL`` ``NUMBER_OF_NULLS``, ``MIN``, ``MAX``, ``NUMBER_OF_DISTINCT_VALUES`` +============= ================================================================================================================ + +Automatic column level statistics collection on write can be enabled by tuning the ``hive.collect-column-statistics-on-write`` +property: + +* ``ENABLED`` - Presto will collect the column level statistics for all the tables. +* ``ENABLED_FOR_MARKED_TABLES`` - Presto will collect the column level statistics for the tables + created with the ``collect_column_statistics_on_write_enabled`` property set to ``true``: + :: + + CREATE TABLE automatically_collect_column_statistics ( + a BIGINT + ) + WITH (collect_column_statistics_on_write_enabled = true) + +* ``DISABLED`` - Presto will not collect the column level statistics for any table. + Schema Evolution ---------------- diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/CollectibleStatisticsProvider.java b/presto-hive/src/main/java/com/facebook/presto/hive/CollectibleStatisticsProvider.java new file mode 100644 index 0000000000000..a9357ea1d5b48 --- /dev/null +++ b/presto-hive/src/main/java/com/facebook/presto/hive/CollectibleStatisticsProvider.java @@ -0,0 +1,24 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.hive; + +import com.facebook.presto.spi.statistics.ColumnStatisticType; +import com.facebook.presto.spi.type.Type; + +import java.util.Set; + +public interface CollectibleStatisticsProvider +{ + Set get(Type type); +} diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveBasicStatistics.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveBasicStatistics.java index 7a31ed4985dad..941b2ca271bc6 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveBasicStatistics.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveBasicStatistics.java @@ -13,11 +13,6 @@ */ package com.facebook.presto.hive; -import com.google.common.collect.ImmutableMap; - -import javax.annotation.Nullable; - -import java.util.Map; import java.util.Objects; import java.util.OptionalLong; @@ -27,11 +22,6 @@ public class HiveBasicStatistics { - private static final String NUM_FILES = "numFiles"; - private static final String NUM_ROWS = "numRows"; - private static final String RAW_DATA_SIZE = "rawDataSize"; - private static final String TOTAL_SIZE = "totalSize"; - private final OptionalLong fileCount; private final OptionalLong rowCount; private final OptionalLong inMemoryDataSizeInBytes; @@ -120,40 +110,4 @@ public String toString() .add("onDiskDataSizeInBytes", onDiskDataSizeInBytes) .toString(); } - - public Map toPartitionParameters() - { - ImmutableMap.Builder properties = ImmutableMap.builder(); - fileCount.ifPresent(count -> properties.put(NUM_FILES, Long.toString(count))); - rowCount.ifPresent(count -> properties.put(NUM_ROWS, Long.toString(count))); - inMemoryDataSizeInBytes.ifPresent(size -> properties.put(RAW_DATA_SIZE, Long.toString(size))); - onDiskDataSizeInBytes.ifPresent(size -> properties.put(TOTAL_SIZE, Long.toString(size))); - return properties.build(); - } - - public static HiveBasicStatistics createFromPartitionParameters(Map parameters) - { - OptionalLong numFiles = 
parse(parameters.get(NUM_FILES)); - OptionalLong numRows = parse(parameters.get(NUM_ROWS)); - OptionalLong inMemoryDataSizeInBytes = parse(parameters.get(RAW_DATA_SIZE)); - OptionalLong onDiskDataSizeInBytes = parse(parameters.get(TOTAL_SIZE)); - return new HiveBasicStatistics(numFiles, numRows, inMemoryDataSizeInBytes, onDiskDataSizeInBytes); - } - - private static OptionalLong parse(@Nullable String parameterValue) - { - if (parameterValue == null) { - return OptionalLong.empty(); - } - try { - long longValue = Long.parseLong(parameterValue); - if (longValue < 0) { - return OptionalLong.empty(); - } - return OptionalLong.of(longValue); - } - catch (NumberFormatException e) { - return OptionalLong.empty(); - } - } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java index 17c711052c0c2..6ad99f86e7962 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientConfig.java @@ -37,6 +37,7 @@ import java.util.TimeZone; import java.util.concurrent.TimeUnit; +import static com.facebook.presto.hive.HiveClientConfig.CollectColumnStatisticsOnWriteOption.ENABLED_FOR_MARKED_TABLES; import static io.airlift.units.DataSize.Unit.MEGABYTE; @DefunctConfig({ @@ -134,6 +135,8 @@ public class HiveClientConfig private boolean tableStatisticsEnabled = true; + private CollectColumnStatisticsOnWriteOption collectColumnStatisticsOnWrite = ENABLED_FOR_MARKED_TABLES; + public int getMaxInitialSplits() { return maxInitialSplits; @@ -1044,4 +1047,25 @@ public boolean isTableStatisticsEnabled() { return tableStatisticsEnabled; } + + @NotNull + public CollectColumnStatisticsOnWriteOption getCollectColumnStatisticsOnWrite() + { + return collectColumnStatisticsOnWrite; + } + + @Config("hive.collect-column-statistics-on-write") + @ConfigDescription("Enables automatic column level statistics 
collection on write") + public HiveClientConfig setCollectColumnStatisticsOnWrite(CollectColumnStatisticsOnWriteOption collectColumnStatisticsOnWrite) + { + this.collectColumnStatisticsOnWrite = collectColumnStatisticsOnWrite; + return this; + } + + public enum CollectColumnStatisticsOnWriteOption + { + ENABLED, + ENABLED_FOR_MARKED_TABLES, + DISABLED + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java index b0bb04cb410a3..b41a9ff6bfed3 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveClientModule.java @@ -19,6 +19,7 @@ import com.facebook.presto.hive.parquet.ParquetPageSourceFactory; import com.facebook.presto.hive.parquet.ParquetRecordCursorProvider; import com.facebook.presto.hive.rcfile.RcFilePageSourceFactory; +import com.facebook.presto.hive.util.Statistics; import com.facebook.presto.spi.connector.ConnectorNodePartitioningProvider; import com.facebook.presto.spi.connector.ConnectorPageSinkProvider; import com.facebook.presto.spi.connector.ConnectorPageSourceProvider; @@ -60,6 +61,7 @@ public void configure(Binder binder) binder.bind(HiveConnectorId.class).toInstance(new HiveConnectorId(connectorId)); binder.bind(TypeTranslator.class).toInstance(new HiveTypeTranslator()); binder.bind(CoercionPolicy.class).to(HiveCoercionPolicy.class).in(Scopes.SINGLETON); + binder.bind(CollectibleStatisticsProvider.class).toInstance(Statistics::getSupportedStatistics); binder.bind(HdfsConfigurationUpdater.class).in(Scopes.SINGLETON); binder.bind(HdfsConfiguration.class).to(HiveHdfsConfiguration.class).in(Scopes.SINGLETON); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java index 20aeb9b83ed44..2ee6bb05c92f9 100644 --- 
a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java @@ -17,6 +17,7 @@ import com.facebook.presto.hive.LocationService.WriteInfo; import com.facebook.presto.hive.metastore.Column; import com.facebook.presto.hive.metastore.Database; +import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; import com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege; import com.facebook.presto.hive.metastore.Partition; @@ -26,7 +27,6 @@ import com.facebook.presto.hive.metastore.StorageFormat; import com.facebook.presto.hive.metastore.Table; import com.facebook.presto.hive.statistics.HiveStatisticsProvider; -import com.facebook.presto.hive.util.Statistics; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorInsertTableHandle; @@ -51,6 +51,7 @@ import com.facebook.presto.spi.SystemTable; import com.facebook.presto.spi.TableNotFoundException; import com.facebook.presto.spi.ViewNotFoundException; +import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorOutputMetadata; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; @@ -60,7 +61,11 @@ import com.facebook.presto.spi.security.GrantInfo; import com.facebook.presto.spi.security.Privilege; import com.facebook.presto.spi.security.PrivilegeInfo; +import com.facebook.presto.spi.statistics.ColumnStatisticMetadata; +import com.facebook.presto.spi.statistics.ColumnStatisticType; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.statistics.TableStatistics; +import com.facebook.presto.spi.statistics.TableStatisticsMetadata; import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.TypeManager; import 
com.google.common.annotations.VisibleForTesting; @@ -99,6 +104,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static com.facebook.presto.hive.HiveBasicStatistics.createEmptyStatistics; import static com.facebook.presto.hive.HiveBasicStatistics.createZeroStatistics; import static com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle; import static com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME; @@ -116,6 +122,7 @@ import static com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT; import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR; import static com.facebook.presto.hive.HivePartitionManager.extractPartitionKeyValues; +import static com.facebook.presto.hive.HiveSessionProperties.getCollectColumnStatisticsOnWrite; import static com.facebook.presto.hive.HiveSessionProperties.getHiveStorageFormat; import static com.facebook.presto.hive.HiveSessionProperties.isBucketExecutionEnabled; import static com.facebook.presto.hive.HiveSessionProperties.isRespectTableFormat; @@ -123,6 +130,7 @@ import static com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled; import static com.facebook.presto.hive.HiveTableProperties.BUCKETED_BY_PROPERTY; import static com.facebook.presto.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY; +import static com.facebook.presto.hive.HiveTableProperties.COLLECT_COLUMN_STATISTICS_ON_WRITE_ENABLED; import static com.facebook.presto.hive.HiveTableProperties.EXTERNAL_LOCATION_PROPERTY; import static com.facebook.presto.hive.HiveTableProperties.ORC_BLOOM_FILTER_COLUMNS; import static com.facebook.presto.hive.HiveTableProperties.ORC_BLOOM_FILTER_FPP; @@ -161,7 +169,9 @@ import static com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat; import static com.facebook.presto.hive.util.ConfigurationUtils.toJobConf; import static com.facebook.presto.hive.util.Statistics.ReduceOperator.ADD; -import static 
com.facebook.presto.hive.util.Statistics.updateStatistics; +import static com.facebook.presto.hive.util.Statistics.fromComputedStatistics; +import static com.facebook.presto.hive.util.Statistics.groupComputedStatisticsByPartition; +import static com.facebook.presto.hive.util.Statistics.reduce; import static com.facebook.presto.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY; import static com.facebook.presto.spi.StandardErrorCode.INVALID_TABLE_PROPERTY; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; @@ -195,6 +205,8 @@ public class HiveMetadata private static final String ORC_BLOOM_FILTER_COLUMNS_KEY = "orc.bloom.filter.columns"; private static final String ORC_BLOOM_FILTER_FPP_KEY = "orc.bloom.filter.fpp"; + private static final String COLLECT_COLUMN_STATISTICS_ON_WRITE_ENABLED_KEY = "presto.collect.column.statistics.on.write.key"; + private static final String PARTITIONS_TABLE_SUFFIX = "$partitions"; private final boolean allowCorruptWritesForTesting; @@ -212,6 +224,7 @@ public class HiveMetadata private final String prestoVersion; private final HiveStatisticsProvider hiveStatisticsProvider; private final int maxPartitions; + private final CollectibleStatisticsProvider collectibleStatisticsProvider; public HiveMetadata( SemiTransactionalHiveMetastore metastore, @@ -228,7 +241,8 @@ public HiveMetadata( TypeTranslator typeTranslator, String prestoVersion, HiveStatisticsProvider hiveStatisticsProvider, - int maxPartitions) + int maxPartitions, + CollectibleStatisticsProvider collectibleStatisticsProvider) { this.allowCorruptWritesForTesting = allowCorruptWritesForTesting; @@ -247,6 +261,7 @@ public HiveMetadata( this.hiveStatisticsProvider = requireNonNull(hiveStatisticsProvider, "hiveStatisticsProvider is null"); checkArgument(maxPartitions >= 1, "maxPartitions must be at least 1"); this.maxPartitions = maxPartitions; + this.collectibleStatisticsProvider = requireNonNull(collectibleStatisticsProvider, "collectibleStatisticsProvider is null"); 
} public SemiTransactionalHiveMetastore getMetastore() @@ -427,6 +442,11 @@ private ConnectorTableMetadata getTableMetadata(SchemaTableName tableName) Optional comment = Optional.ofNullable(table.get().getParameters().get(TABLE_COMMENT)); + String collectColumnStatisticsOnWrite = table.get().getParameters().get(COLLECT_COLUMN_STATISTICS_ON_WRITE_ENABLED_KEY); + if (collectColumnStatisticsOnWrite != null && Boolean.valueOf(collectColumnStatisticsOnWrite)) { + properties.put(COLLECT_COLUMN_STATISTICS_ON_WRITE_ENABLED, true); + } + return new ConnectorTableMetadata(tableName, columns.build(), properties.build(), comment); } @@ -606,7 +626,7 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe Optional bucketProperty = getBucketProperty(tableMetadata.getProperties()); List columnHandles = getColumnHandles(tableMetadata, ImmutableSet.copyOf(partitionedBy), typeTranslator); HiveStorageFormat hiveStorageFormat = getHiveStorageFormat(tableMetadata.getProperties()); - Map tableProperties = getEmptyTableProperties(tableMetadata, !partitionedBy.isEmpty()); + Map tableProperties = getEmptyTableProperties(tableMetadata); hiveStorageFormat.validateColumns(columnHandles); @@ -648,10 +668,17 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe external, prestoVersion); PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner()); - metastore.createTable(session, table, principalPrivileges, Optional.empty(), ignoreExisting); + HiveBasicStatistics basicStatistics = table.getPartitionColumns().isEmpty() ? 
createZeroStatistics() : createEmptyStatistics(); + metastore.createTable( + session, + table, + principalPrivileges, + Optional.empty(), + ignoreExisting, + new PartitionStatistics(basicStatistics, ImmutableMap.of())); } - private Map getEmptyTableProperties(ConnectorTableMetadata tableMetadata, boolean partitioned) + private Map getEmptyTableProperties(ConnectorTableMetadata tableMetadata) { Builder tableProperties = ImmutableMap.builder(); @@ -668,9 +695,7 @@ private Map getEmptyTableProperties(ConnectorTableMetadata table // Table comment property tableMetadata.getComment().ifPresent(value -> tableProperties.put(TABLE_COMMENT, value)); - if (!partitioned) { - tableProperties.putAll(createZeroStatistics().toPartitionParameters()); - } + tableProperties.put(COLLECT_COLUMN_STATISTICS_ON_WRITE_ENABLED_KEY, Boolean.toString(HiveTableProperties.isCollectColumnStatisticsOnWriteEnabled(tableMetadata.getProperties()))); return tableProperties.build(); } @@ -829,7 +854,7 @@ public HiveOutputTableHandle beginCreateTable(ConnectorSession session, Connecto List partitionedBy = getPartitionedBy(tableMetadata.getProperties()); Optional bucketProperty = getBucketProperty(tableMetadata.getProperties()); - Map tableProperties = getEmptyTableProperties(tableMetadata, !partitionedBy.isEmpty()); + Map tableProperties = getEmptyTableProperties(tableMetadata); // get the root directory for the database SchemaTableName schemaTableName = tableMetadata.getTable(); @@ -872,7 +897,7 @@ public HiveOutputTableHandle beginCreateTable(ConnectorSession session, Connecto } @Override - public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { HiveOutputTableHandle handle = (HiveOutputTableHandle) tableHandle; @@ -909,22 +934,39 @@ public Optional finishCreateTable(ConnectorSession 
sess } } + Map columnTypes = handle.getInputColumns() + .stream() + .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager))); + Map, ComputedStatistics> partitionComputedStatistics = groupComputedStatisticsByPartition(computedStatistics, handle.getPartitionedBy(), columnTypes); + + PartitionStatistics tableStatistics; if (table.getPartitionColumns().isEmpty()) { - HiveBasicStatistics tableStatistic = partitionUpdates.stream() + HiveBasicStatistics basicStatistics = partitionUpdates.stream() .map(PartitionUpdate::getStatistics) - .reduce(Statistics::add) + .reduce((first, second) -> reduce(first, second, ADD)) .orElse(createZeroStatistics()); - table = updateStatistics(table, tableStatistic, ADD); + tableStatistics = createPartitionStatistics(session, basicStatistics, ImmutableList.of(), columnTypes, partitionComputedStatistics); + } + else { + tableStatistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()); } - metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), false); + metastore.createTable(session, table, principalPrivileges, Optional.of(writeInfo.getWritePath()), false, tableStatistics); if (!handle.getPartitionedBy().isEmpty()) { if (isRespectTableFormat(session)) { Verify.verify(handle.getPartitionStorageFormat() == handle.getTableStorageFormat()); } for (PartitionUpdate update : partitionUpdates) { - metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), buildPartitionObject(session, table, update), update.getWritePath()); + Partition partition = buildPartitionObject(session, table, update); + PartitionStatistics partitionStatistics = createPartitionStatistics(session, update.getStatistics(), partition.getValues(), columnTypes, partitionComputedStatistics); + metastore.addPartition( + session, + handle.getSchemaName(), + handle.getTableName(), + buildPartitionObject(session, table, update), + update.getWritePath(), + 
partitionStatistics); } } @@ -1074,7 +1116,7 @@ public HiveInsertTableHandle beginInsert(ConnectorSession session, ConnectorTabl } @Override - public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments) + public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { HiveInsertTableHandle handle = (HiveInsertTableHandle) insertHandle; @@ -1104,6 +1146,15 @@ public Optional finishInsert(ConnectorSession session, } } + List partitionedBy = table.get().getPartitionColumns() + .stream() + .map(Column::getName) + .collect(toImmutableList()); + Map columnTypes = handle.getInputColumns() + .stream() + .collect(toImmutableMap(HiveColumnHandle::getName, column -> column.getHiveType().getType(typeManager))); + Map, ComputedStatistics> partitionComputedStatistics = groupComputedStatisticsByPartition(computedStatistics, partitionedBy, columnTypes); + for (PartitionUpdate partitionUpdate : partitionUpdates) { if (partitionUpdate.getName().isEmpty()) { // insert into unpartitioned table @@ -1113,18 +1164,19 @@ public Optional finishInsert(ConnectorSession session, handle.getTableName(), partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), - partitionUpdate.getStatistics()); + createPartitionStatistics(session, partitionUpdate.getStatistics(), ImmutableList.of(), columnTypes, partitionComputedStatistics)); } else if (partitionUpdate.getUpdateMode() == APPEND) { // insert into existing partition + List partitionValues = toPartitionValues(partitionUpdate.getName()); metastore.finishInsertIntoExistingPartition( session, handle.getSchemaName(), handle.getTableName(), - toPartitionValues(partitionUpdate.getName()), + partitionValues, partitionUpdate.getWritePath(), partitionUpdate.getFileNames(), - partitionUpdate.getStatistics()); + createPartitionStatistics(session, partitionUpdate.getStatistics(), partitionValues, columnTypes, 
partitionComputedStatistics)); } else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode() == OVERWRITE) { // insert into new partition or overwrite existing partition @@ -1132,11 +1184,16 @@ else if (partitionUpdate.getUpdateMode() == NEW || partitionUpdate.getUpdateMode if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat()) && isRespectTableFormat(session)) { throw new PrestoException(HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert"); } - if (partitionUpdate.getUpdateMode() == OVERWRITE) { metastore.dropPartition(session, handle.getSchemaName(), handle.getTableName(), partition.getValues()); } - metastore.addPartition(session, handle.getSchemaName(), handle.getTableName(), partition, partitionUpdate.getWritePath()); + metastore.addPartition( + session, + handle.getSchemaName(), + handle.getTableName(), + partition, + partitionUpdate.getWritePath(), + createPartitionStatistics(session, partitionUpdate.getStatistics(), partition.getValues(), columnTypes, partitionComputedStatistics)); } else { throw new IllegalArgumentException(format("Unsupported update mode: %s", partitionUpdate.getUpdateMode())); @@ -1159,7 +1216,6 @@ private Partition buildPartitionObject(ConnectorSession session, Table table, Pa .setParameters(ImmutableMap.builder() .put(PRESTO_VERSION_NAME, prestoVersion) .put(PRESTO_QUERY_ID_NAME, session.getQueryId()) - .putAll(partitionUpdate.getStatistics().toPartitionParameters()) .build()) .withStorage(storage -> storage .setStorageFormat(isRespectTableFormat(session) ? 
@@ -1171,6 +1227,26 @@ private Partition buildPartitionObject(ConnectorSession session, Table table, Pa .build(); } + private PartitionStatistics createPartitionStatistics( + ConnectorSession session, + HiveBasicStatistics basicStatistics, + List partitionValues, + Map columnTypes, + Map, ComputedStatistics> partitionComputedStatistics) + { + Map computedColumnStatistics = Optional.ofNullable(partitionComputedStatistics.get(partitionValues)) + .map(ComputedStatistics::getColumnStatistics) + .orElse(ImmutableMap.of()); + verify(basicStatistics.getRowCount().isPresent(), "rowCount is not present"); + Map columnStatistics = fromComputedStatistics( + session, + timeZone, + computedColumnStatistics, + columnTypes, + basicStatistics.getRowCount().getAsLong()); + return new PartitionStatistics(basicStatistics, columnStatistics); + } + @Override public void createView(ConnectorSession session, SchemaTableName viewName, String viewData, boolean replace) { @@ -1211,7 +1287,7 @@ public void createView(ConnectorSession session, SchemaTableName viewName, Strin } try { - metastore.createTable(session, table, principalPrivileges, Optional.empty(), false); + metastore.createTable(session, table, principalPrivileges, Optional.empty(), false, new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of())); } catch (TableAlreadyExistsException e) { throw new ViewAlreadyExistsException(e.getTableName()); @@ -1498,6 +1574,73 @@ public Optional getNewTableLayout(ConnectorSession sess bucketedBy)); } + @Override + public TableStatisticsMetadata getNewTableStatisticsMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata) + { + if (!metastore.supportsColumnStatistics()) { + return TableStatisticsMetadata.empty(); + } + validatePartitionColumns(tableMetadata); + validateBucketColumns(tableMetadata); + if (!isCollectColumnStatisticsOnWriteEnabled(session, tableMetadata)) { + return TableStatisticsMetadata.empty(); + } + List partitionedBy = 
getPartitionedBy(tableMetadata.getProperties()); + return getTableStatisticsMetadata(tableMetadata.getColumns(), partitionedBy); + } + + @Override + public TableStatisticsMetadata getInsertIntoTableStatisticsMetadata(ConnectorSession session, ConnectorTableHandle tableHandle) + { + if (!metastore.supportsColumnStatistics()) { + return TableStatisticsMetadata.empty(); + } + ConnectorTableMetadata tableMetadata = getTableMetadata(session, tableHandle); + if (!isCollectColumnStatisticsOnWriteEnabled(session, tableMetadata)) { + return TableStatisticsMetadata.empty(); + } + List partitionedBy = getPartitionedBy(tableMetadata.getProperties()); + return getTableStatisticsMetadata(tableMetadata.getColumns(), partitionedBy == null ? ImmutableList.of() : partitionedBy); + } + + private static boolean isCollectColumnStatisticsOnWriteEnabled(ConnectorSession session, ConnectorTableMetadata tableMetadata) + { + HiveClientConfig.CollectColumnStatisticsOnWriteOption value = getCollectColumnStatisticsOnWrite(session); + switch (value) { + case ENABLED: + return true; + case ENABLED_FOR_MARKED_TABLES: + return HiveTableProperties.isCollectColumnStatisticsOnWriteEnabled(tableMetadata.getProperties()); + case DISABLED: + return false; + default: + throw new IllegalArgumentException("Unexpected collect_column_statistics_on_write option: " + value); + } + } + + private TableStatisticsMetadata getTableStatisticsMetadata(List columns, List partitionedBy) + { + Set columnStatistics = columns.stream() + .filter(column -> !partitionedBy.contains(column.getName())) + .filter(column -> !column.isHidden()) + .map(this::getColumnStatistics) + .flatMap(List::stream) + .collect(toImmutableSet()); + return new TableStatisticsMetadata(columnStatistics, ImmutableSet.of(), partitionedBy); + } + + private List getColumnStatistics(ColumnMetadata columnMetadata) + { + return getColumnStatistics(columnMetadata.getName(), collectibleStatisticsProvider.get(columnMetadata.getType())); + } + + private List 
getColumnStatistics(String columnName, Set statisticTypes) + { + return statisticTypes.stream() + .map(type -> new ColumnStatisticMetadata(columnName, type)) + .collect(toImmutableList()); + } + @Override public void grantTablePrivileges(ConnectorSession session, SchemaTableName schemaTableName, Set privileges, String grantee, boolean grantOption) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadataFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadataFactory.java index 60259b9ae3871..d0627c10b4746 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadataFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadataFactory.java @@ -50,6 +50,7 @@ public class HiveMetadataFactory private final BoundedExecutor renameExecution; private final TypeTranslator typeTranslator; private final String prestoVersion; + private final CollectibleStatisticsProvider collectibleStatisticsProvider; @Inject @SuppressWarnings("deprecation") @@ -64,7 +65,8 @@ public HiveMetadataFactory( TableParameterCodec tableParameterCodec, JsonCodec partitionUpdateCodec, TypeTranslator typeTranslator, - NodeVersion nodeVersion) + NodeVersion nodeVersion, + CollectibleStatisticsProvider collectibleStatisticsProvider) { this( metastore, @@ -84,7 +86,8 @@ public HiveMetadataFactory( partitionUpdateCodec, executorService, typeTranslator, - nodeVersion.toString()); + nodeVersion.toString(), + collectibleStatisticsProvider); } public HiveMetadataFactory( @@ -105,7 +108,8 @@ public HiveMetadataFactory( JsonCodec partitionUpdateCodec, ExecutorService executorService, TypeTranslator typeTranslator, - String prestoVersion) + String prestoVersion, + CollectibleStatisticsProvider collectibleStatisticsProvider) { this.allowCorruptWritesForTesting = allowCorruptWritesForTesting; this.skipDeletionForAlter = skipDeletionForAlter; @@ -133,6 +137,7 @@ public HiveMetadataFactory( } renameExecution = new 
BoundedExecutor(executorService, maxConcurrentFileRenames); + this.collectibleStatisticsProvider = requireNonNull(collectibleStatisticsProvider, "collectibleStatisticsProvider is null"); } public HiveMetadata create() @@ -157,7 +162,8 @@ public HiveMetadata create() partitionUpdateCodec, typeTranslator, prestoVersion, - new MetastoreHiveStatisticsProvider(typeManager, metastore, timeZone), - maxPartitions); + new MetastoreHiveStatisticsProvider(typeManager, metastore, timeZone, collectibleStatisticsProvider), + maxPartitions, + collectibleStatisticsProvider); } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java index 7ce908743b3e3..7fe7eb89ec1b9 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveSessionProperties.java @@ -13,6 +13,7 @@ */ package com.facebook.presto.hive; +import com.facebook.presto.hive.HiveClientConfig.CollectColumnStatisticsOnWriteOption; import com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.session.PropertyMetadata; @@ -68,6 +69,7 @@ public final class HiveSessionProperties private static final String SORTED_WRITING_ENABLED = "sorted_writing_enabled"; private static final String WRITER_SORT_BUFFER_SIZE = "writer_sort_buffer_size"; private static final String STATISTICS_ENABLED = "statistics_enabled"; + private static final String COLLECT_COLUMN_STATISTICS_ON_WRITE = "collect_column_statistics_on_write"; private final List> sessionProperties; @@ -246,7 +248,16 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon STATISTICS_ENABLED, "Experimental: Expose table statistics", hiveClientConfig.isTableStatisticsEnabled(), - false)); + false), + new PropertyMetadata<>( + 
COLLECT_COLUMN_STATISTICS_ON_WRITE, + "Experimental: Enables automatic column level statistics collection on write", + createUnboundedVarcharType(), + CollectColumnStatisticsOnWriteOption.class, + hiveClientConfig.getCollectColumnStatisticsOnWrite(), + false, + value -> CollectColumnStatisticsOnWriteOption.valueOf(((String) value).toUpperCase(ENGLISH)), + CollectColumnStatisticsOnWriteOption::toString)); } public List> getSessionProperties() @@ -414,6 +425,11 @@ public static boolean isStatisticsEnabled(ConnectorSession session) return session.getProperty(STATISTICS_ENABLED, Boolean.class); } + public static CollectColumnStatisticsOnWriteOption getCollectColumnStatisticsOnWrite(ConnectorSession session) + { + return session.getProperty(COLLECT_COLUMN_STATISTICS_ON_WRITE, CollectColumnStatisticsOnWriteOption.class); + } + public static PropertyMetadata dataSizeSessionProperty(String name, String description, DataSize defaultValue, boolean hidden) { return new PropertyMetadata<>( diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableProperties.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableProperties.java index 55280d0ac2643..c6ee106533bc0 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableProperties.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableProperties.java @@ -30,6 +30,7 @@ import static com.facebook.presto.hive.metastore.SortingColumn.Order.ASCENDING; import static com.facebook.presto.hive.metastore.SortingColumn.Order.DESCENDING; import static com.facebook.presto.spi.StandardErrorCode.INVALID_TABLE_PROPERTY; +import static com.facebook.presto.spi.session.PropertyMetadata.booleanSessionProperty; import static com.facebook.presto.spi.session.PropertyMetadata.doubleSessionProperty; import static com.facebook.presto.spi.session.PropertyMetadata.integerSessionProperty; import static com.facebook.presto.spi.session.PropertyMetadata.stringSessionProperty; @@ -49,6 +50,7 @@ public 
class HiveTableProperties public static final String SORTED_BY_PROPERTY = "sorted_by"; public static final String ORC_BLOOM_FILTER_COLUMNS = "orc_bloom_filter_columns"; public static final String ORC_BLOOM_FILTER_FPP = "orc_bloom_filter_fpp"; + public static final String COLLECT_COLUMN_STATISTICS_ON_WRITE_ENABLED = "collect_column_statistics_on_write_enabled"; private final List> tableProperties; @@ -124,7 +126,12 @@ public HiveTableProperties(TypeManager typeManager, HiveClientConfig config) "ORC Bloom filter false positive probability", config.getOrcDefaultBloomFilterFpp(), false), - integerSessionProperty(BUCKET_COUNT_PROPERTY, "Number of buckets", 0, false)); + integerSessionProperty(BUCKET_COUNT_PROPERTY, "Number of buckets", 0, false), + booleanSessionProperty( + COLLECT_COLUMN_STATISTICS_ON_WRITE_ENABLED, + "Experimental: Enables automatic column level statistics collection on write", + false, + false)); } public List> getTableProperties() @@ -192,6 +199,12 @@ public static Double getOrcBloomFilterFpp(Map tableProperties) return (Double) tableProperties.get(ORC_BLOOM_FILTER_FPP); } + public static boolean isCollectColumnStatisticsOnWriteEnabled(Map tableProperties) + { + Boolean enabled = (Boolean) tableProperties.get(COLLECT_COLUMN_STATISTICS_ON_WRITE_ENABLED); + return enabled != null && enabled; + } + private static SortingColumn sortingColumnFromString(String name) { SortingColumn.Order order = ASCENDING; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriteUtils.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriteUtils.java index 36994feb17d20..bf656617278bd 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriteUtils.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriteUtils.java @@ -21,6 +21,7 @@ import com.facebook.presto.hive.metastore.Storage; import com.facebook.presto.hive.metastore.Table; import com.facebook.presto.hive.s3.PrestoS3FileSystem; +import 
com.facebook.presto.spi.Page; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaNotFoundException; import com.facebook.presto.spi.SchemaTableName; @@ -41,6 +42,7 @@ import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.VarbinaryType; import com.facebook.presto.spi.type.VarcharType; +import com.google.common.base.CharMatcher; import com.google.common.collect.ImmutableList; import com.google.common.primitives.Shorts; import com.google.common.primitives.SignedBytes; @@ -105,8 +107,10 @@ import static com.facebook.presto.hive.HiveErrorCode.HIVE_DATABASE_LOCATION_ERROR; import static com.facebook.presto.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR; +import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE; import static com.facebook.presto.hive.HiveErrorCode.HIVE_SERDE_NOT_FOUND; import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_DATA_ERROR; +import static com.facebook.presto.hive.HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION; import static com.facebook.presto.hive.HiveUtil.checkCondition; import static com.facebook.presto.hive.HiveUtil.isArrayType; import static com.facebook.presto.hive.HiveUtil.isMapType; @@ -116,9 +120,11 @@ import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.facebook.presto.spi.type.Chars.isCharType; import static com.google.common.base.Strings.padEnd; +import static com.google.common.io.BaseEncoding.base16; import static java.lang.Float.intBitsToFloat; import static java.lang.Math.toIntExact; import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; import static java.util.UUID.randomUUID; import static java.util.stream.Collectors.toList; @@ -298,6 +304,27 @@ else if (isRowType(type)) { throw new IllegalArgumentException("unsupported type: " + type); } + public static List createPartitionValues(List partitionColumnTypes, Page 
partitionColumns, int position) + { + ImmutableList.Builder partitionValues = ImmutableList.builder(); + for (int field = 0; field < partitionColumns.getChannelCount(); field++) { + Object value = getField(partitionColumnTypes.get(field), partitionColumns.getBlock(field), position); + if (value == null) { + partitionValues.add(HIVE_DEFAULT_DYNAMIC_PARTITION); + } + else { + String valueString = value.toString(); + if (!CharMatcher.inRange((char) 0x20, (char) 0x7E).matchesAllOf(valueString)) { + throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, + "Hive partition keys can only contain printable ASCII characters (0x20 - 0x7E). Invalid value: " + + base16().withSeparator(" ", 2).encode(valueString.getBytes(UTF_8))); + } + partitionValues.add(valueString); + } + } + return partitionValues.build(); + } + public static Object getField(Type type, Block block, int position) { if (block.isNull(position)) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriterFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriterFactory.java index b4ad8b04875d2..b0ac13be9e410 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriterFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveWriterFactory.java @@ -32,7 +32,6 @@ import com.facebook.presto.spi.session.PropertyMetadata; import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.TypeManager; -import com.google.common.base.CharMatcher; import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; @@ -65,16 +64,14 @@ import static com.facebook.presto.hive.HiveErrorCode.HIVE_FILESYSTEM_ERROR; import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA; -import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE; import static com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_READ_ONLY; import static 
com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH; import static com.facebook.presto.hive.HiveErrorCode.HIVE_PATH_ALREADY_EXISTS; import static com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT; import static com.facebook.presto.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR; -import static com.facebook.presto.hive.HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION; import static com.facebook.presto.hive.HiveSessionProperties.getWriterSortBufferSize; import static com.facebook.presto.hive.HiveType.toHiveTypes; -import static com.facebook.presto.hive.HiveWriteUtils.getField; +import static com.facebook.presto.hive.HiveWriteUtils.createPartitionValues; import static com.facebook.presto.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_EXISTING_DIRECTORY; import static com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema; import static com.facebook.presto.hive.metastore.StorageFormat.fromHiveStorageFormat; @@ -84,10 +81,8 @@ import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.io.BaseEncoding.base16; import static java.lang.Math.min; import static java.lang.String.format; -import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; import static java.util.UUID.randomUUID; import static java.util.function.Function.identity; @@ -270,7 +265,7 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt fileName = filePrefix + "_" + randomUUID(); } - List partitionValues = toPartitionValues(partitionColumns, position); + List partitionValues = createPartitionValues(partitionColumnTypes, partitionColumns, position); Optional partitionName; if (!partitionColumnNames.isEmpty()) { @@ -581,27 +576,6 @@ private void validateSchema(Optional partitionName, Properties schema) } } - private List 
toPartitionValues(Page partitionColumns, int position) - { - ImmutableList.Builder partitionValues = ImmutableList.builder(); - for (int field = 0; field < partitionColumns.getChannelCount(); field++) { - Object value = getField(partitionColumnTypes.get(field), partitionColumns.getBlock(field), position); - if (value == null) { - partitionValues.add(HIVE_DEFAULT_DYNAMIC_PARTITION); - } - else { - String valueString = value.toString(); - if (!CharMatcher.inRange((char) 0x20, (char) 0x7E).matchesAllOf(valueString)) { - throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, - "Hive partition keys can only contain printable ASCII characters (0x20 - 0x7E). Invalid value: " + - base16().withSeparator(" ", 2).encode(valueString.getBytes(UTF_8))); - } - partitionValues.add(valueString); - } - } - return partitionValues.build(); - } - public static String computeBucketedFileName(String filePrefix, int bucket) { return filePrefix + "_bucket-" + Strings.padStart(Integer.toString(bucket), BUCKET_NUMBER_PADDING, '0'); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/PartitionStatistics.java b/presto-hive/src/main/java/com/facebook/presto/hive/PartitionStatistics.java index 59193f1745ff9..3e1bfb078ee0b 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/PartitionStatistics.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/PartitionStatistics.java @@ -18,14 +18,23 @@ import com.google.common.collect.ImmutableMap; import java.util.Map; +import java.util.Objects; +import static com.google.common.base.MoreObjects.toStringHelper; import static java.util.Objects.requireNonNull; public class PartitionStatistics { + private static final PartitionStatistics EMPTY = new PartitionStatistics(HiveBasicStatistics.createEmptyStatistics(), ImmutableMap.of()); + private final HiveBasicStatistics basicStatistics; private final Map columnStatistics; + public static PartitionStatistics empty() + { + return EMPTY; + } + public PartitionStatistics( 
HiveBasicStatistics basicStatistics, Map columnStatistics) @@ -43,4 +52,33 @@ public Map getColumnStatistics() { return columnStatistics; } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PartitionStatistics that = (PartitionStatistics) o; + return Objects.equals(basicStatistics, that.basicStatistics) && + Objects.equals(columnStatistics, that.columnStatistics); + } + + @Override + public int hashCode() + { + return Objects.hash(basicStatistics, columnStatistics); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("basicStatistics", basicStatistics) + .add("columnStatistics", columnStatistics) + .toString(); + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/CachingHiveMetastore.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/CachingHiveMetastore.java index b6902da8929fd..6bddcae2d6773 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/CachingHiveMetastore.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/CachingHiveMetastore.java @@ -16,6 +16,7 @@ import com.facebook.presto.hive.ForCachingHiveMetastore; import com.facebook.presto.hive.HiveClientConfig; import com.facebook.presto.hive.HiveType; +import com.facebook.presto.hive.PartitionStatistics; import com.facebook.presto.spi.PrestoException; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; @@ -44,6 +45,7 @@ import java.util.concurrent.ExecutorService; import java.util.function.Function; +import static com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY; import static com.facebook.presto.hive.HiveUtil.toPartitionValues; import static com.google.common.base.MoreObjects.toStringHelper; import static com.google.common.base.Preconditions.checkArgument; @@ -72,8 +74,8 @@ public class CachingHiveMetastore private final 
LoadingCache> databaseNamesCache; private final LoadingCache> tableCache; private final LoadingCache>> tableNamesCache; - private final LoadingCache> tableColumnStatisticsCache; - private final LoadingCache> partitionColumnStatisticsCache; + private final LoadingCache tableStatisticsCache; + private final LoadingCache partitionStatisticsCache; private final LoadingCache>> viewNamesCache; private final LoadingCache> partitionCache; private final LoadingCache>> partitionFilterCache; @@ -126,27 +128,27 @@ private CachingHiveMetastore(ExtendedHiveMetastore delegate, ExecutorService exe tableNamesCache = newCacheBuilder(expiresAfterWriteMillis, refreshMills, maximumSize) .build(asyncReloading(CacheLoader.from(this::loadAllTables), executor)); - tableColumnStatisticsCache = newCacheBuilder(expiresAfterWriteMillis, refreshMills, maximumSize) - .build(asyncReloading(new CacheLoader>() + tableStatisticsCache = newCacheBuilder(expiresAfterWriteMillis, refreshMills, maximumSize) + .build(asyncReloading(new CacheLoader() { @Override - public Map load(HiveTableName key) + public PartitionStatistics load(HiveTableName key) { return loadTableColumnStatistics(key); } }, executor)); - partitionColumnStatisticsCache = newCacheBuilder(expiresAfterWriteMillis, refreshMills, maximumSize) - .build(asyncReloading(new CacheLoader>() + partitionStatisticsCache = newCacheBuilder(expiresAfterWriteMillis, refreshMills, maximumSize) + .build(asyncReloading(new CacheLoader() { @Override - public Map load(HivePartitionName key) + public PartitionStatistics load(HivePartitionName key) { return loadPartitionColumnStatistics(key); } @Override - public Map> loadAll(Iterable keys) + public Map loadAll(Iterable keys) { return loadPartitionColumnStatistics(keys); } @@ -199,6 +201,8 @@ public void flushCache() partitionCache.invalidateAll(); partitionFilterCache.invalidateAll(); userTablePrivileges.invalidateAll(); + tableStatisticsCache.invalidateAll(); + partitionStatisticsCache.invalidateAll(); } 
private static V get(LoadingCache cache, K key) @@ -252,66 +256,94 @@ public Optional getTable(String databaseName, String tableName) return get(tableCache, HiveTableName.table(databaseName, tableName)); } + @Override + public boolean supportsColumnStatistics() + { + return delegate.supportsColumnStatistics(); + } + private Optional
loadTable(HiveTableName hiveTableName) { return delegate.getTable(hiveTableName.getDatabaseName(), hiveTableName.getTableName()); } @Override - public Map getTableColumnStatistics(String databaseName, String tableName) + public PartitionStatistics getTableStatistics(String databaseName, String tableName) { - return get(tableColumnStatisticsCache, new HiveTableName(databaseName, tableName)); + return get(tableStatisticsCache, new HiveTableName(databaseName, tableName)); } - private Map loadTableColumnStatistics(HiveTableName hiveTableName) + private PartitionStatistics loadTableColumnStatistics(HiveTableName hiveTableName) { - return delegate.getTableColumnStatistics(hiveTableName.getDatabaseName(), hiveTableName.getTableName()); + return delegate.getTableStatistics(hiveTableName.getDatabaseName(), hiveTableName.getTableName()); } @Override - public Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames) + public Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) { List partitions = partitionNames.stream() .map(partitionName -> HivePartitionName.partition(databaseName, tableName, partitionName)) .collect(toImmutableList()); - Map> statistics = getAll(partitionColumnStatisticsCache, partitions); + Map statistics = getAll(partitionStatisticsCache, partitions); return statistics.entrySet() .stream() - .filter(entry -> !entry.getValue().isEmpty()) .collect(toImmutableMap(entry -> entry.getKey().getPartitionName(), Entry::getValue)); } - private Map loadPartitionColumnStatistics(HivePartitionName partition) + private PartitionStatistics loadPartitionColumnStatistics(HivePartitionName partition) { - Map> columnStatistics = delegate.getPartitionColumnStatistics( + Map partitionStatistics = delegate.getPartitionStatistics( partition.getHiveTableName().getDatabaseName(), partition.getHiveTableName().getTableName(), ImmutableSet.of(partition.getPartitionName())); - return 
columnStatistics.getOrDefault(partition.getPartitionName(), ImmutableMap.of()); + if (!partitionStatistics.containsKey(partition.getPartitionName())) { + throw new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Statistics result does not contain entry for partition: " + partition.getPartitionName()); + } + return partitionStatistics.get(partition.getPartitionName()); } - private Map> loadPartitionColumnStatistics(Iterable keys) + private Map loadPartitionColumnStatistics(Iterable keys) { SetMultimap tablePartitions = stream(keys) .collect(toImmutableSetMultimap(HivePartitionName::getHiveTableName, key -> key)); - ImmutableMap.Builder> result = ImmutableMap.builder(); + ImmutableMap.Builder result = ImmutableMap.builder(); tablePartitions.keySet().forEach(table -> { Set partitionNames = tablePartitions.get(table).stream() .map(HivePartitionName::getPartitionName) .collect(toImmutableSet()); - Map> partitionStatistics = delegate.getPartitionColumnStatistics(table.getDatabaseName(), table.getTableName(), partitionNames); + Map partitionStatistics = delegate.getPartitionStatistics(table.getDatabaseName(), table.getTableName(), partitionNames); for (String partitionName : partitionNames) { - if (partitionStatistics.containsKey(partitionName)) { - result.put(HivePartitionName.partition(table, partitionName), partitionStatistics.get(partitionName)); - } - else { - result.put(HivePartitionName.partition(table, partitionName), ImmutableMap.of()); + if (!partitionStatistics.containsKey(partitionName)) { + throw new PrestoException(HIVE_PARTITION_DROPPED_DURING_QUERY, "Statistics result does not contain entry for partition: " + partitionName); } + result.put(HivePartitionName.partition(table, partitionName), partitionStatistics.get(partitionName)); } }); return result.build(); } + @Override + public void updateTableStatistics(String databaseName, String tableName, Function update) + { + try { + delegate.updateTableStatistics(databaseName, tableName, update); + } + 
finally { + tableStatisticsCache.invalidate(HiveTableName.table(databaseName, tableName)); + } + } + + @Override + public void updatePartitionStatistics(String databaseName, String tableName, String partitionName, Function update) + { + try { + delegate.updatePartitionStatistics(databaseName, tableName, partitionName, update); + } + finally { + partitionStatisticsCache.invalidate(HivePartitionName.partition(databaseName, tableName, partitionName)); + } + } + @Override public Optional> getAllTables(String databaseName) { @@ -473,6 +505,7 @@ protected void invalidateTable(String databaseName, String tableName) .filter(userTableKey -> userTableKey.matches(databaseName, tableName)) .forEach(userTablePrivileges::invalidate); invalidatePartitionCache(databaseName, tableName); + tableStatisticsCache.invalidate(new HiveTableName(databaseName, tableName)); } @Override @@ -608,6 +641,9 @@ private void invalidatePartitionCache(String databaseName, String tableName) partitionFilterCache.asMap().keySet().stream() .filter(partitionFilter -> partitionFilter.getHiveTableName().equals(hiveTableName)) .forEach(partitionFilterCache::invalidate); + partitionStatisticsCache.asMap().keySet().stream() + .filter(partitionFilter -> partitionFilter.getHiveTableName().equals(hiveTableName)) + .forEach(partitionStatisticsCache::invalidate); } @Override diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/ExtendedHiveMetastore.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/ExtendedHiveMetastore.java index ca0e2020234a9..785ddb84f8e21 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/ExtendedHiveMetastore.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/ExtendedHiveMetastore.java @@ -14,6 +14,7 @@ package com.facebook.presto.hive.metastore; import com.facebook.presto.hive.HiveType; +import com.facebook.presto.hive.PartitionStatistics; import java.util.List; import java.util.Map; @@ -29,9 +30,15 @@ public 
interface ExtendedHiveMetastore Optional
getTable(String databaseName, String tableName); - Map getTableColumnStatistics(String databaseName, String tableName); + boolean supportsColumnStatistics(); - Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames); + PartitionStatistics getTableStatistics(String databaseName, String tableName); + + Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames); + + void updateTableStatistics(String databaseName, String tableName, Function update); + + void updatePartitionStatistics(String databaseName, String tableName, String partitionName, Function update); Optional> getAllTables(String databaseName); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/HiveColumnStatistics.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/HiveColumnStatistics.java index 3cbf1ed3b94c3..9d9628eff237b 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/HiveColumnStatistics.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/HiveColumnStatistics.java @@ -14,14 +14,23 @@ package com.facebook.presto.hive.metastore; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeInfo; + +import java.util.Objects; import java.util.Optional; import java.util.OptionalDouble; import java.util.OptionalLong; +import static com.fasterxml.jackson.annotation.JsonTypeInfo.Id.NAME; +import static com.google.common.base.MoreObjects.toStringHelper; +import static java.util.Objects.requireNonNull; + public class HiveColumnStatistics { - private final Optional lowValue; - private final Optional highValue; + private final Optional> lowValue; + private final Optional> highValue; private final OptionalLong maxColumnLength; private final OptionalDouble averageColumnLength; private final OptionalLong trueCount; @@ -29,63 +38,201 @@ public class HiveColumnStatistics private 
final OptionalLong nullsCount; private final OptionalLong distinctValuesCount; + @JsonCreator public HiveColumnStatistics( - Optional lowValue, - Optional highValue, - OptionalLong maxColumnLength, - OptionalDouble averageColumnLength, - OptionalLong trueCount, - OptionalLong falseCount, - OptionalLong nullsCount, - OptionalLong distinctValuesCount) + @JsonProperty("lowValue") Optional> lowValue, + @JsonProperty("highValue") Optional> highValue, + @JsonProperty("maxColumnLength") OptionalLong maxColumnLength, + @JsonProperty("averageColumnLength") OptionalDouble averageColumnLength, + @JsonProperty("trueCount") OptionalLong trueCount, + @JsonProperty("falseCount") OptionalLong falseCount, + @JsonProperty("nullsCount") OptionalLong nullsCount, + @JsonProperty("distinctValuesCount") OptionalLong distinctValuesCount) { - this.lowValue = lowValue; - this.highValue = highValue; - this.maxColumnLength = maxColumnLength; - this.averageColumnLength = averageColumnLength; - this.trueCount = trueCount; - this.falseCount = falseCount; - this.nullsCount = nullsCount; - this.distinctValuesCount = distinctValuesCount; + this.lowValue = requireNonNull(lowValue, "lowValue is null"); + this.highValue = requireNonNull(highValue, "highValue is null"); + this.maxColumnLength = requireNonNull(maxColumnLength, "maxColumnLength is null"); + this.averageColumnLength = requireNonNull(averageColumnLength, "averageColumnLength is null"); + this.trueCount = requireNonNull(trueCount, "trueCount is null"); + this.falseCount = requireNonNull(falseCount, "falseCount is null"); + this.nullsCount = requireNonNull(nullsCount, "nullsCount is null"); + this.distinctValuesCount = requireNonNull(distinctValuesCount, "distinctValuesCount is null"); } - public Optional getLowValue() + @JsonProperty + @JsonTypeInfo(use = NAME, property = "@lowValueClass") + public Optional> getLowValue() { return lowValue; } - public Optional getHighValue() + @JsonProperty + @JsonTypeInfo(use = NAME, property = 
"@highValueClass") + public Optional> getHighValue() { return highValue; } + @JsonProperty public OptionalLong getMaxColumnLength() { return maxColumnLength; } + @JsonProperty public OptionalDouble getAverageColumnLength() { return averageColumnLength; } + @JsonProperty public OptionalLong getTrueCount() { return trueCount; } + @JsonProperty public OptionalLong getFalseCount() { return falseCount; } + @JsonProperty public OptionalLong getNullsCount() { return nullsCount; } + @JsonProperty public OptionalLong getDistinctValuesCount() { return distinctValuesCount; } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + HiveColumnStatistics that = (HiveColumnStatistics) o; + return Objects.equals(lowValue, that.lowValue) && + Objects.equals(highValue, that.highValue) && + Objects.equals(maxColumnLength, that.maxColumnLength) && + Objects.equals(averageColumnLength, that.averageColumnLength) && + Objects.equals(trueCount, that.trueCount) && + Objects.equals(falseCount, that.falseCount) && + Objects.equals(nullsCount, that.nullsCount) && + Objects.equals(distinctValuesCount, that.distinctValuesCount); + } + + @Override + public int hashCode() + { + return Objects.hash( + lowValue, + highValue, + maxColumnLength, + averageColumnLength, + trueCount, + falseCount, + nullsCount, + distinctValuesCount); + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("lowValue", lowValue) + .add("highValue", highValue) + .add("maxColumnLength", maxColumnLength) + .add("averageColumnLength", averageColumnLength) + .add("trueCount", trueCount) + .add("falseCount", falseCount) + .add("nullsCount", nullsCount) + .add("distinctValuesCount", distinctValuesCount) + .toString(); + } + + public static Builder builder() + { + return new Builder(); + } + + public static class Builder + { + private Optional> lowValue = Optional.empty(); + private Optional> 
highValue = Optional.empty(); + private OptionalLong maxColumnLength = OptionalLong.empty(); + private OptionalDouble averageColumnLength = OptionalDouble.empty(); + private OptionalLong trueCount = OptionalLong.empty(); + private OptionalLong falseCount = OptionalLong.empty(); + private OptionalLong nullsCount = OptionalLong.empty(); + private OptionalLong distinctValuesCount = OptionalLong.empty(); + + public Builder setLowValue(Comparable lowValue) + { + this.lowValue = Optional.of(lowValue); + return this; + } + + public Builder setHighValue(Comparable highValue) + { + this.highValue = Optional.of(highValue); + return this; + } + + public Builder setMaxColumnLength(long maxColumnLength) + { + this.maxColumnLength = OptionalLong.of(maxColumnLength); + return this; + } + + public Builder setAverageColumnLength(double averageColumnLength) + { + this.averageColumnLength = OptionalDouble.of(averageColumnLength); + return this; + } + + public Builder setTrueCount(long trueCount) + { + this.trueCount = OptionalLong.of(trueCount); + return this; + } + + public Builder setFalseCount(long falseCount) + { + this.falseCount = OptionalLong.of(falseCount); + return this; + } + + public Builder setNullsCount(long nullsCount) + { + this.nullsCount = OptionalLong.of(nullsCount); + return this; + } + + public Builder setDistinctValuesCount(long distinctValuesCount) + { + this.distinctValuesCount = OptionalLong.of(distinctValuesCount); + return this; + } + + public HiveColumnStatistics build() + { + return new HiveColumnStatistics( + lowValue, + highValue, + maxColumnLength, + averageColumnLength, + trueCount, + falseCount, + nullsCount, + distinctValuesCount); + } + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/SemiTransactionalHiveMetastore.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/SemiTransactionalHiveMetastore.java index 52c23d1d2f2d9..82f1875565df6 100644 --- 
a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/SemiTransactionalHiveMetastore.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/SemiTransactionalHiveMetastore.java @@ -16,10 +16,10 @@ import com.facebook.presto.hadoop.HadoopFileStatus; import com.facebook.presto.hive.HdfsEnvironment; import com.facebook.presto.hive.HdfsEnvironment.HdfsContext; -import com.facebook.presto.hive.HiveBasicStatistics; import com.facebook.presto.hive.HiveType; import com.facebook.presto.hive.LocationHandle.WriteMode; import com.facebook.presto.hive.PartitionNotFoundException; +import com.facebook.presto.hive.PartitionStatistics; import com.facebook.presto.hive.TableAlreadyExistsException; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.PrestoException; @@ -65,9 +65,9 @@ import static com.facebook.presto.hive.HiveWriteUtils.createDirectory; import static com.facebook.presto.hive.HiveWriteUtils.pathExists; import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP; -import static com.facebook.presto.hive.util.Statistics.ReduceOperator.ADD; import static com.facebook.presto.hive.util.Statistics.ReduceOperator.SUBTRACT; -import static com.facebook.presto.hive.util.Statistics.updateStatistics; +import static com.facebook.presto.hive.util.Statistics.merge; +import static com.facebook.presto.hive.util.Statistics.reduce; import static com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.facebook.presto.spi.StandardErrorCode.TRANSACTION_CONFLICT; @@ -75,6 +75,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableList.toImmutableList; import static java.lang.String.format; import static java.util.Objects.requireNonNull; import 
static org.apache.hadoop.hive.common.FileUtils.makePartName; @@ -150,25 +151,31 @@ public synchronized Optional
getTable(String databaseName, String tableNa } } - public synchronized Map getTableColumnStatistics(String databaseName, String tableName) + public synchronized boolean supportsColumnStatistics() + { + return delegate.supportsColumnStatistics(); + } + + public synchronized PartitionStatistics getTableStatistics(String databaseName, String tableName) { checkReadable(); Action tableAction = tableActions.get(new SchemaTableName(databaseName, tableName)); if (tableAction == null) { - return delegate.getTableColumnStatistics(databaseName, tableName); + return delegate.getTableStatistics(databaseName, tableName); } switch (tableAction.getType()) { case ADD: case ALTER: case INSERT_EXISTING: + return tableAction.getData().getStatistics(); case DROP: - return ImmutableMap.of(); + return PartitionStatistics.empty(); default: throw new IllegalStateException("Unknown action type"); } } - public synchronized Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames) + public synchronized Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) { checkReadable(); Optional
table = getTable(databaseName, tableName); @@ -178,7 +185,7 @@ public synchronized Map> getPartitionC TableSource tableSource = getTableSource(databaseName, tableName); Map, Action> partitionActionsOfTable = partitionActions.computeIfAbsent(new SchemaTableName(databaseName, tableName), k -> new HashMap<>()); ImmutableSet.Builder partitionNamesToQuery = ImmutableSet.builder(); - ImmutableMap.Builder> resultBuilder = ImmutableMap.builder(); + ImmutableMap.Builder resultBuilder = ImmutableMap.builder(); for (String partitionName : partitionNames) { List partitionValues = toPartitionValues(partitionName); Action partitionAction = partitionActionsOfTable.get(partitionValues); @@ -188,23 +195,23 @@ public synchronized Map> getPartitionC partitionNamesToQuery.add(partitionName); break; case CREATED_IN_THIS_TRANSACTION: - resultBuilder.put(partitionName, ImmutableMap.of()); + resultBuilder.put(partitionName, PartitionStatistics.empty()); break; default: throw new UnsupportedOperationException("unknown table source"); } } else { - resultBuilder.put(partitionName, ImmutableMap.of()); + resultBuilder.put(partitionName, partitionAction.getData().getStatistics()); } } - Map> delegateResult = delegate.getPartitionColumnStatistics(databaseName, tableName, partitionNamesToQuery.build()); + Map delegateResult = delegate.getPartitionStatistics(databaseName, tableName, partitionNamesToQuery.build()); if (!delegateResult.isEmpty()) { resultBuilder.putAll(delegateResult); } else { - partitionNamesToQuery.build().forEach(partionName -> resultBuilder.put(partionName, ImmutableMap.of())); + partitionNamesToQuery.build().forEach(partitionName -> resultBuilder.put(partitionName, PartitionStatistics.empty())); } return resultBuilder.build(); } @@ -288,14 +295,20 @@ public synchronized void renameDatabase(String source, String target) /** * {@code currentLocation} needs to be supplied if a writePath exists for the table. 
*/ - public synchronized void createTable(ConnectorSession session, Table table, PrincipalPrivileges principalPrivileges, Optional currentPath, boolean ignoreExisting) + public synchronized void createTable( + ConnectorSession session, + Table table, + PrincipalPrivileges principalPrivileges, + Optional currentPath, + boolean ignoreExisting, + PartitionStatistics statistics) { setShared(); // When creating a table, it should never have partition actions. This is just a sanity check. checkNoPartitionAction(table.getDatabaseName(), table.getTableName()); SchemaTableName schemaTableName = new SchemaTableName(table.getDatabaseName(), table.getTableName()); Action oldTableAction = tableActions.get(schemaTableName); - TableAndMore tableAndMore = new TableAndMore(table, Optional.of(principalPrivileges), currentPath, Optional.empty(), ignoreExisting, Optional.empty()); + TableAndMore tableAndMore = new TableAndMore(table, Optional.of(principalPrivileges), currentPath, Optional.empty(), ignoreExisting, statistics, statistics); if (oldTableAction == null) { HdfsContext context = new HdfsContext(session, table.getDatabaseName(), table.getTableName()); tableActions.put(schemaTableName, new Action<>(ActionType.ADD, tableAndMore, context)); @@ -368,7 +381,7 @@ public synchronized void finishInsertIntoExistingTable( String tableName, Path currentLocation, List fileNames, - HiveBasicStatistics statistics) + PartitionStatistics statisticsUpdate) { // Data can only be inserted into partitions and unpartitioned tables. They can never be inserted into a partitioned table. // Therefore, this method assumes that the table is unpartitioned. 
@@ -376,21 +389,22 @@ public synchronized void finishInsertIntoExistingTable( SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName); Action oldTableAction = tableActions.get(schemaTableName); if (oldTableAction == null) { - Table originalTable = delegate.getTable(databaseName, tableName) + Table table = delegate.getTable(databaseName, tableName) .orElseThrow(() -> new TableNotFoundException(schemaTableName)); - Table updatedTable = updateStatistics(originalTable, statistics, ADD); + PartitionStatistics currentStatistics = getTableStatistics(databaseName, tableName); HdfsContext context = new HdfsContext(session, databaseName, tableName); tableActions.put( schemaTableName, new Action<>( ActionType.INSERT_EXISTING, new TableAndMore( - updatedTable, + table, Optional.empty(), Optional.of(currentLocation), Optional.of(fileNames), false, - Optional.of(statistics)), + merge(currentStatistics, statisticsUpdate), + statisticsUpdate), context)); return; } @@ -596,7 +610,13 @@ private static Optional getPartitionFromPartitionAction(Action(ActionType.ADD, new PartitionAndMore(partition, currentLocation, Optional.empty(), Optional.empty()), context)); + new Action<>(ActionType.ADD, new PartitionAndMore(partition, currentLocation, Optional.empty(), statistics, statistics), context)); return; } switch (oldPartitionAction.getType()) { @@ -616,7 +636,7 @@ public synchronized void addPartition(ConnectorSession session, String databaseN } partitionActionsOfTable.put( partition.getValues(), - new Action<>(ActionType.ALTER, new PartitionAndMore(partition, currentLocation, Optional.empty(), Optional.empty()), context)); + new Action<>(ActionType.ALTER, new PartitionAndMore(partition, currentLocation, Optional.empty(), statistics, statistics), context)); break; } case ADD: @@ -659,26 +679,31 @@ public synchronized void finishInsertIntoExistingPartition( List partitionValues, Path currentLocation, List fileNames, - HiveBasicStatistics statistics) + 
PartitionStatistics statisticsUpdate) { setShared(); SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName); Map, Action> partitionActionsOfTable = partitionActions.computeIfAbsent(schemaTableName, k -> new HashMap<>()); Action oldPartitionAction = partitionActionsOfTable.get(partitionValues); if (oldPartitionAction == null) { - Partition originalPartition = delegate.getPartition(databaseName, tableName, partitionValues) + Partition partition = delegate.getPartition(databaseName, tableName, partitionValues) .orElseThrow(() -> new PartitionNotFoundException(schemaTableName, partitionValues)); - Partition updatedPartition = updateStatistics(originalPartition, statistics, ADD); + String partitionName = getPartitionName(databaseName, tableName, partitionValues); + PartitionStatistics currentStatistics = delegate.getPartitionStatistics(databaseName, tableName, ImmutableSet.of(partitionName)).get(partitionName); + if (currentStatistics == null) { + throw new PrestoException(HIVE_METASTORE_ERROR, "currentStatistics is null"); + } HdfsContext context = new HdfsContext(session, databaseName, tableName); partitionActionsOfTable.put( partitionValues, new Action<>( ActionType.INSERT_EXISTING, new PartitionAndMore( - updatedPartition, + partition, currentLocation, Optional.of(fileNames), - Optional.of(statistics)), + merge(currentStatistics, statisticsUpdate), + statisticsUpdate), context)); return; } @@ -695,6 +720,16 @@ public synchronized void finishInsertIntoExistingPartition( } } + private String getPartitionName(String databaseName, String tableName, List partitionValues) + { + Table table = getTable(databaseName, tableName) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + List columnNames = table.getPartitionColumns().stream() + .map(Column::getName) + .collect(toImmutableList()); + return makePartName(columnNames, partitionValues); + } + public synchronized Set getRoles(String user) { 
checkReadable(); @@ -1002,6 +1037,11 @@ private void prepareAddTable(HdfsContext context, TableAndMore tableAndMore) } } addTableOperations.add(new CreateTableOperation(table, tableAndMore.getPrincipalPrivileges(), tableAndMore.isIgnoreExisting())); + updateStatisticsOperations.add(new UpdateStatisticsOperation( + new SchemaTableName(table.getDatabaseName(), table.getTableName()), + Optional.empty(), + tableAndMore.getStatisticsUpdate(), + false)); } private void prepareInsertExistingTable(HdfsContext context, TableAndMore tableAndMore) @@ -1015,13 +1055,11 @@ private void prepareInsertExistingTable(HdfsContext context, TableAndMore tableA if (!targetPath.equals(currentPath)) { asyncRename(hdfsEnvironment, renameExecutor, fileRenameCancelled, fileRenameFutures, context, currentPath, targetPath, tableAndMore.getFileNames().get()); } - if (tableAndMore.getStatisticsUpdate().isPresent()) { - updateStatisticsOperations.add( - new UpdateStatisticsOperation( - new SchemaTableName(table.getDatabaseName(), table.getTableName()), - Optional.empty(), - tableAndMore.getStatisticsUpdate().get())); - } + updateStatisticsOperations.add(new UpdateStatisticsOperation( + new SchemaTableName(table.getDatabaseName(), table.getTableName()), + Optional.empty(), + tableAndMore.getStatisticsUpdate(), + true)); } private void prepareDropPartition(SchemaTableName schemaTableName, List partitionValues) @@ -1084,6 +1122,11 @@ private void prepareAlterPartition(HdfsContext context, PartitionAndMore partiti // Partition alter must happen regardless of whether original and current location is the same // because metadata might change: e.g. 
storage format, column types, etc alterPartitionOperations.add(new AlterPartitionOperation(partition, oldPartition.get())); + updateStatisticsOperations.add(new UpdateStatisticsOperation( + new SchemaTableName(partition.getDatabaseName(), partition.getTableName()), + Optional.of(getPartitionName(partition.getDatabaseName(), partition.getTableName(), partition.getValues())), + partitionAndMore.getStatisticsUpdate(), + false)); } private void prepareAddPartition(HdfsContext context, PartitionAndMore partitionAndMore) @@ -1109,6 +1152,11 @@ private void prepareAddPartition(HdfsContext context, PartitionAndMore partition () -> cleanUpTasksForAbort.add(new DirectoryCleanUpTask(context, targetPath, true))); } partitionAdder.addPartition(partition); + updateStatisticsOperations.add(new UpdateStatisticsOperation( + new SchemaTableName(partition.getDatabaseName(), partition.getTableName()), + Optional.of(getPartitionName(partition.getDatabaseName(), partition.getTableName(), partition.getValues())), + partitionAndMore.getStatisticsUpdate(), + false)); } private void prepareInsertExistingPartition(HdfsContext context, PartitionAndMore partitionAndMore) @@ -1122,13 +1170,11 @@ private void prepareInsertExistingPartition(HdfsContext context, PartitionAndMor if (!targetPath.equals(currentPath)) { asyncRename(hdfsEnvironment, renameExecutor, fileRenameCancelled, fileRenameFutures, context, currentPath, targetPath, partitionAndMore.getFileNames()); } - if (partitionAndMore.getStatisticsUpdate().isPresent()) { - updateStatisticsOperations.add( - new UpdateStatisticsOperation( - new SchemaTableName(partition.getDatabaseName(), partition.getTableName()), - Optional.of(partition.getValues()), - partitionAndMore.getStatisticsUpdate().get())); - } + updateStatisticsOperations.add(new UpdateStatisticsOperation( + new SchemaTableName(partition.getDatabaseName(), partition.getTableName()), + Optional.of(getPartitionName(partition.getDatabaseName(), partition.getTableName(), 
partition.getValues())), + partitionAndMore.getStatisticsUpdate(), + true)); } private void executeCleanupTasksForAbort(List filePrefixes) @@ -1815,7 +1861,8 @@ private static class TableAndMore private final Optional currentLocation; // unpartitioned table only private final Optional> fileNames; private final boolean ignoreExisting; - private final Optional statisticsUpdate; + private final PartitionStatistics statistics; + private final PartitionStatistics statisticsUpdate; public TableAndMore( Table table, @@ -1823,13 +1870,15 @@ public TableAndMore( Optional currentLocation, Optional> fileNames, boolean ignoreExisting, - Optional statisticsUpdate) + PartitionStatistics statistics, + PartitionStatistics statisticsUpdate) { this.table = requireNonNull(table, "table is null"); this.principalPrivileges = requireNonNull(principalPrivileges, "principalPrivileges is null"); this.currentLocation = requireNonNull(currentLocation, "currentLocation is null"); this.fileNames = requireNonNull(fileNames, "fileNames is null"); this.ignoreExisting = ignoreExisting; + this.statistics = requireNonNull(statistics, "statistics is null"); this.statisticsUpdate = requireNonNull(statisticsUpdate, "statisticsUpdate is null"); checkArgument(!table.getStorage().getLocation().isEmpty() || !currentLocation.isPresent(), "currentLocation can not be supplied for table without location"); @@ -1862,7 +1911,12 @@ public Optional> getFileNames() return fileNames; } - public Optional getStatisticsUpdate() + public PartitionStatistics getStatistics() + { + return statistics; + } + + public PartitionStatistics getStatisticsUpdate() { return statisticsUpdate; } @@ -1876,6 +1930,8 @@ public String toString() .add("currentLocation", currentLocation) .add("fileNames", fileNames) .add("ignoreExisting", ignoreExisting) + .add("statistics", statistics) + .add("statisticsUpdate", statisticsUpdate) .toString(); } } @@ -1885,13 +1941,15 @@ private static class PartitionAndMore private final Partition 
partition; private final Path currentLocation; private final Optional> fileNames; - private final Optional statisticsUpdate; + private final PartitionStatistics statistics; + private final PartitionStatistics statisticsUpdate; - public PartitionAndMore(Partition partition, Path currentLocation, Optional> fileNames, Optional statisticsUpdate) + public PartitionAndMore(Partition partition, Path currentLocation, Optional> fileNames, PartitionStatistics statistics, PartitionStatistics statisticsUpdate) { this.partition = requireNonNull(partition, "partition is null"); this.currentLocation = requireNonNull(currentLocation, "currentLocation is null"); this.fileNames = requireNonNull(fileNames, "fileNames is null"); + this.statistics = requireNonNull(statistics, "statistics is null"); this.statisticsUpdate = requireNonNull(statisticsUpdate, "statisticsUpdate is null"); } @@ -1911,7 +1969,12 @@ public List getFileNames() return fileNames.get(); } - public Optional getStatisticsUpdate() + public PartitionStatistics getStatistics() + { + return statistics; + } + + public PartitionStatistics getStatisticsUpdate() { return statisticsUpdate; } @@ -2261,28 +2324,27 @@ public void undo(ExtendedHiveMetastore metastore) private static class UpdateStatisticsOperation { private final SchemaTableName tableName; - private final Optional> partitionValues; - private final HiveBasicStatistics statistics; + private final Optional partitionName; + private final PartitionStatistics statistics; + private final boolean merge; private boolean done; - public UpdateStatisticsOperation( - SchemaTableName tableName, - Optional> partitionValues, - HiveBasicStatistics statistics) + public UpdateStatisticsOperation(SchemaTableName tableName, Optional partitionName, PartitionStatistics statistics, boolean merge) { this.tableName = requireNonNull(tableName, "tableName is null"); - this.partitionValues = requireNonNull(partitionValues, "partitionValues is null").map(ImmutableList::copyOf); + 
this.partitionName = requireNonNull(partitionName, "partitionName is null"); this.statistics = requireNonNull(statistics, "statistics is null"); + this.merge = merge; } public void run(ExtendedHiveMetastore metastore) { - if (partitionValues.isPresent()) { - metastore.updatePartitionParameters(tableName.getSchemaName(), tableName.getTableName(), partitionValues.get(), parameters -> updateStatistics(parameters, statistics, ADD)); + if (partitionName.isPresent()) { + metastore.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName.get(), this::updateStatistics); } else { - metastore.updateTableParameters(tableName.getSchemaName(), tableName.getTableName(), parameters -> updateStatistics(parameters, statistics, ADD)); + metastore.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), this::updateStatistics); } done = true; } @@ -2292,21 +2354,31 @@ public void undo(ExtendedHiveMetastore metastore) { if (!done) { return; } - if (partitionValues.isPresent()) { - metastore.updatePartitionParameters(tableName.getSchemaName(), tableName.getTableName(), partitionValues.get(), parameters -> updateStatistics(parameters, statistics, SUBTRACT)); + if (partitionName.isPresent()) { + metastore.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName.get(), this::resetStatistics); } else { - metastore.updateTableParameters(tableName.getSchemaName(), tableName.getTableName(), parameters -> updateStatistics(parameters, statistics, SUBTRACT)); + metastore.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), this::resetStatistics); } } public String getDescription() { - if (partitionValues.isPresent()) { - return format("replace partition parameters %s %s", tableName, partitionValues.get()); + if (partitionName.isPresent()) { + return format("replace partition parameters %s %s", tableName, partitionName.get()); } return format("replace table parameters %s", tableName); }
+ + private PartitionStatistics updateStatistics(PartitionStatistics currentStatistics) + { + return merge ? merge(currentStatistics, statistics) : statistics; + } + + private PartitionStatistics resetStatistics(PartitionStatistics currentStatistics) + { + return new PartitionStatistics(reduce(currentStatistics.getBasicStatistics(), statistics.getBasicStatistics(), SUBTRACT), ImmutableMap.of()); + } } private static class PartitionAdder diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java index 2c2c1ef48e8c8..1d207b3f588b9 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java @@ -17,10 +17,12 @@ import com.facebook.presto.hive.HdfsConfigurationUpdater; import com.facebook.presto.hive.HdfsEnvironment; import com.facebook.presto.hive.HdfsEnvironment.HdfsContext; +import com.facebook.presto.hive.HiveBasicStatistics; import com.facebook.presto.hive.HiveClientConfig; import com.facebook.presto.hive.HiveHdfsConfiguration; import com.facebook.presto.hive.HiveType; import com.facebook.presto.hive.PartitionNotFoundException; +import com.facebook.presto.hive.PartitionStatistics; import com.facebook.presto.hive.SchemaAlreadyExistsException; import com.facebook.presto.hive.TableAlreadyExistsException; import com.facebook.presto.hive.authentication.NoHdfsAuthentication; @@ -72,6 +74,7 @@ import java.util.function.Function; import static com.facebook.presto.hive.HiveErrorCode.HIVE_METASTORE_ERROR; +import static com.facebook.presto.hive.HivePartitionManager.extractPartitionKeyValues; import static com.facebook.presto.hive.HiveUtil.toPartitionValues; import static com.facebook.presto.hive.metastore.Database.DEFAULT_DATABASE_NAME; import static 
com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP; @@ -79,6 +82,10 @@ import static com.facebook.presto.hive.metastore.MetastoreUtil.verifyCanDropColumn; import static com.facebook.presto.hive.metastore.PrincipalType.ROLE; import static com.facebook.presto.hive.metastore.PrincipalType.USER; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticParameters; +import static com.facebook.presto.hive.util.Statistics.migrateStatistics; +import static com.facebook.presto.hive.util.Statistics.removeStatistics; import static com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.base.Preconditions.checkArgument; @@ -270,15 +277,36 @@ public synchronized Optional
getTable(String databaseName, String tableNa } @Override - public Map getTableColumnStatistics(String databaseName, String tableName) + public boolean supportsColumnStatistics() { - return ImmutableMap.of(); + return true; + } + + @Override + public synchronized PartitionStatistics getTableStatistics(String databaseName, String tableName) + { + Path tableMetadataDirectory = getTableMetadataDirectory(databaseName, tableName); + TableMetadata tableMetadata = readSchemaFile("table", tableMetadataDirectory, tableCodec) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + HiveBasicStatistics basicStatistics = getHiveBasicStatistics(tableMetadata.getParameters()); + Map columnStatistics = tableMetadata.getColumnStatistics(); + return new PartitionStatistics(basicStatistics, columnStatistics); } @Override - public Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames) + public synchronized Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) { - return ImmutableMap.of(); + Table table = getRequiredTable(databaseName, tableName); + ImmutableMap.Builder statistics = ImmutableMap.builder(); + for (String partitionName : partitionNames) { + List partitionValues = extractPartitionKeyValues(partitionName); + Path partitionDirectory = getPartitionMetadataDirectory(table, ImmutableList.copyOf(partitionValues)); + PartitionMetadata partitionMetadata = readSchemaFile("partition", partitionDirectory, partitionCodec) + .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partitionValues)); + HiveBasicStatistics basicStatistics = getHiveBasicStatistics(partitionMetadata.getParameters()); + statistics.put(partitionName, new PartitionStatistics(basicStatistics, partitionMetadata.getColumnStatistics())); + } + return statistics.build(); } private Table getRequiredTable(String databaseName, String tableName) @@ -294,6 +322,43 @@ 
private void verifyTableNotExists(String newDatabaseName, String newTableName) } } + @Override + public synchronized void updateTableStatistics(String databaseName, String tableName, Function update) + { + PartitionStatistics originalStatistics = getTableStatistics(databaseName, tableName); + PartitionStatistics updatedStatistics = update.apply(originalStatistics); + + Path tableMetadataDirectory = getTableMetadataDirectory(databaseName, tableName); + TableMetadata tableMetadata = readSchemaFile("table", tableMetadataDirectory, tableCodec) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + + TableMetadata updatedMetadata = tableMetadata + .withParameters(updateStatisticParameters(tableMetadata.getParameters(), updatedStatistics.getBasicStatistics())) + .withColumnStatistics(updatedStatistics.getColumnStatistics()); + + writeSchemaFile("table", tableMetadataDirectory, tableCodec, updatedMetadata, true); + } + + @Override + public synchronized void updatePartitionStatistics(String databaseName, String tableName, String partitionName, Function update) + { + PartitionStatistics originalStatistics = requireNonNull( + getPartitionStatistics(databaseName, tableName, ImmutableSet.of(partitionName)).get(partitionName), "getPartitionStatistics() returned null"); + PartitionStatistics updatedStatistics = update.apply(originalStatistics); + + Table table = getRequiredTable(databaseName, tableName); + List partitionValues = extractPartitionKeyValues(partitionName); + Path partitionDirectory = getPartitionMetadataDirectory(table, partitionValues); + PartitionMetadata partitionMetadata = readSchemaFile("partition", partitionDirectory, partitionCodec) + .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partitionValues)); + + PartitionMetadata updatedMetadata = partitionMetadata + .withParameters(updateStatisticParameters(partitionMetadata.getParameters(), 
updatedStatistics.getBasicStatistics())) + .withColumnStatistics(updatedStatistics.getColumnStatistics()); + + writeSchemaFile("partition", partitionDirectory, partitionCodec, updatedMetadata, true); + } + @Override public synchronized Optional> getAllTables(String databaseName) { @@ -460,8 +525,12 @@ public synchronized void renameColumn(String databaseName, String tableName, Str } } - return oldTable.withDataColumns(newDataColumns.build()); + return oldTable + .withDataColumns(newDataColumns.build()) + .withColumnStatistics(migrateStatistics(oldTable.getColumnStatistics(), oldColumnName, newColumnName)); }); + + alterTablePartitions(databaseName, tableName, partition -> partition.withColumnStatistics(migrateStatistics(partition.getColumnStatistics(), oldColumnName, newColumnName))); } @Override @@ -481,8 +550,12 @@ public synchronized void dropColumn(String databaseName, String tableName, Strin } } - return oldTable.withDataColumns(newDataColumns.build()); + return oldTable + .withDataColumns(newDataColumns.build()) + .withColumnStatistics(removeStatistics(oldTable.getColumnStatistics(), columnName)); }); + + alterTablePartitions(databaseName, tableName, partition -> partition.withColumnStatistics(removeStatistics(partition.getColumnStatistics(), columnName))); } private void alterTable(String databaseName, String tableName, Function alterFunction) @@ -502,6 +575,23 @@ private void alterTable(String databaseName, String tableName, Function alterFunction) + { + Table table = getRequiredTable(databaseName, tableName); + + SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName); + List partitionNames = getPartitionNames(databaseName, tableName) + .orElseThrow(() -> new TableNotFoundException(schemaTableName)); + + for (String partitionName : partitionNames) { + List partitionValues = toPartitionValues(partitionName); + Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partitionValues); + PartitionMetadata partitionMetadata 
= readSchemaFile("partition", partitionMetadataDirectory, partitionCodec) + .orElseThrow(() -> new PartitionNotFoundException(schemaTableName, partitionValues)); + writeSchemaFile("partition", partitionMetadataDirectory, partitionCodec, alterFunction.apply(partitionMetadata), true); + } + } + @Override public synchronized void addPartitions(String databaseName, String tableName, List partitions) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/PartitionMetadata.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/PartitionMetadata.java index 54fe4a79b0ec7..66ad1c366fbe4 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/PartitionMetadata.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/PartitionMetadata.java @@ -16,6 +16,7 @@ import com.facebook.presto.hive.HiveBucketProperty; import com.facebook.presto.hive.HiveStorageFormat; import com.facebook.presto.hive.metastore.Column; +import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.Partition; import com.facebook.presto.hive.metastore.Storage; import com.facebook.presto.hive.metastore.StorageFormat; @@ -45,6 +46,8 @@ public class PartitionMetadata private final Optional externalLocation; + private final Map columnStatistics; + @JsonCreator public PartitionMetadata( @JsonProperty("columns") List columns, @@ -52,7 +55,8 @@ public PartitionMetadata( @JsonProperty("storageFormat") Optional storageFormat, @JsonProperty("bucketProperty") Optional bucketProperty, @JsonProperty("serdeParameters") Map serdeParameters, - @JsonProperty("externalLocation") Optional externalLocation) + @JsonProperty("externalLocation") Optional externalLocation, + @JsonProperty("columnStatistics") Map columnStatistics) { this.columns = ImmutableList.copyOf(requireNonNull(columns, "columns is null")); this.parameters = ImmutableMap.copyOf(requireNonNull(parameters, "parameters is null")); @@ 
-62,9 +66,15 @@ public PartitionMetadata( this.serdeParameters = requireNonNull(serdeParameters, "serdeParameters is null"); this.externalLocation = requireNonNull(externalLocation, "externalLocation is null"); + this.columnStatistics = ImmutableMap.copyOf(requireNonNull(columnStatistics, "columnStatistics is null")); } public PartitionMetadata(Table table, Partition partition) + { + this(table, partition, ImmutableMap.of()); + } + + public PartitionMetadata(Table table, Partition partition, Map columnStatistics) { this.columns = partition.getColumns(); this.parameters = partition.getParameters(); @@ -83,6 +93,7 @@ public PartitionMetadata(Table table, Partition partition) bucketProperty = partition.getStorage().getBucketProperty(); serdeParameters = partition.getStorage().getSerdeParameters(); + this.columnStatistics = ImmutableMap.copyOf(requireNonNull(columnStatistics, "columnStatistics is null")); } @JsonProperty @@ -121,10 +132,22 @@ public Optional getExternalLocation() return externalLocation; } + @JsonProperty + public Map getColumnStatistics() + { + return columnStatistics; + } + public PartitionMetadata withParameters(Map parameters) { requireNonNull(parameters, "parameters is null"); - return new PartitionMetadata(columns, parameters, storageFormat, bucketProperty, serdeParameters, externalLocation); + return new PartitionMetadata(columns, parameters, storageFormat, bucketProperty, serdeParameters, externalLocation, columnStatistics); + } + + public PartitionMetadata withColumnStatistics(Map columnStatistics) + { + requireNonNull(columnStatistics, "columnStatistics is null"); + return new PartitionMetadata(columns, parameters, storageFormat, bucketProperty, serdeParameters, externalLocation, columnStatistics); } public Partition toPartition(String databaseName, String tableName, List values, String location) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/TableMetadata.java 
b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/TableMetadata.java index 2e9dcbdafdffa..13ada8a1788ab 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/TableMetadata.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/file/TableMetadata.java @@ -16,6 +16,7 @@ import com.facebook.presto.hive.HiveBucketProperty; import com.facebook.presto.hive.HiveStorageFormat; import com.facebook.presto.hive.metastore.Column; +import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.Storage; import com.facebook.presto.hive.metastore.StorageFormat; import com.facebook.presto.hive.metastore.Table; @@ -51,6 +52,8 @@ public class TableMetadata private final Optional viewOriginalText; private final Optional viewExpandedText; + private final Map columnStatistics; + @JsonCreator public TableMetadata( @JsonProperty("owner") String owner, @@ -63,7 +66,8 @@ public TableMetadata( @JsonProperty("serdeParameters") Map serdeParameters, @JsonProperty("externalLocation") Optional externalLocation, @JsonProperty("viewOriginalText") Optional viewOriginalText, - @JsonProperty("viewExpandedText") Optional viewExpandedText) + @JsonProperty("viewExpandedText") Optional viewExpandedText, + @JsonProperty("columnStatistics") Map columnStatistics) { this.owner = requireNonNull(owner, "owner is null"); this.tableType = requireNonNull(tableType, "tableType is null"); @@ -84,9 +88,16 @@ public TableMetadata( this.viewOriginalText = requireNonNull(viewOriginalText, "viewOriginalText is null"); this.viewExpandedText = requireNonNull(viewExpandedText, "viewExpandedText is null"); + this.columnStatistics = ImmutableMap.copyOf(requireNonNull(columnStatistics, "columnStatistics is null")); + checkArgument(partitionColumns.isEmpty() || columnStatistics.isEmpty(), "column statistics cannot be set for partitioned table"); } public TableMetadata(Table table) + { + this(table, ImmutableMap.of()); + } + + 
public TableMetadata(Table table, Map columnStatistics) { owner = table.getOwner(); tableType = table.getTableType(); @@ -110,6 +121,7 @@ public TableMetadata(Table table) viewOriginalText = table.getViewOriginalText(); viewExpandedText = table.getViewExpandedText(); + this.columnStatistics = ImmutableMap.copyOf(requireNonNull(columnStatistics, "columnStatistics is null")); } @JsonProperty @@ -193,6 +205,12 @@ public Optional getViewExpandedText() return viewExpandedText; } + @JsonProperty + public Map getColumnStatistics() + { + return columnStatistics; + } + public TableMetadata withDataColumns(List dataColumns) { return new TableMetadata( @@ -206,7 +224,8 @@ public TableMetadata withDataColumns(List dataColumns) serdeParameters, externalLocation, viewOriginalText, - viewExpandedText); + viewExpandedText, + columnStatistics); } public TableMetadata withParameters(Map parameters) @@ -222,7 +241,25 @@ public TableMetadata withParameters(Map parameters) serdeParameters, externalLocation, viewOriginalText, - viewExpandedText); + viewExpandedText, + columnStatistics); + } + + public TableMetadata withColumnStatistics(Map columnStatistics) + { + return new TableMetadata( + owner, + tableType, + dataColumns, + partitionColumns, + parameters, + storageFormat, + bucketProperty, + serdeParameters, + externalLocation, + viewOriginalText, + viewExpandedText, + columnStatistics); } public Table toTable(String databaseName, String tableName, String location) diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/glue/GlueHiveMetastore.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/glue/GlueHiveMetastore.java index 6a401029e62ff..d05a68837b5d5 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/glue/GlueHiveMetastore.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/glue/GlueHiveMetastore.java @@ -57,12 +57,12 @@ import com.facebook.presto.hive.HiveUtil; import 
com.facebook.presto.hive.HiveWriteUtils; import com.facebook.presto.hive.PartitionNotFoundException; +import com.facebook.presto.hive.PartitionStatistics; import com.facebook.presto.hive.SchemaAlreadyExistsException; import com.facebook.presto.hive.TableAlreadyExistsException; import com.facebook.presto.hive.metastore.Column; import com.facebook.presto.hive.metastore.Database; import com.facebook.presto.hive.metastore.ExtendedHiveMetastore; -import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; import com.facebook.presto.hive.metastore.Partition; import com.facebook.presto.hive.metastore.PrincipalPrivileges; @@ -87,6 +87,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import java.util.Set; import java.util.concurrent.ExecutionException; @@ -98,6 +99,8 @@ import static com.facebook.presto.hive.metastore.MetastoreUtil.makePartName; import static com.facebook.presto.hive.metastore.MetastoreUtil.verifyCanDropColumn; import static com.facebook.presto.hive.metastore.glue.GlueExpressionUtil.buildGlueExpression; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticParameters; import static com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.base.Strings.isNullOrEmpty; @@ -210,6 +213,12 @@ public Optional
getTable(String databaseName, String tableName) } } + @Override + public boolean supportsColumnStatistics() + { + return false; + } + private Table getTableOrElseThrow(String databaseName, String tableName) { return getTable(databaseName, tableName) @@ -217,15 +226,80 @@ private Table getTableOrElseThrow(String databaseName, String tableName) } @Override - public Map getTableColumnStatistics(String databaseName, String tableName) + public PartitionStatistics getTableStatistics(String databaseName, String tableName) { - return ImmutableMap.of(); + Table table = getTable(databaseName, tableName) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + return new PartitionStatistics(getHiveBasicStatistics(table.getParameters()), ImmutableMap.of()); } @Override - public Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames) + public Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) { - return ImmutableMap.of(); + ImmutableMap.Builder result = ImmutableMap.builder(); + getPartitionsByNames(databaseName, tableName, ImmutableList.copyOf(partitionNames)).forEach((partitionName, optionalPartition) -> { + Partition partition = optionalPartition.orElseThrow(() -> + new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), toPartitionValues(partitionName))); + PartitionStatistics partitionStatistics = new PartitionStatistics(getHiveBasicStatistics(partition.getParameters()), ImmutableMap.of()); + result.put(partitionName, partitionStatistics); + }); + return result.build(); + } + + @Override + public void updateTableStatistics(String databaseName, String tableName, Function update) + { + PartitionStatistics currentStatistics = getTableStatistics(databaseName, tableName); + PartitionStatistics updatedStatistics = update.apply(currentStatistics); + if (!updatedStatistics.getColumnStatistics().isEmpty()) { + throw new PrestoException(NOT_SUPPORTED, 
"glue metastore does not support column level statistics"); + } + + Table table = getTableOrElseThrow(databaseName, tableName); + + try { + TableInput tableInput = GlueInputConverter.convertTable(table); + tableInput.setParameters(updateStatisticParameters(table.getParameters(), updatedStatistics.getBasicStatistics())); + glueClient.updateTable(new UpdateTableRequest() + .withDatabaseName(databaseName) + .withTableInput(tableInput)); + } + catch (EntityNotFoundException e) { + throw new TableNotFoundException(new SchemaTableName(databaseName, tableName)); + } + catch (AmazonServiceException e) { + throw new PrestoException(HIVE_METASTORE_ERROR, e); + } + } + + @Override + public void updatePartitionStatistics(String databaseName, String tableName, String partitionName, Function update) + { + PartitionStatistics currentStatistics = requireNonNull( + getPartitionStatistics(databaseName, tableName, ImmutableSet.of(partitionName)).get(partitionName), "getPartitionStatistics() returned null"); + PartitionStatistics updatedStatistics = update.apply(currentStatistics); + if (!updatedStatistics.getColumnStatistics().isEmpty()) { + throw new PrestoException(NOT_SUPPORTED, "glue metastore does not support column level statistics"); + } + + List partitionValues = toPartitionValues(partitionName); + Partition partition = getPartition(databaseName, tableName, partitionValues) + .orElseThrow(() -> new PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partitionValues)); + try { + PartitionInput partitionInput = GlueInputConverter.convertPartition(partition); + partitionInput.setParameters(updateStatisticParameters(partition.getParameters(), updatedStatistics.getBasicStatistics())); + glueClient.updatePartition(new UpdatePartitionRequest() + .withDatabaseName(databaseName) + .withTableName(tableName) + .withPartitionValueList(partition.getValues()) + .withPartitionInput(partitionInput)); + } + catch (EntityNotFoundException e) { + throw new 
PartitionNotFoundException(new SchemaTableName(databaseName, tableName), partitionValues); + } + catch (AmazonServiceException e) { + throw new PrestoException(HIVE_METASTORE_ERROR, e); + } } @Override @@ -598,7 +672,7 @@ public Map> getPartitionsByNames(String databaseName .collect(toMap(Partition::getValues, identity())); ImmutableMap.Builder> resultBuilder = ImmutableMap.builder(); - for (Map.Entry> entry : partitionNameToPartitionValuesMap.entrySet()) { + for (Entry> entry : partitionNameToPartitionValuesMap.entrySet()) { Partition partition = partitionValuesToPartitionMap.get(entry.getValue()); resultBuilder.put(entry.getKey(), Optional.ofNullable(partition)); } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/BridgingHiveMetastore.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/BridgingHiveMetastore.java index 377843843ac9e..dca0ec3154456 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/BridgingHiveMetastore.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/BridgingHiveMetastore.java @@ -16,10 +16,9 @@ import com.facebook.presto.hive.HiveType; import com.facebook.presto.hive.HiveUtil; import com.facebook.presto.hive.PartitionNotFoundException; -import com.facebook.presto.hive.metastore.Column; +import com.facebook.presto.hive.PartitionStatistics; import com.facebook.presto.hive.metastore.Database; import com.facebook.presto.hive.metastore.ExtendedHiveMetastore; -import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; import com.facebook.presto.hive.metastore.Partition; import com.facebook.presto.hive.metastore.PrincipalPrivileges; @@ -29,7 +28,8 @@ import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.TableNotFoundException; import com.google.common.collect.ImmutableMap; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import 
com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; @@ -47,9 +47,9 @@ import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPrivilegeGrantInfo; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiTable; +import static com.facebook.presto.hive.util.Statistics.migrateStatistics; +import static com.facebook.presto.hive.util.Statistics.removeStatistics; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; -import static com.google.common.collect.ImmutableMap.toImmutableMap; -import static com.google.common.collect.ImmutableSet.toImmutableSet; import static java.util.Objects.requireNonNull; import static java.util.function.UnaryOperator.identity; @@ -83,35 +83,33 @@ public Optional
getTable(String databaseName, String tableName) } @Override - public Map getTableColumnStatistics(String databaseName, String tableName) + public boolean supportsColumnStatistics() { - Table table = getTable(databaseName, tableName) - .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); - Set dataColumns = table.getDataColumns().stream() - .map(Column::getName) - .collect(toImmutableSet()); - return groupStatisticsByColumn(delegate.getTableColumnStatistics(databaseName, tableName, dataColumns)); + return delegate.supportsColumnStatistics(); } @Override - public Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames) + public PartitionStatistics getTableStatistics(String databaseName, String tableName) { - Table table = getTable(databaseName, tableName) - .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); - Set dataColumns = table.getDataColumns().stream() - .map(Column::getName) - .collect(toImmutableSet()); - Map> statistics = delegate.getPartitionColumnStatistics(databaseName, tableName, partitionNames, dataColumns); - return statistics.entrySet() - .stream() - .filter(entry -> !entry.getValue().isEmpty()) - .collect(toImmutableMap(Map.Entry::getKey, entry -> groupStatisticsByColumn(entry.getValue()))); + return delegate.getTableStatistics(databaseName, tableName); + } + + @Override + public Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) + { + return delegate.getPartitionStatistics(databaseName, tableName, partitionNames); } - private Map groupStatisticsByColumn(Set statistics) + @Override + public void updateTableStatistics(String databaseName, String tableName, Function update) + { + delegate.updateTableStatistics(databaseName, tableName, update); + } + + @Override + public void updatePartitionStatistics(String databaseName, String tableName, String partitionName, Function update) { - return 
statistics.stream() - .collect(toImmutableMap(ColumnStatisticsObj::getColName, ThriftMetastoreUtil::fromMetastoreApiColumnStatistics)); + delegate.updatePartitionStatistics(databaseName, tableName, partitionName, update); } @Override @@ -228,7 +226,14 @@ public void renameColumn(String databaseName, String tableName, String oldColumn fieldSchema.setName(newColumnName); } } + + PartitionStatistics tableStatistics = getTableStatistics(databaseName, tableName); + Map partitionStatistics = loadPartitionStatisticsInSmallBatches(databaseName, tableName); + alterTable(databaseName, tableName, table); + + updateTableStatistics(databaseName, tableName, statistics -> migrateStatistics(tableStatistics, oldColumnName, newColumnName)); + partitionStatistics.forEach((partitionName, statistics) -> updatePartitionStatistics(databaseName, tableName, partitionName, stats -> migrateStatistics(statistics, oldColumnName, newColumnName))); } @Override @@ -238,7 +243,14 @@ public void dropColumn(String databaseName, String tableName, String columnName) org.apache.hadoop.hive.metastore.api.Table table = delegate.getTable(databaseName, tableName) .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); table.getSd().getCols().removeIf(fieldSchema -> fieldSchema.getName().equals(columnName)); + + PartitionStatistics tableStatistics = getTableStatistics(databaseName, tableName); + Map partitionStatistics = loadPartitionStatisticsInSmallBatches(databaseName, tableName); + alterTable(databaseName, tableName, table); + + updateTableStatistics(databaseName, tableName, statistics -> removeStatistics(tableStatistics, columnName)); + partitionStatistics.forEach((partitionName, statistics) -> updatePartitionStatistics(databaseName, tableName, partitionName, stats -> removeStatistics(statistics, columnName))); } private void alterTable(String databaseName, String tableName, org.apache.hadoop.hive.metastore.api.Table table) @@ -246,6 +258,16 @@ private void 
alterTable(String databaseName, String tableName, org.apache.hadoop delegate.alterTable(databaseName, tableName, table); } + private Map loadPartitionStatisticsInSmallBatches(String databaseName, String tableName) + { + ImmutableMap.Builder result = ImmutableMap.builder(); + List partitionNames = getPartitionNames(databaseName, tableName) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + Lists.partition(partitionNames, 100) + .forEach(batch -> result.putAll(getPartitionStatistics(databaseName, tableName, ImmutableSet.copyOf(batch)))); + return result.build(); + } + @Override public Optional getPartition(String databaseName, String tableName, List partitionValues) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/HiveMetastore.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/HiveMetastore.java index 631091d7e6461..be4a52234c560 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/HiveMetastore.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/HiveMetastore.java @@ -13,8 +13,8 @@ */ package com.facebook.presto.hive.metastore.thrift; +import com.facebook.presto.hive.PartitionStatistics; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.PrivilegeGrantInfo; @@ -24,6 +24,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Function; import static com.facebook.presto.hive.metastore.Database.DEFAULT_DATABASE_NAME; import static org.apache.hadoop.hive.metastore.api.PrincipalType.ROLE; @@ -72,9 +73,15 @@ public interface HiveMetastore Optional
getTable(String databaseName, String tableName); - Set getTableColumnStatistics(String databaseName, String tableName, Set columnNames); + boolean supportsColumnStatistics(); - Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames, Set columnNames); + PartitionStatistics getTableStatistics(String databaseName, String tableName); + + Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames); + + void updateTableStatistics(String databaseName, String tableName, Function update); + + void updatePartitionStatistics(String databaseName, String tableName, String partitionName, Function update); Set getRoles(String user); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/HiveMetastoreClient.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/HiveMetastoreClient.java index c483ba637df97..64b867299dea1 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/HiveMetastoreClient.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/HiveMetastoreClient.java @@ -71,9 +71,21 @@ Table getTable(String databaseName, String tableName) List getTableColumnStatistics(String databaseName, String tableName, List columnNames) throws TException; + void setTableColumnStatistics(String databaseName, String tableName, List statistics) + throws TException; + + void deleteTableColumnStatistics(String databaseName, String tableName, String columnName) + throws TException; + Map> getPartitionColumnStatistics(String databaseName, String tableName, List partitionNames, List columnNames) throws TException; + void setPartitionColumnStatistics(String databaseName, String tableName, String partitionName, List statistics) + throws TException; + + void deletePartitionColumnStatistics(String databaseName, String tableName, String partitionName, String columnName) + throws TException; + List getPartitionNames(String databaseName, String 
tableName) throws TException; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastore.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastore.java index 8a9583d1a474c..f51d855b12eef 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastore.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastore.java @@ -13,11 +13,14 @@ */ package com.facebook.presto.hive.metastore.thrift; +import com.facebook.presto.hive.HiveBasicStatistics; import com.facebook.presto.hive.HiveViewNotSupportedException; import com.facebook.presto.hive.PartitionNotFoundException; +import com.facebook.presto.hive.PartitionStatistics; import com.facebook.presto.hive.RetryDriver; import com.facebook.presto.hive.SchemaAlreadyExistsException; import com.facebook.presto.hive.TableAlreadyExistsException; +import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HivePrincipal; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; import com.facebook.presto.spi.PrestoException; @@ -32,8 +35,10 @@ import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; import org.apache.hadoop.hive.metastore.api.HiveObjectRef; +import org.apache.hadoop.hive.metastore.api.InvalidInputException; import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.MetaException; @@ -55,30 +60,37 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Optional; import java.util.Set; 
import java.util.concurrent.Callable; import java.util.function.Function; +import static com.facebook.presto.hive.HiveBasicStatistics.createEmptyStatistics; import static com.facebook.presto.hive.HiveErrorCode.HIVE_METASTORE_ERROR; import static com.facebook.presto.hive.HiveUtil.PRESTO_VIEW_FLAG; import static com.facebook.presto.hive.metastore.HivePrincipal.toHivePrincipal; import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege; import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP; import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.parsePrivilege; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.createMetastoreColumnStatistics; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toGrants; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticParameters; import static com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Throwables.throwIfUnchecked; +import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.Iterables.getOnlyElement; +import static com.google.common.collect.Sets.difference; import static com.google.common.collect.Sets.newHashSet; import static java.lang.String.format; import static java.util.Objects.requireNonNull; import static java.util.function.Function.identity; import static java.util.stream.Collectors.toSet; +import static org.apache.hadoop.hive.common.FileUtils.makePartName; import 
static org.apache.hadoop.hive.metastore.api.HiveObjectType.DATABASE; import static org.apache.hadoop.hive.metastore.api.HiveObjectType.TABLE; import static org.apache.hadoop.hive.metastore.api.PrincipalType.ROLE; @@ -225,13 +237,31 @@ public Optional
getTable(String databaseName, String tableName) } } + @Override + public boolean supportsColumnStatistics() + { + return true; + } + private static boolean isPrestoView(Table table) { return "true".equals(table.getParameters().get(PRESTO_VIEW_FLAG)); } @Override - public Set getTableColumnStatistics(String databaseName, String tableName, Set columnNames) + public PartitionStatistics getTableStatistics(String databaseName, String tableName) + { + Table table = getTable(databaseName, tableName) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + List dataColumns = table.getSd().getCols().stream() + .map(FieldSchema::getName) + .collect(toImmutableList()); + HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters()); + Map columnStatistics = getPartitionColumnStatistics(databaseName, tableName, dataColumns); + return new PartitionStatistics(basicStatistics, columnStatistics); + } + + private Map getPartitionColumnStatistics(String databaseName, String tableName, List columns) { try { return retry() @@ -239,12 +269,12 @@ public Set getTableColumnStatistics(String databaseName, St .stopOnIllegalExceptions() .run("getTableColumnStatistics", stats.getGetTableColumnStatistics().wrap(() -> { try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) { - return ImmutableSet.copyOf(client.getTableColumnStatistics(databaseName, tableName, ImmutableList.copyOf(columnNames))); + return groupStatisticsByColumn(client.getTableColumnStatistics(databaseName, tableName, columns)); } })); } catch (NoSuchObjectException e) { - return ImmutableSet.of(); + throw new TableNotFoundException(new SchemaTableName(databaseName, tableName)); } catch (TException e) { throw new PrestoException(HIVE_METASTORE_ERROR, e); @@ -255,7 +285,33 @@ public Set getTableColumnStatistics(String databaseName, St } @Override - public Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames, 
Set columnNames) + public Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) + { + Table table = getTable(databaseName, tableName) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + List dataColumns = table.getSd().getCols().stream() + .map(FieldSchema::getName) + .collect(toImmutableList()); + List partitionColumns = table.getPartitionKeys().stream() + .map(FieldSchema::getName) + .collect(toImmutableList()); + + Map partitionBasicStatistics = getPartitionsByNames(databaseName, tableName, ImmutableList.copyOf(partitionNames)).stream() + .collect(toImmutableMap( + partition -> makePartName(partitionColumns, partition.getValues()), + partition -> getHiveBasicStatistics(partition.getParameters()))); + Map> partitionColumnStatistics = getPartitionColumnStatistics(databaseName, tableName, partitionNames, dataColumns); + ImmutableMap.Builder result = ImmutableMap.builder(); + for (String partitionName : partitionNames) { + HiveBasicStatistics basicStatistics = partitionBasicStatistics.getOrDefault(partitionName, createEmptyStatistics()); + Map columnStatistics = partitionColumnStatistics.getOrDefault(partitionName, ImmutableMap.of()); + result.put(partitionName, new PartitionStatistics(basicStatistics, columnStatistics)); + } + + return result.build(); + } + + private Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames, List columnNames) { try { return retry() @@ -263,15 +319,173 @@ public Map> getPartitionColumnStatistics(String .stopOnIllegalExceptions() .run("getPartitionColumnStatistics", stats.getGetPartitionColumnStatistics().wrap(() -> { try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) { - Map> partitionColumnStatistics = client.getPartitionColumnStatistics(databaseName, tableName, ImmutableList.copyOf(partitionNames), ImmutableList.copyOf(columnNames)); + Map> partitionColumnStatistics = 
client.getPartitionColumnStatistics(databaseName, tableName, ImmutableList.copyOf(partitionNames), columnNames); return partitionColumnStatistics.entrySet() .stream() - .collect(toImmutableMap(Entry::getKey, entry -> ImmutableSet.copyOf(entry.getValue()))); + .filter(entry -> !entry.getValue().isEmpty()) + .collect(toImmutableMap(Map.Entry::getKey, entry -> groupStatisticsByColumn(entry.getValue()))); } })); } catch (NoSuchObjectException e) { - return ImmutableMap.of(); + throw new TableNotFoundException(new SchemaTableName(databaseName, tableName)); + } + catch (TException e) { + throw new PrestoException(HIVE_METASTORE_ERROR, e); + } + catch (Exception e) { + throw propagate(e); + } + } + + private Map groupStatisticsByColumn(List statistics) + { + return statistics.stream() + .collect(toImmutableMap(ColumnStatisticsObj::getColName, ThriftMetastoreUtil::fromMetastoreApiColumnStatistics)); + } + + @Override + public synchronized void updateTableStatistics(String databaseName, String tableName, Function update) + { + PartitionStatistics currentStatistics = getTableStatistics(databaseName, tableName); + PartitionStatistics updatedStatistics = update.apply(currentStatistics); + + Table originalTable = getTable(databaseName, tableName) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + Table modifiedTable = originalTable.deepCopy(); + modifiedTable.setParameters(updateStatisticParameters(modifiedTable.getParameters(), updatedStatistics.getBasicStatistics())); + alterTable(databaseName, tableName, modifiedTable); + + com.facebook.presto.hive.metastore.Table table = fromMetastoreApiTable(modifiedTable); + List metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet().stream() + .map(entry -> createMetastoreColumnStatistics(entry.getKey(), table.getColumn(entry.getKey()).get().getType(), entry.getValue())) + .collect(toImmutableList()); + if (!metastoreColumnStatistics.isEmpty()) { + 
setTableColumnStatistics(databaseName, tableName, metastoreColumnStatistics); + } + Set removedColumnStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet()); + removedColumnStatistics.forEach(column -> deleteTableColumnStatistics(databaseName, tableName, column)); + } + + private void setTableColumnStatistics(String databaseName, String tableName, List statistics) + { + try { + retry() + .stopOn(NoSuchObjectException.class, InvalidObjectException.class, MetaException.class, InvalidInputException.class) + .stopOnIllegalExceptions() + .run("setTableColumnStatistics", stats.getCreateDatabase().wrap(() -> { + try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) { + client.setTableColumnStatistics(databaseName, tableName, statistics); + } + return null; + })); + } + catch (NoSuchObjectException e) { + throw new TableNotFoundException(new SchemaTableName(databaseName, tableName)); + } + catch (TException e) { + throw new PrestoException(HIVE_METASTORE_ERROR, e); + } + catch (Exception e) { + throw propagate(e); + } + } + + private void deleteTableColumnStatistics(String databaseName, String tableName, String columnName) + { + try { + retry() + .stopOn(NoSuchObjectException.class, InvalidObjectException.class, MetaException.class, InvalidInputException.class) + .stopOnIllegalExceptions() + .run("deleteTableColumnStatistics", stats.getCreateDatabase().wrap(() -> { + try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) { + client.deleteTableColumnStatistics(databaseName, tableName, columnName); + } + return null; + })); + } + catch (NoSuchObjectException e) { + throw new TableNotFoundException(new SchemaTableName(databaseName, tableName)); + } + catch (TException e) { + throw new PrestoException(HIVE_METASTORE_ERROR, e); + } + catch (Exception e) { + throw propagate(e); + } + } + + @Override + public synchronized void updatePartitionStatistics(String databaseName, 
String tableName, String partitionName, Function update) + { + PartitionStatistics currentStatistics = requireNonNull( + getPartitionStatistics(databaseName, tableName, ImmutableSet.of(partitionName)).get(partitionName), "getPartitionStatistics() returned null"); + PartitionStatistics updatedStatistics = update.apply(currentStatistics); + + List partitions = getPartitionsByNames(databaseName, tableName, ImmutableList.of(partitionName)); + if (partitions.size() != 1) { + throw new PrestoException(HIVE_METASTORE_ERROR, "exactly one partition is expected to be returned: " + partitions.size()); + } + + Partition originalPartition = getOnlyElement(partitions); + Partition modifiedPartition = originalPartition.deepCopy(); + modifiedPartition.setParameters(updateStatisticParameters(modifiedPartition.getParameters(), updatedStatistics.getBasicStatistics())); + alterPartition(databaseName, tableName, modifiedPartition); + + com.facebook.presto.hive.metastore.Table table = getTable(databaseName, tableName) + .map(ThriftMetastoreUtil::fromMetastoreApiTable) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + + List metastoreColumnStatistics = updatedStatistics.getColumnStatistics().entrySet() + .stream() + .map(entry -> createMetastoreColumnStatistics(entry.getKey(), table.getColumn(entry.getKey()).get().getType(), entry.getValue())) + .collect(toImmutableList()); + if (!metastoreColumnStatistics.isEmpty()) { + setPartitionColumnStatistics(databaseName, tableName, partitionName, metastoreColumnStatistics); + } + Set removedStatistics = difference(currentStatistics.getColumnStatistics().keySet(), updatedStatistics.getColumnStatistics().keySet()); + removedStatistics.forEach(column -> deletePartitionColumnStatistics(databaseName, tableName, partitionName, column)); + } + + private void setPartitionColumnStatistics(String databaseName, String tableName, String partitionName, List statistics) + { + try { + retry() + 
.stopOn(NoSuchObjectException.class, InvalidObjectException.class, MetaException.class, InvalidInputException.class) + .stopOnIllegalExceptions() + .run("setPartitionColumnStatistics", stats.getCreateDatabase().wrap(() -> { + try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) { + client.setPartitionColumnStatistics(databaseName, tableName, partitionName, statistics); + } + return null; + })); + } + catch (NoSuchObjectException e) { + throw new TableNotFoundException(new SchemaTableName(databaseName, tableName)); + } + catch (TException e) { + throw new PrestoException(HIVE_METASTORE_ERROR, e); + } + catch (Exception e) { + throw propagate(e); + } + } + + private void deletePartitionColumnStatistics(String databaseName, String tableName, String partitionName, String columnName) + { + try { + retry() + .stopOn(NoSuchObjectException.class, InvalidObjectException.class, MetaException.class, InvalidInputException.class) + .stopOnIllegalExceptions() + .run("deletePartitionColumnStatistics", stats.getCreateDatabase().wrap(() -> { + try (HiveMetastoreClient client = clientProvider.createMetastoreClient()) { + client.deletePartitionColumnStatistics(databaseName, tableName, partitionName, columnName); + } + return null; + })); + } + catch (NoSuchObjectException e) { + throw new TableNotFoundException(new SchemaTableName(databaseName, tableName)); } catch (TException e) { throw new PrestoException(HIVE_METASTORE_ERROR, e); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastoreClient.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastoreClient.java index 9f3813dbd9361..5cefaf2dfabba 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastoreClient.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastoreClient.java @@ -13,6 +13,8 @@ */ package com.facebook.presto.hive.metastore.thrift; 
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege; @@ -146,6 +148,22 @@ public List getTableColumnStatistics(String databaseName, S return client.get_table_statistics_req(tableStatsRequest).getTableStats(); } + @Override + public void setTableColumnStatistics(String databaseName, String tableName, List statistics) + throws TException + { + ColumnStatisticsDesc statisticsDescription = new ColumnStatisticsDesc(true, databaseName, tableName); + ColumnStatistics request = new ColumnStatistics(statisticsDescription, statistics); + client.update_table_column_statistics(request); + } + + @Override + public void deleteTableColumnStatistics(String databaseName, String tableName, String columnName) + throws TException + { + client.delete_table_column_statistics(databaseName, tableName, columnName); + } + @Override public Map> getPartitionColumnStatistics(String databaseName, String tableName, List partitionNames, List columnNames) throws TException @@ -154,6 +172,23 @@ public Map> getPartitionColumnStatistics(Strin return client.get_partitions_statistics_req(partitionsStatsRequest).getPartStats(); } + @Override + public void setPartitionColumnStatistics(String databaseName, String tableName, String partitionName, List statistics) + throws TException + { + ColumnStatisticsDesc statisticsDescription = new ColumnStatisticsDesc(false, databaseName, tableName); + statisticsDescription.setPartName(partitionName); + ColumnStatistics request = new ColumnStatistics(statisticsDescription, statistics); + client.update_partition_column_statistics(request); + } + + @Override + public void deletePartitionColumnStatistics(String databaseName, String tableName, String partitionName, String columnName) + throws TException + { + 
client.delete_partition_column_statistics(databaseName, tableName, partitionName, columnName); + } + @Override public List getPartitionNames(String databaseName, String tableName) throws TException diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java index c81e1ec7b68df..9f9011650563c 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java @@ -13,6 +13,7 @@ */ package com.facebook.presto.hive.metastore.thrift; +import com.facebook.presto.hive.HiveBasicStatistics; import com.facebook.presto.hive.HiveBucketProperty; import com.facebook.presto.hive.HiveType; import com.facebook.presto.hive.metastore.Column; @@ -28,6 +29,7 @@ import com.facebook.presto.spi.PrestoException; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.primitives.Longs; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -44,11 +46,14 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import javax.annotation.Nullable; import java.math.BigDecimal; import java.math.BigInteger; +import java.nio.ByteBuffer; import java.time.LocalDate; import java.util.Collection; import java.util.List; @@ -60,13 +65,28 @@ import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA; import static 
com.facebook.presto.hive.metastore.HivePrivilegeInfo.parsePrivilege; +import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.emptyToNull; import static com.google.common.base.Strings.nullToEmpty; import static java.lang.String.format; import static java.util.stream.Collectors.toList; +import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.binaryStats; +import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.booleanStats; +import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.dateStats; +import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.decimalStats; +import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.doubleStats; +import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.longStats; +import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.stringStats; +import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category.PRIMITIVE; public final class ThriftMetastoreUtil { + private static final String NUM_FILES = "numFiles"; + private static final String NUM_ROWS = "numRows"; + private static final String RAW_DATA_SIZE = "rawDataSize"; + private static final String TOTAL_SIZE = "totalSize"; + private static final Set STATS_PROPERTIES = ImmutableSet.of(NUM_FILES, NUM_ROWS, RAW_DATA_SIZE, TOTAL_SIZE); + private ThriftMetastoreUtil() {} public static org.apache.hadoop.hive.metastore.api.Database toMetastoreApiDatabase(Database database) @@ -430,4 +450,167 @@ private static StorageDescriptor makeStorageDescriptor(String tableName, List parameters) + { + OptionalLong numFiles = parse(parameters.get(NUM_FILES)); + OptionalLong numRows = parse(parameters.get(NUM_ROWS)); + OptionalLong inMemoryDataSizeInBytes = parse(parameters.get(RAW_DATA_SIZE)); + OptionalLong onDiskDataSizeInBytes = parse(parameters.get(TOTAL_SIZE)); + return new HiveBasicStatistics(numFiles, numRows, 
inMemoryDataSizeInBytes, onDiskDataSizeInBytes); + } + + private static OptionalLong parse(@Nullable String parameterValue) + { + if (parameterValue == null) { + return OptionalLong.empty(); + } + Long longValue = Longs.tryParse(parameterValue); + if (longValue == null || longValue < 0) { + return OptionalLong.empty(); + } + return OptionalLong.of(longValue); + } + + public static Map updateStatisticParameters(Map parameters, HiveBasicStatistics statistics) + { + ImmutableMap.Builder result = ImmutableMap.builder(); + + parameters.forEach((key, value) -> { + if (!STATS_PROPERTIES.contains(key)) { + result.put(key, value); + } + }); + + statistics.getFileCount().ifPresent(count -> result.put(NUM_FILES, Long.toString(count))); + statistics.getRowCount().ifPresent(count -> result.put(NUM_ROWS, Long.toString(count))); + statistics.getInMemoryDataSizeInBytes().ifPresent(size -> result.put(RAW_DATA_SIZE, Long.toString(size))); + statistics.getOnDiskDataSizeInBytes().ifPresent(size -> result.put(TOTAL_SIZE, Long.toString(size))); + + return result.build(); + } + + public static ColumnStatisticsObj createMetastoreColumnStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) + { + TypeInfo typeInfo = columnType.getTypeInfo(); + checkArgument(typeInfo.getCategory() == PRIMITIVE, "unsupported type: %s", columnType); + switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) { + case BOOLEAN: + return createBooleanStatistics(columnName, columnType, statistics); + case BYTE: + case SHORT: + case INT: + case LONG: + return createLongStatistics(columnName, columnType, statistics); + case FLOAT: + case DOUBLE: + return createDoubleStatistics(columnName, columnType, statistics); + case STRING: + case VARCHAR: + case CHAR: + return createStringStatistics(columnName, columnType, statistics); + case DATE: + return createDateStatistics(columnName, columnType, statistics); + case TIMESTAMP: + return createLongStatistics(columnName, columnType, 
statistics); + case BINARY: + return createBinaryStatistics(columnName, columnType, statistics); + case DECIMAL: + return createDecimalStatistics(columnName, columnType, statistics); + default: + throw new IllegalArgumentException(format("unsupported type: %s", columnType)); + } + } + + private static ColumnStatisticsObj createBooleanStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) + { + BooleanColumnStatsData data = new BooleanColumnStatsData(); + statistics.getNullsCount().ifPresent(data::setNumNulls); + statistics.getFalseCount().ifPresent(data::setNumFalses); + statistics.getTrueCount().ifPresent(data::setNumTrues); + return new ColumnStatisticsObj(columnName, columnType.toString(), booleanStats(data)); + } + + private static ColumnStatisticsObj createLongStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) + { + LongColumnStatsData data = new LongColumnStatsData(); + statistics.getLowValue().ifPresent(value -> data.setLowValue((Long) value)); + statistics.getHighValue().ifPresent(value -> data.setHighValue((Long) value)); + statistics.getNullsCount().ifPresent(data::setNumNulls); + toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs); + return new ColumnStatisticsObj(columnName, columnType.toString(), longStats(data)); + } + + private static ColumnStatisticsObj createDoubleStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) + { + DoubleColumnStatsData data = new DoubleColumnStatsData(); + statistics.getLowValue().ifPresent(value -> data.setLowValue((Double) value)); + statistics.getHighValue().ifPresent(value -> data.setHighValue((Double) value)); + statistics.getNullsCount().ifPresent(data::setNumNulls); + toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs); + return new ColumnStatisticsObj(columnName, columnType.toString(), 
doubleStats(data)); + } + + private static ColumnStatisticsObj createStringStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) + { + StringColumnStatsData data = new StringColumnStatsData(); + statistics.getNullsCount().ifPresent(data::setNumNulls); + toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs); + data.setMaxColLen(statistics.getMaxColumnLength().orElse(0)); + data.setAvgColLen(statistics.getAverageColumnLength().orElse(0)); + return new ColumnStatisticsObj(columnName, columnType.toString(), stringStats(data)); + } + + private static ColumnStatisticsObj createDateStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) + { + DateColumnStatsData data = new DateColumnStatsData(); + statistics.getLowValue().ifPresent(value -> data.setLowValue(toMetastoreDate((LocalDate) value))); + statistics.getHighValue().ifPresent(value -> data.setHighValue(toMetastoreDate((LocalDate) value))); + statistics.getNullsCount().ifPresent(data::setNumNulls); + toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs); + return new ColumnStatisticsObj(columnName, columnType.toString(), dateStats(data)); + } + + private static ColumnStatisticsObj createBinaryStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) + { + BinaryColumnStatsData data = new BinaryColumnStatsData(); + statistics.getNullsCount().ifPresent(data::setNumNulls); + data.setMaxColLen(statistics.getMaxColumnLength().orElse(0)); + data.setAvgColLen(statistics.getAverageColumnLength().orElse(0)); + return new ColumnStatisticsObj(columnName, columnType.toString(), binaryStats(data)); + } + + private static ColumnStatisticsObj createDecimalStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics) + { + DecimalColumnStatsData data = new DecimalColumnStatsData(); + 
statistics.getLowValue().ifPresent(value -> data.setLowValue(toMetastoreDecimal((BigDecimal) value))); + statistics.getHighValue().ifPresent(value -> data.setHighValue(toMetastoreDecimal((BigDecimal) value))); + statistics.getNullsCount().ifPresent(data::setNumNulls); + toMetastoreDistinctValuesCount(statistics.getDistinctValuesCount(), statistics.getNullsCount()).ifPresent(data::setNumDVs); + return new ColumnStatisticsObj(columnName, columnType.toString(), decimalStats(data)); + } + + public static Date toMetastoreDate(LocalDate date) + { + return new Date(date.toEpochDay()); + } + + public static Decimal toMetastoreDecimal(BigDecimal decimal) + { + return new Decimal(ByteBuffer.wrap(decimal.unscaledValue().toByteArray()), (short) decimal.scale()); + } + + /** + * Metastore stores NDV considering null as a distinct value + */ + private static OptionalLong toMetastoreDistinctValuesCount(OptionalLong distinctValuesCount, OptionalLong nullsCount) + { + if (distinctValuesCount.isPresent() && nullsCount.isPresent()) { + if (nullsCount.getAsLong() > 0) { + return OptionalLong.of(distinctValuesCount.getAsLong() + 1); + } + return distinctValuesCount; + } + return OptionalLong.empty(); + } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/statistics/MetastoreHiveStatisticsProvider.java b/presto-hive/src/main/java/com/facebook/presto/hive/statistics/MetastoreHiveStatisticsProvider.java index 0465d5ccec9c5..69d6ed4fc4658 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/statistics/MetastoreHiveStatisticsProvider.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/statistics/MetastoreHiveStatisticsProvider.java @@ -14,37 +14,29 @@ package com.facebook.presto.hive.statistics; -import com.facebook.presto.hive.HiveBasicStatistics; +import com.facebook.presto.hive.CollectibleStatisticsProvider; import com.facebook.presto.hive.HiveColumnHandle; import com.facebook.presto.hive.HivePartition; import com.facebook.presto.hive.HiveTableHandle; 
import com.facebook.presto.hive.PartitionStatistics; import com.facebook.presto.hive.metastore.HiveColumnStatistics; -import com.facebook.presto.hive.metastore.Partition; import com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore; -import com.facebook.presto.hive.metastore.Table; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorSession; import com.facebook.presto.spi.ConnectorTableHandle; -import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.predicate.NullableValue; +import com.facebook.presto.spi.statistics.ColumnStatisticType; import com.facebook.presto.spi.statistics.ColumnStatistics; import com.facebook.presto.spi.statistics.Estimate; import com.facebook.presto.spi.statistics.RangeColumnStatistics; import com.facebook.presto.spi.statistics.TableStatistics; -import com.facebook.presto.spi.type.DecimalType; -import com.facebook.presto.spi.type.Decimals; import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.TypeManager; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import org.joda.time.DateTimeZone; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.time.LocalDate; import java.util.Collection; import java.util.Comparator; import java.util.List; @@ -53,23 +45,18 @@ import java.util.OptionalDouble; import java.util.OptionalLong; import java.util.PrimitiveIterator; +import java.util.Set; import java.util.function.Function; import java.util.stream.DoubleStream; import static com.facebook.presto.hive.HiveSessionProperties.isStatisticsEnabled; +import static com.facebook.presto.hive.util.Statistics.getMinMaxAsPrestoTypeValue; import static com.facebook.presto.spi.predicate.Utils.nativeValueToBlock; -import static com.facebook.presto.spi.type.BigintType.BIGINT; -import static 
com.facebook.presto.spi.type.DateType.DATE; -import static com.facebook.presto.spi.type.DoubleType.DOUBLE; -import static com.facebook.presto.spi.type.IntegerType.INTEGER; -import static com.facebook.presto.spi.type.RealType.REAL; -import static com.facebook.presto.spi.type.SmallintType.SMALLINT; -import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP; -import static com.facebook.presto.spi.type.TinyintType.TINYINT; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.MAX; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.MIN; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableList.toImmutableList; -import static java.lang.Float.floatToRawIntBits; -import static java.lang.String.format; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static java.util.Objects.requireNonNull; public class MetastoreHiveStatisticsProvider @@ -78,12 +65,18 @@ public class MetastoreHiveStatisticsProvider private final TypeManager typeManager; private final SemiTransactionalHiveMetastore metastore; private final DateTimeZone timeZone; + private final CollectibleStatisticsProvider collectibleStatisticsProvider; - public MetastoreHiveStatisticsProvider(TypeManager typeManager, SemiTransactionalHiveMetastore metastore, DateTimeZone timeZone) + public MetastoreHiveStatisticsProvider( + TypeManager typeManager, + SemiTransactionalHiveMetastore metastore, + DateTimeZone timeZone, + CollectibleStatisticsProvider collectibleStatisticsProvider) { this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.metastore = requireNonNull(metastore, "metastore is null"); - this.timeZone = timeZone; + this.timeZone = requireNonNull(timeZone, "timeZone is null"); + this.collectibleStatisticsProvider = requireNonNull(collectibleStatisticsProvider, "collectibleStatisticsProvider is null"); } @Override @@ -93,7 +86,7 @@ public TableStatistics 
getTableStatistics(ConnectorSession session, ConnectorTab return TableStatistics.EMPTY_STATISTICS; } - Map partitionStatistics = getPartitionsStatistics((HiveTableHandle) tableHandle, hivePartitions, tableColumns); + Map partitionStatistics = getPartitionsStatistics((HiveTableHandle) tableHandle, hivePartitions); TableStatistics.Builder tableStatistics = TableStatistics.builder(); Estimate rowCount = calculateRowsCount(partitionStatistics); @@ -111,7 +104,7 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab if (hiveColumnHandle.isPartitionKey()) { rangeStatistics.setDistinctValuesCount(countDistinctPartitionKeys(hiveColumnHandle, hivePartitions)); nullsFraction = calculateNullsFractionForPartitioningKey(hiveColumnHandle, hivePartitions, partitionStatistics); - if (isLowHighSupportedForType(prestoType)) { + if (isMinMaxSupportedForType(prestoType)) { lowValueCandidates = hivePartitions.stream() .map(HivePartition::getKeys) .map(keys -> keys.get(hiveColumnHandle)) @@ -129,7 +122,7 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab // partitions. And return unknown if most of the partitions we are working with do not have // statistics computed. 
- if (isLowHighSupportedForType(prestoType)) { + if (isMinMaxSupportedForType(prestoType)) { lowValueCandidates = partitionStatistics.values().stream() .map(PartitionStatistics::getColumnStatistics) .filter(stats -> stats.containsKey(columnName)) @@ -137,7 +130,7 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab .map(HiveColumnStatistics::getLowValue) .filter(Optional::isPresent) .map(Optional::get) - .map(value -> lowHighValueAsPrestoType(value, prestoType)) + .map(value -> getMinMaxAsPrestoTypeValue(value, prestoType, timeZone)) .collect(toImmutableList()); highValueCandidates = partitionStatistics.values().stream() @@ -147,7 +140,7 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab .map(HiveColumnStatistics::getHighValue) .filter(Optional::isPresent) .map(Optional::get) - .map(value -> lowHighValueAsPrestoType(value, prestoType)) + .map(value -> getMinMaxAsPrestoTypeValue(value, prestoType, timeZone)) .collect(toImmutableList()); } } @@ -169,67 +162,6 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab return tableStatistics.build(); } - private boolean isLowHighSupportedForType(Type type) - { - if (type instanceof DecimalType) { - return true; - } - if (type.equals(TINYINT) - || type.equals(SMALLINT) - || type.equals(INTEGER) - || type.equals(BIGINT) - || type.equals(REAL) - || type.equals(DOUBLE) - || type.equals(DATE) - || type.equals(TIMESTAMP)) { - return true; - } - return false; - } - - private Object lowHighValueAsPrestoType(Object value, Type prestoType) - { - checkArgument(isLowHighSupportedForType(prestoType), "Unsupported type " + prestoType); - requireNonNull(value, "high/low value connot be null"); - - if (prestoType.equals(BIGINT) - || prestoType.equals(INTEGER) - || prestoType.equals(SMALLINT) - || prestoType.equals(TINYINT)) { - checkArgument(value instanceof Long, "expected Long value but got " + value.getClass()); - return value; - } - else if 
(prestoType.equals(DOUBLE)) { - checkArgument(value instanceof Double, "expected Double value but got " + value.getClass()); - return value; - } - else if (prestoType.equals(REAL)) { - checkArgument(value instanceof Double, "expected Double value but got " + value.getClass()); - return (long) floatToRawIntBits((float) (double) value); - } - else if (prestoType.equals(DATE)) { - checkArgument(value instanceof LocalDate, "expected LocalDate value but got " + value.getClass()); - return ((LocalDate) value).toEpochDay(); - } - else if (prestoType.equals(TIMESTAMP)) { - checkArgument(value instanceof Long, "expected Long value but got " + value.getClass()); - return timeZone.convertLocalToUTC((long) value * 1000, false); - } - else if (prestoType instanceof DecimalType) { - checkArgument(value instanceof BigDecimal, "expected BigDecimal value but got " + value.getClass()); - BigInteger unscaled = Decimals.rescale((BigDecimal) value, (DecimalType) prestoType).unscaledValue(); - if (Decimals.isShortDecimal(prestoType)) { - return unscaled.longValueExact(); - } - else { - return Decimals.encodeUnscaledValue(unscaled); - } - } - else { - throw new IllegalArgumentException("Unsupported presto type " + prestoType); - } - } - private Estimate calculateRowsCount(Map partitionStatistics) { List knownPartitionRowCounts = partitionStatistics.values().stream() @@ -366,7 +298,7 @@ private Estimate summarizePartitionStatistics( } } - private Map getPartitionsStatistics(HiveTableHandle tableHandle, List hivePartitions, Map tableColumns) + private Map getPartitionsStatistics(HiveTableHandle tableHandle, List hivePartitions) { if (hivePartitions.isEmpty()) { return ImmutableMap.of(); @@ -377,50 +309,21 @@ private Map getPartitionsStatistics(HiveTableHandle } if (unpartitioned) { - return ImmutableMap.of(HivePartition.UNPARTITIONED_ID, getTableStatistics(tableHandle.getSchemaTableName())); + return ImmutableMap.of(HivePartition.UNPARTITIONED_ID, 
metastore.getTableStatistics(tableHandle.getSchemaName(), tableHandle.getTableName())); } else { - return getPartitionsStatistics(tableHandle.getSchemaTableName(), hivePartitions); - } - } - - private Map getPartitionsStatistics(SchemaTableName schemaTableName, List hivePartitions) - { - String databaseName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - - ImmutableMap.Builder resultMap = ImmutableMap.builder(); - - List partitionNames = hivePartitions.stream().map(HivePartition::getPartitionId).collect(toImmutableList()); - Map> partitionColumnStatisticsMap = - metastore.getPartitionColumnStatistics(databaseName, tableName, ImmutableSet.copyOf(partitionNames)); - - Map> partitionsByNames = metastore.getPartitionsByNames(databaseName, tableName, partitionNames); - for (String partitionName : partitionNames) { - Map partitionParameters = partitionsByNames.get(partitionName) - .map(Partition::getParameters) - .orElseThrow(() -> new IllegalArgumentException(format("Could not get metadata for partition %s.%s.%s", databaseName, tableName, partitionName))); - Map partitionColumnStatistics = partitionColumnStatisticsMap.getOrDefault(partitionName, ImmutableMap.of()); - resultMap.put(partitionName, readStatisticsFromParameters(partitionParameters, partitionColumnStatistics)); + return metastore.getPartitionStatistics( + tableHandle.getSchemaName(), + tableHandle.getTableName(), + hivePartitions.stream() + .map(HivePartition::getPartitionId) + .collect(toImmutableSet())); } - - return resultMap.build(); - } - - private PartitionStatistics getTableStatistics(SchemaTableName schemaTableName) - { - String databaseName = schemaTableName.getSchemaName(); - String tableName = schemaTableName.getTableName(); - Table table = metastore.getTable(databaseName, tableName) - .orElseThrow(() -> new IllegalArgumentException(format("Could not get metadata for table %s.%s", databaseName, tableName))); - - Map tableColumnStatistics = 
metastore.getTableColumnStatistics(databaseName, tableName); - - return readStatisticsFromParameters(table.getParameters(), tableColumnStatistics); } - private PartitionStatistics readStatisticsFromParameters(Map parameters, Map columnStatistics) + private boolean isMinMaxSupportedForType(Type type) { - return new PartitionStatistics(HiveBasicStatistics.createFromPartitionParameters(parameters), columnStatistics); + Set statisticTypes = collectibleStatisticsProvider.get(type); + return statisticTypes.contains(MIN) && statisticTypes.contains(MAX); } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/Statistics.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/Statistics.java index d0b6cf8f53492..bbc07ad542153 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/Statistics.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/Statistics.java @@ -14,73 +14,323 @@ package com.facebook.presto.hive.util; import com.facebook.presto.hive.HiveBasicStatistics; -import com.facebook.presto.hive.metastore.Partition; -import com.facebook.presto.hive.metastore.Table; +import com.facebook.presto.hive.PartitionStatistics; +import com.facebook.presto.hive.metastore.HiveColumnStatistics; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.Page; +import com.facebook.presto.spi.block.Block; +import com.facebook.presto.spi.statistics.ColumnStatisticMetadata; +import com.facebook.presto.spi.statistics.ColumnStatisticType; +import com.facebook.presto.spi.statistics.ComputedStatistics; +import com.facebook.presto.spi.type.BigintType; +import com.facebook.presto.spi.type.DecimalType; +import com.facebook.presto.spi.type.Decimals; +import com.facebook.presto.spi.type.SqlDate; +import com.facebook.presto.spi.type.SqlDecimal; +import com.facebook.presto.spi.type.Type; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import org.joda.time.DateTimeZone; 
+import java.math.BigDecimal; +import java.math.BigInteger; +import java.time.LocalDate; +import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.OptionalDouble; import java.util.OptionalLong; import java.util.Set; -import static com.facebook.presto.hive.HiveBasicStatistics.createZeroStatistics; +import static com.facebook.presto.hive.HiveWriteUtils.createPartitionValues; import static com.facebook.presto.hive.util.Statistics.ReduceOperator.ADD; -import static com.facebook.presto.hive.util.Statistics.ReduceOperator.SUBTRACT; -import static java.util.Collections.unmodifiableMap; -import static java.util.stream.Collectors.toMap; +import static com.facebook.presto.hive.util.Statistics.ReduceOperator.SELECT_MAX; +import static com.facebook.presto.hive.util.Statistics.ReduceOperator.SELECT_MIN; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.AVERAGE_VALUE_SIZE_IN_BYTES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.MAX; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE_SIZE_IN_BYTES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.MIN; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_TRUE_VALUES; +import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static com.facebook.presto.spi.type.BooleanType.BOOLEAN; +import static com.facebook.presto.spi.type.Chars.isCharType; +import static com.facebook.presto.spi.type.DateType.DATE; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static com.facebook.presto.spi.type.IntegerType.INTEGER; +import static com.facebook.presto.spi.type.RealType.REAL; +import static 
com.facebook.presto.spi.type.SmallintType.SMALLINT; +import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP; +import static com.facebook.presto.spi.type.TinyintType.TINYINT; +import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY; +import static com.facebook.presto.spi.type.Varchars.isVarcharType; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.Sets.intersection; +import static java.lang.Float.floatToRawIntBits; +import static java.util.Objects.requireNonNull; public final class Statistics { private Statistics() {} - private static final Set STATISTICS_PARAMETERS = ImmutableSet.copyOf(createZeroStatistics().toPartitionParameters().keySet()); + public static PartitionStatistics merge(PartitionStatistics first, PartitionStatistics second) + { + return new PartitionStatistics( + reduce(first.getBasicStatistics(), second.getBasicStatistics(), ADD), + merge(first.getColumnStatistics(), first.getBasicStatistics().getRowCount(), second.getColumnStatistics(), second.getBasicStatistics().getRowCount())); + } + + public static HiveBasicStatistics reduce(HiveBasicStatistics first, HiveBasicStatistics second, ReduceOperator operator) + { + return new HiveBasicStatistics( + reduce(first.getFileCount(), second.getFileCount(), operator), + reduce(first.getRowCount(), second.getRowCount(), operator), + reduce(first.getInMemoryDataSizeInBytes(), second.getInMemoryDataSizeInBytes(), operator), + reduce(first.getOnDiskDataSizeInBytes(), second.getOnDiskDataSizeInBytes(), operator)); + } + + public static Map merge( + Map first, OptionalLong firstRowCount, Map second, OptionalLong secondRowCount) + { + // skip the columns for which statistics from either of sides are missing + Set columns = intersection(first.keySet(), second.keySet()); + return columns.stream() 
+ .collect(toImmutableMap(column -> column, column -> merge(first.get(column), firstRowCount, second.get(column), secondRowCount))); + } + + public static HiveColumnStatistics merge(HiveColumnStatistics first, OptionalLong firstRowCount, HiveColumnStatistics second, OptionalLong secondRowCount) + { + return new HiveColumnStatistics( + first.getLowValue().isPresent() ? reduce(first.getLowValue(), second.getLowValue(), SELECT_MIN) : second.getLowValue(), + first.getHighValue().isPresent() ? reduce(first.getHighValue(), second.getHighValue(), SELECT_MAX) : second.getHighValue(), + reduce(first.getMaxColumnLength(), second.getMaxColumnLength(), SELECT_MAX), + mergeAvg(first.getAverageColumnLength(), firstRowCount, second.getAverageColumnLength(), secondRowCount), + reduce(first.getTrueCount(), second.getTrueCount(), ADD), + reduce(first.getFalseCount(), second.getFalseCount(), ADD), + reduce(first.getNullsCount(), second.getNullsCount(), ADD), + reduce(first.getDistinctValuesCount(), second.getDistinctValuesCount(), SELECT_MAX)); + } + + private static OptionalDouble mergeAvg(OptionalDouble first, OptionalLong firstRowCount, OptionalDouble second, OptionalLong secondRowCount) + { + if (first.isPresent() && second.isPresent() && firstRowCount.isPresent() && secondRowCount.isPresent()) { + double sumFirst = first.getAsDouble() * firstRowCount.getAsLong(); + double sumSecond = second.getAsDouble() * secondRowCount.getAsLong(); + long totalRowCount = firstRowCount.getAsLong() + secondRowCount.getAsLong(); + return OptionalDouble.of((sumFirst + sumSecond) / totalRowCount); + } + return OptionalDouble.empty(); + } + + public static Set getSupportedStatistics(Type type) + { + if (type.equals(BOOLEAN)) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, NUMBER_OF_TRUE_VALUES); + } + else if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT)) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, MIN, MAX, NUMBER_OF_DISTINCT_VALUES); 
+ } + else if (type.equals(DOUBLE) || type.equals(REAL)) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, MIN, MAX, NUMBER_OF_DISTINCT_VALUES); + } + else if (isVarcharType(type)) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, NUMBER_OF_DISTINCT_VALUES, MAX_VALUE_SIZE_IN_BYTES, AVERAGE_VALUE_SIZE_IN_BYTES); + } + else if (isCharType(type)) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, NUMBER_OF_DISTINCT_VALUES); + } + else if (type.equals(VARBINARY)) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, MAX_VALUE_SIZE_IN_BYTES, AVERAGE_VALUE_SIZE_IN_BYTES); + } + else if (type.equals(DATE) || type.equals(TIMESTAMP)) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, MIN, MAX, NUMBER_OF_DISTINCT_VALUES); + } + else if (type instanceof DecimalType) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, MIN, MAX, NUMBER_OF_DISTINCT_VALUES); + } + else { + return ImmutableSet.of(); + } + } + + public static Object getMinMaxAsPrestoTypeValue(Object value, Type prestoType, DateTimeZone timeZone) + { + requireNonNull(value, "high/low value connot be null"); + + if (prestoType.equals(BIGINT) || prestoType.equals(INTEGER) || prestoType.equals(SMALLINT) || prestoType.equals(TINYINT)) { + checkArgument(value instanceof Long, "expected Long value but got " + value.getClass()); + return value; + } + if (prestoType.equals(DOUBLE)) { + checkArgument(value instanceof Double, "expected Double value but got " + value.getClass()); + return value; + } + if (prestoType.equals(REAL)) { + checkArgument(value instanceof Double, "expected Double value but got " + value.getClass()); + return (long) floatToRawIntBits((float) (double) value); + } + if (prestoType.equals(DATE)) { + checkArgument(value instanceof LocalDate, "expected LocalDate value but got " + value.getClass()); + return ((LocalDate) value).toEpochDay(); + } + if (prestoType.equals(TIMESTAMP)) { + checkArgument(value instanceof Long, "expected Long value but got " + value.getClass()); + return 
timeZone.convertLocalToUTC((long) value * 1000, false); + } + if (prestoType instanceof DecimalType) { + checkArgument(value instanceof BigDecimal, "expected BigDecimal value but got " + value.getClass()); + BigInteger unscaled = Decimals.rescale((BigDecimal) value, (DecimalType) prestoType).unscaledValue(); + if (Decimals.isShortDecimal(prestoType)) { + return unscaled.longValueExact(); + } + else { + return Decimals.encodeUnscaledValue(unscaled); + } + } + + throw new IllegalArgumentException("Unsupported presto type " + prestoType); + } + + public static PartitionStatistics migrateStatistics(PartitionStatistics statistics, String oldColumnName, String newColumnName) + { + return new PartitionStatistics(statistics.getBasicStatistics(), migrateStatistics(statistics.getColumnStatistics(), oldColumnName, newColumnName)); + } + + public static Map migrateStatistics(Map statistics, String oldColumnName, String newColumnName) + { + return statistics.entrySet().stream() + .collect(toImmutableMap(entry -> entry.getKey().equals(oldColumnName) ? 
newColumnName : entry.getKey(), Entry::getValue)); + } + + public static PartitionStatistics removeStatistics(PartitionStatistics statistics, String column) + { + return new PartitionStatistics(statistics.getBasicStatistics(), removeStatistics(statistics.getColumnStatistics(), column)); + } + + public static Map removeStatistics(Map statistics, String column) + { + return statistics.entrySet().stream() + .filter(entry -> !entry.getKey().equals(column)) + .collect(toImmutableMap(Entry::getKey, Entry::getValue)); + } - public static Table updateStatistics(Table table, HiveBasicStatistics statistics, ReduceOperator operator) + public static Map, ComputedStatistics> groupComputedStatisticsByPartition( + List computedStatistics, List partitionColumns, Map columnTypes) { - Map parameters = table.getParameters(); - Map updatedParameters = updateStatistics(parameters, statistics, operator); - return Table.builder(table) - .setParameters(updatedParameters) - .build(); + List partitionColumnTypes = partitionColumns.stream() + .map(columnTypes::get) + .collect(toImmutableList()); + + return computedStatistics.stream() + .collect(toImmutableMap(statistics -> getPartitionValues(statistics, partitionColumns, partitionColumnTypes), statistics -> statistics)); } - public static Partition updateStatistics(Partition partition, HiveBasicStatistics statistics, ReduceOperator operator) + private static List getPartitionValues(ComputedStatistics statistics, List partitionColumns, List partitionColumnTypes) { - Map parameters = partition.getParameters(); - Map updatedParameters = updateStatistics(parameters, statistics, operator); - return Partition.builder(partition) - .setParameters(updatedParameters) - .build(); + checkArgument(statistics.getGroupingColumns().equals(partitionColumns), + "Unexpected groping. Partition columns: %s. 
Grouping columns: %s", partitionColumns, statistics.getGroupingColumns()); + Page partitionColumnsPage = new Page(1, statistics.getGropingValues().toArray(new Block[] {})); + return createPartitionValues(partitionColumnTypes, partitionColumnsPage, 0); } - public static Map updateStatistics(Map parameters, HiveBasicStatistics update, ReduceOperator operator) + public static Map fromComputedStatistics( + ConnectorSession session, DateTimeZone timeZone, Map computedStatistics, Map columnTypes, long rowCount) { - HiveBasicStatistics currentStatistics = HiveBasicStatistics.createFromPartitionParameters(parameters); - HiveBasicStatistics updatedStatistics = reduce(currentStatistics, update, operator); - Map updatedParameters = parameters.entrySet() + return groupByColumn(computedStatistics) + .entrySet() .stream() - .filter(entry -> !STATISTICS_PARAMETERS.contains(entry.getKey())) - .collect(toMap(Map.Entry::getKey, Map.Entry::getValue)); - updatedParameters.putAll(updatedStatistics.toPartitionParameters()); - return unmodifiableMap(updatedParameters); + .collect(toImmutableMap(Entry::getKey, entry -> createHiveColumnStatistics(session, timeZone, entry.getValue(), columnTypes.get(entry.getKey()), rowCount))); } - public static HiveBasicStatistics add(HiveBasicStatistics first, HiveBasicStatistics second) + private static Map> groupByColumn(Map computedStatistics) { - return reduce(first, second, ADD); + Map> result = new HashMap<>(); + computedStatistics.forEach((metadata, block) -> { + Map columnStatistics = result.computeIfAbsent(metadata.getColumnName(), key -> new HashMap<>()); + columnStatistics.put(metadata.getStatisticType(), block); + }); + return result.entrySet() + .stream() + .collect(toImmutableMap(Entry::getKey, entry -> ImmutableMap.copyOf(entry.getValue()))); } - public static HiveBasicStatistics subtract(HiveBasicStatistics first, HiveBasicStatistics second) + private static HiveColumnStatistics createHiveColumnStatistics( + ConnectorSession session, + 
DateTimeZone timeZone, + Map computedStatistics, + Type columnType, + long rowCount) { - return reduce(first, second, SUBTRACT); + HiveColumnStatistics.Builder result = HiveColumnStatistics.builder(); + + // MIN MAX + if (computedStatistics.containsKey(MIN)) { + Block block = computedStatistics.get(MIN); + if (!block.isNull(0)) { + result.setLowValue(getMinMaxAsMetastoreValue(session, timeZone, columnType, block)); + } + } + if (computedStatistics.containsKey(MAX)) { + Block block = computedStatistics.get(MAX); + if (!block.isNull(0)) { + result.setHighValue(getMinMaxAsMetastoreValue(session, timeZone, columnType, block)); + } + } + + // NDV + if (computedStatistics.containsKey(NUMBER_OF_DISTINCT_VALUES)) { + result.setDistinctValuesCount(BigintType.BIGINT.getLong(computedStatistics.get(NUMBER_OF_DISTINCT_VALUES), 0)); + } + + // DATA SIZE + if (computedStatistics.containsKey(MAX_VALUE_SIZE_IN_BYTES)) { + result.setMaxColumnLength(BigintType.BIGINT.getLong(computedStatistics.get(MAX_VALUE_SIZE_IN_BYTES), 0)); + } + if (computedStatistics.containsKey(AVERAGE_VALUE_SIZE_IN_BYTES)) { + result.setAverageColumnLength(DOUBLE.getDouble(computedStatistics.get(AVERAGE_VALUE_SIZE_IN_BYTES), 0)); + } + + // NUMBER OF NULLS + if (computedStatistics.containsKey(NUMBER_OF_NON_NULL_VALUES)) { + result.setNullsCount(rowCount - BigintType.BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0)); + } + + // NUMBER OF FALSE, NUMBER OF TRUE + if (computedStatistics.containsKey(NUMBER_OF_TRUE_VALUES)) { + long numberOfTrue = BigintType.BIGINT.getLong(computedStatistics.get(NUMBER_OF_TRUE_VALUES), 0); + result.setTrueCount(numberOfTrue); + if (computedStatistics.containsKey(NUMBER_OF_NON_NULL_VALUES)) { + long numberOfNonNullValues = BigintType.BIGINT.getLong(computedStatistics.get(NUMBER_OF_NON_NULL_VALUES), 0); + result.setFalseCount(numberOfNonNullValues - numberOfTrue); + } + } + return result.build(); } - public static HiveBasicStatistics reduce(HiveBasicStatistics 
first, HiveBasicStatistics second, ReduceOperator operator) + private static Comparable getMinMaxAsMetastoreValue(ConnectorSession session, DateTimeZone timeZone, Type type, Block block) { - return new HiveBasicStatistics( - reduce(first.getFileCount(), second.getFileCount(), operator), - reduce(first.getRowCount(), second.getRowCount(), operator), - reduce(first.getInMemoryDataSizeInBytes(), second.getInMemoryDataSizeInBytes(), operator), - reduce(first.getOnDiskDataSizeInBytes(), second.getOnDiskDataSizeInBytes(), operator)); + if (type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT)) { + return ((Number) type.getObjectValue(session, block, 0)).longValue(); + } + else if (type.equals(DOUBLE) || type.equals(REAL)) { + return ((Number) type.getObjectValue(session, block, 0)).doubleValue(); + } + else if (type.equals(DATE)) { + return LocalDate.ofEpochDay(((SqlDate) type.getObjectValue(session, block, 0)).getDays()); + } + else if (type.equals(TIMESTAMP)) { + long valueUtc = block.getLong(0, 0); + return timeZone.convertUTCToLocal(valueUtc) / 1000; + } + else if (type instanceof DecimalType) { + return ((SqlDecimal) type.getObjectValue(session, block, 0)).toBigDecimal(); + } + throw new IllegalArgumentException("Unexpected type: " + type); } private static OptionalLong reduce(OptionalLong first, OptionalLong second, ReduceOperator operator) @@ -91,26 +341,78 @@ private static OptionalLong reduce(OptionalLong first, OptionalLong second, Redu return OptionalLong.of(first.getAsLong() + second.getAsLong()); case SUBTRACT: return OptionalLong.of(first.getAsLong() - second.getAsLong()); + case SELECT_MAX: + return OptionalLong.of(max(first.getAsLong(), second.getAsLong())); + case SELECT_MIN: + return OptionalLong.of(min(first.getAsLong(), second.getAsLong())); + case AVG: + return OptionalLong.of((first.getAsLong() + second.getAsLong()) / 2); + default: + throw new IllegalArgumentException("Unexpected operator: " + operator); } } 
return OptionalLong.empty(); } - public enum ReduceOperator + private static OptionalDouble reduce(OptionalDouble first, OptionalDouble second, ReduceOperator operator) { - ADD, - SUBTRACT; - - public ReduceOperator flip() - { - switch (this) { - case SUBTRACT: - return ADD; + if (first.isPresent() && second.isPresent()) { + switch (operator) { case ADD: - return SUBTRACT; + return OptionalDouble.of(first.getAsDouble() + second.getAsDouble()); + case SUBTRACT: + return OptionalDouble.of(first.getAsDouble() - second.getAsDouble()); + case SELECT_MAX: + return OptionalDouble.of(max(first.getAsDouble(), second.getAsDouble())); + case SELECT_MIN: + return OptionalDouble.of(min(first.getAsDouble(), second.getAsDouble())); + case AVG: + return OptionalDouble.of((first.getAsDouble() + second.getAsDouble()) / 2); + default: + throw new IllegalArgumentException("Unexpected operator: " + operator); + } + } + return OptionalDouble.empty(); + } + + @SuppressWarnings("unchecked") + private static Optional> reduce( + Optional> firstOptional, + Optional> secondOptional, + ReduceOperator operator) + { + if (firstOptional.isPresent() && secondOptional.isPresent()) { + Comparable first = firstOptional.get(); + Comparable second = secondOptional.get(); + checkArgument(first.getClass().equals(second.getClass()), "cannot compare comparable of different types: %s != %s", first.getClass(), second.getClass()); + switch (operator) { + case SELECT_MAX: + return Optional.of(max((Comparable) first, (Comparable) second)); + case SELECT_MIN: + return Optional.of(min((Comparable) first, (Comparable) second)); default: - throw new UnsupportedOperationException("flip is not implemented for operation type: " + this); + throw new IllegalArgumentException("Unexpected operator: " + operator); } } + return Optional.empty(); + } + + private static > T max(T first, T second) + { + return first.compareTo(second) > 0 ? 
first : second; + } + + private static > T min(T first, T second) + { + return first.compareTo(second) < 0 ? first : second; + } + + public enum ReduceOperator + { + ADD, + SUBTRACT, + SELECT_MIN, + SELECT_MAX, + AVG } } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java index 48fcaf91051df..7919a97d7978a 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveClient.java @@ -21,6 +21,7 @@ import com.facebook.presto.hive.metastore.CachingHiveMetastore; import com.facebook.presto.hive.metastore.Column; import com.facebook.presto.hive.metastore.ExtendedHiveMetastore; +import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; import com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege; import com.facebook.presto.hive.metastore.Partition; @@ -37,6 +38,7 @@ import com.facebook.presto.hive.parquet.ParquetHiveRecordCursor; import com.facebook.presto.hive.parquet.ParquetPageSource; import com.facebook.presto.hive.rcfile.RcFilePageSource; +import com.facebook.presto.hive.util.Statistics; import com.facebook.presto.metadata.MetadataManager; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ColumnMetadata; @@ -115,6 +117,7 @@ import org.testng.annotations.Test; import java.io.IOException; +import java.math.BigDecimal; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -122,7 +125,9 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalDouble; import java.util.OptionalInt; +import java.util.OptionalLong; import java.util.Set; import java.util.TimeZone; import java.util.UUID; @@ -138,6 +143,7 @@ import static 
com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT; import static com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH; import static com.facebook.presto.hive.AbstractTestHiveClient.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY; +import static com.facebook.presto.hive.HiveBasicStatistics.createEmptyStatistics; import static com.facebook.presto.hive.HiveBasicStatistics.createZeroStatistics; import static com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME; import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; @@ -186,8 +192,10 @@ import static com.facebook.presto.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED; import static com.facebook.presto.spi.type.BigintType.BIGINT; import static com.facebook.presto.spi.type.BooleanType.BOOLEAN; +import static com.facebook.presto.spi.type.CharType.createCharType; import static com.facebook.presto.spi.type.Chars.isCharType; import static com.facebook.presto.spi.type.DateType.DATE; +import static com.facebook.presto.spi.type.DecimalType.createDecimalType; import static com.facebook.presto.spi.type.DoubleType.DOUBLE; import static com.facebook.presto.spi.type.HyperLogLogType.HYPER_LOG_LOG; import static com.facebook.presto.spi.type.IntegerType.INTEGER; @@ -200,6 +208,7 @@ import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY; import static com.facebook.presto.spi.type.VarcharType.VARCHAR; import static com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType; +import static com.facebook.presto.spi.type.VarcharType.createVarcharType; import static com.facebook.presto.spi.type.Varchars.isVarcharType; import static com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf; import static com.facebook.presto.testing.MaterializedResult.materializeSourceDataStream; @@ -236,6 +245,7 @@ import static 
java.util.concurrent.Executors.newFixedThreadPool; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.stream.Collectors.toList; +import static org.apache.hadoop.hive.common.FileUtils.makePartName; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.entry; import static org.joda.time.DateTimeZone.UTC; @@ -415,7 +425,111 @@ private static RowType toRowType(List columns) private static final JoinCompiler JOIN_COMPILER = new JoinCompiler(MetadataManager.createTestMetadataManager(), new FeaturesConfig()); - private static final Set STATISTICS_PARAMETERS = ImmutableSet.copyOf(createZeroStatistics().toPartitionParameters().keySet()); + private static final List STATISTICS_TABLE_COLUMNS = ImmutableList.builder() + .add(new ColumnMetadata("t_boolean", BOOLEAN)) + .add(new ColumnMetadata("t_bigint", BIGINT)) + .add(new ColumnMetadata("t_integer", INTEGER)) + .add(new ColumnMetadata("t_smallint", SMALLINT)) + .add(new ColumnMetadata("t_tinyint", TINYINT)) + .add(new ColumnMetadata("t_double", DOUBLE)) + .add(new ColumnMetadata("t_float", REAL)) + .add(new ColumnMetadata("t_string", createUnboundedVarcharType())) + .add(new ColumnMetadata("t_varchar", createVarcharType(100))) + .add(new ColumnMetadata("t_char", createCharType(5))) + .add(new ColumnMetadata("t_varbinary", VARBINARY)) + .add(new ColumnMetadata("t_date", DATE)) + .add(new ColumnMetadata("t_timestamp", TIMESTAMP)) + .add(new ColumnMetadata("t_short_decimal", createDecimalType(5, 2))) + .add(new ColumnMetadata("t_long_decimal", createDecimalType(20, 4))) + .build(); + + private static final List STATISTICS_PARTITIONED_TABLE_COLUMNS = ImmutableList.builder() + .addAll(STATISTICS_TABLE_COLUMNS) + .add(new ColumnMetadata("ds", VARCHAR)) + .build(); + + private static final PartitionStatistics STATISTICS_EMPTY_TABLE = new PartitionStatistics(createZeroStatistics(), ImmutableMap.of()); + + private static final PartitionStatistics 
STATISTICS_1 = + new PartitionStatistics( + new HiveBasicStatistics(0, 2, 3, 0), + ImmutableMap.builder() + .put("t_boolean", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.of(5), OptionalLong.of(6), OptionalLong.of(3), OptionalLong.empty())) + .put("t_bigint", new HiveColumnStatistics(Optional.of(1234L), Optional.of(5678L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.of(5))) + .put("t_integer", new HiveColumnStatistics(Optional.of(123L), Optional.of(567L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(3), OptionalLong.of(4))) + .put("t_smallint", new HiveColumnStatistics(Optional.of(12L), Optional.of(56L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.of(6))) + .put("t_tinyint", new HiveColumnStatistics(Optional.of(1L), Optional.of(2L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(1), OptionalLong.of(3))) + .put("t_double", new HiveColumnStatistics(Optional.of(1234.25), Optional.of(5678.58), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(8))) + .put("t_float", new HiveColumnStatistics(Optional.of(123.25), Optional.of(567.58), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(9), OptionalLong.of(10))) + .put("t_string", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(10), OptionalDouble.of(5.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(3), OptionalLong.of(7))) + .put("t_varchar", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(100), OptionalDouble.of(23.3), OptionalLong.empty(), OptionalLong.empty(), 
OptionalLong.of(5), OptionalLong.of(3))) + .put("t_char", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(5), OptionalDouble.of(5.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(1), OptionalLong.of(4))) + .put("t_varbinary", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(4), OptionalDouble.of(3.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(1), OptionalLong.empty())) + .put("t_date", new HiveColumnStatistics(Optional.of(java.time.LocalDate.ofEpochDay(1)), Optional.of(java.time.LocalDate.ofEpochDay(2)), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(6))) + .put("t_timestamp", new HiveColumnStatistics(Optional.of(1234567L), Optional.of(71234567L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(5))) + .put("t_short_decimal", new HiveColumnStatistics(Optional.of(new BigDecimal(10)), Optional.of(new BigDecimal(12)), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(3), OptionalLong.of(5))) + .put("t_long_decimal", new HiveColumnStatistics(Optional.of(new BigDecimal("12345678901234567.123")), Optional.of(new BigDecimal("812345678901234567.123")), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.of(1))) + .build()); + + private static final PartitionStatistics STATISTICS_1_1 = + new PartitionStatistics( + new HiveBasicStatistics(OptionalLong.of(0), OptionalLong.of(2), OptionalLong.empty(), OptionalLong.of(0)), + STATISTICS_1.getColumnStatistics().entrySet() + .stream() + .filter(entry -> entry.getKey().hashCode() % 2 == 0) + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); + + private static final PartitionStatistics STATISTICS_1_2 = + new PartitionStatistics( + new 
HiveBasicStatistics(OptionalLong.of(0), OptionalLong.empty(), OptionalLong.of(3), OptionalLong.of(0)), + STATISTICS_1.getColumnStatistics().entrySet() + .stream() + .filter(entry -> entry.getKey().hashCode() % 2 == 1) + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); + + private static final PartitionStatistics STATISTICS_2 = + new PartitionStatistics( + new HiveBasicStatistics(0, 3, 2, 0), + ImmutableMap.builder() + .put("t_boolean", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.of(4), OptionalLong.of(3), OptionalLong.of(2), OptionalLong.empty())) + .put("t_bigint", new HiveColumnStatistics(Optional.of(2345L), Optional.of(6789L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(4), OptionalLong.of(7))) + .put("t_integer", new HiveColumnStatistics(Optional.of(234L), Optional.of(678L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(5), OptionalLong.of(6))) + .put("t_smallint", new HiveColumnStatistics(Optional.of(23L), Optional.of(65L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(5))) + .put("t_tinyint", new HiveColumnStatistics(Optional.of(2L), Optional.of(3L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.of(4))) + .put("t_double", new HiveColumnStatistics(Optional.of(2345.25), Optional.of(6785.58), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(6), OptionalLong.of(3))) + .put("t_float", new HiveColumnStatistics(Optional.of(235.25), Optional.of(676.58), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(11))) + .put("t_string", new HiveColumnStatistics(Optional.empty(), 
Optional.empty(), OptionalLong.of(11), OptionalDouble.of(6.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.of(6))) + .put("t_varchar", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(99), OptionalDouble.of(22.3), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(1))) + .put("t_char", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(6), OptionalDouble.of(6.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(0), OptionalLong.of(3))) + .put("t_varbinary", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(2), OptionalDouble.of(1.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.empty())) + .put("t_date", new HiveColumnStatistics(Optional.of(java.time.LocalDate.ofEpochDay(2)), Optional.of(java.time.LocalDate.ofEpochDay(3)), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(8), OptionalLong.of(7))) + .put("t_timestamp", new HiveColumnStatistics(Optional.of(2345671L), Optional.of(12345677L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(9), OptionalLong.of(1))) + .put("t_short_decimal", new HiveColumnStatistics(Optional.of(new BigDecimal(11)), Optional.of(new BigDecimal(14)), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(3), OptionalLong.of(2))) + .put("t_long_decimal", new HiveColumnStatistics(Optional.of(new BigDecimal("71234567890123456.123")), Optional.of(new BigDecimal("781234567890123456.123")), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(5), OptionalLong.of(7))) + .build()); + + private static final PartitionStatistics STATISTICS_EMPTY_OPTIONAL_FIELDS = + new PartitionStatistics( + new HiveBasicStatistics(OptionalLong.of(0), 
OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(0)), + ImmutableMap.builder() + .put("t_boolean", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.of(4), OptionalLong.of(3), OptionalLong.of(2), OptionalLong.empty())) + .put("t_bigint", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(4), OptionalLong.of(7))) + .put("t_integer", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(5), OptionalLong.of(6))) + .put("t_smallint", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(5))) + .put("t_tinyint", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.of(4))) + .put("t_double", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(6), OptionalLong.of(3))) + .put("t_float", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(11))) + .put("t_string", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(0), OptionalDouble.of(0.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.of(6))) + .put("t_varchar", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(0), OptionalDouble.of(0.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(1))) + 
.put("t_char", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(0), OptionalDouble.of(0.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(0), OptionalLong.of(3))) + .put("t_varbinary", new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(0), OptionalDouble.of(0.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.empty())) + .put("t_date", new HiveColumnStatistics(Optional.of(java.time.LocalDate.ofEpochDay(2)), Optional.of(java.time.LocalDate.ofEpochDay(3)), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(8), OptionalLong.of(7))) + .put("t_timestamp", new HiveColumnStatistics(Optional.of(2345671L), Optional.of(12345677L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(9), OptionalLong.of(1))) + .put("t_short_decimal", new HiveColumnStatistics(Optional.of(new BigDecimal(11)), Optional.of(new BigDecimal(14)), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(3), OptionalLong.of(2))) + .put("t_long_decimal", new HiveColumnStatistics(Optional.of(new BigDecimal("71234567890123456.123")), Optional.of(new BigDecimal("781234567890123456.123")), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(5), OptionalLong.of(7))) + .build()); + + private static final HiveColumnStatistics DUMMY_COLUMN_STATISTICS = new HiveColumnStatistics(Optional.empty(), Optional.empty(), OptionalLong.of(0), OptionalDouble.of(0.0), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.of(7), OptionalLong.of(1)); protected String clientId; protected String database; @@ -625,7 +739,8 @@ protected final void setup(String databaseName, HiveClientConfig hiveClientConfi partitionUpdateCodec, newFixedThreadPool(2), new HiveTypeTranslator(), - TEST_SERVER_VERSION); + TEST_SERVER_VERSION, 
+ Statistics::getSupportedStatistics); transactionManager = new HiveTransactionManager(); splitManager = new HiveSplitManager( transactionHandle -> ((HiveMetadata) transactionManager.get(transactionHandle)).getMetastore(), @@ -926,7 +1041,7 @@ protected void doTestMismatchSchemaTable( sink.appendPage(dataBefore.toPage()); Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments); + metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); transaction.commit(); } @@ -990,7 +1105,7 @@ protected void doTestMismatchSchemaTable( sink.appendPage(dataAfter.toPage()); Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments); + metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); transaction.commit(); @@ -2053,7 +2168,7 @@ public void testTableCreationIgnoreExisting() targetPath = locationService.getQueryWriteInfo(locationHandle).getTargetPath(); Table table = createSimpleTable(schemaTableName, columns, session, targetPath, "q1"); transaction.getMetastore(schemaName) - .createTable(session, table, privileges, Optional.empty(), false); + .createTable(session, table, privileges, Optional.empty(), false, STATISTICS_EMPTY_TABLE); Optional
tableHandle = transaction.getMetastore(schemaName).getTable(schemaName, tableName); assertTrue(tableHandle.isPresent()); transaction.commit(); @@ -2063,7 +2178,7 @@ public void testTableCreationIgnoreExisting() try (Transaction transaction = newTransaction()) { Table table = createSimpleTable(schemaTableName, columns, session, targetPath.suffix("_2"), "q2"); transaction.getMetastore(schemaName) - .createTable(session, table, privileges, Optional.empty(), false); + .createTable(session, table, privileges, Optional.empty(), false, STATISTICS_EMPTY_TABLE); transaction.commit(); fail("Expected exception"); } @@ -2075,7 +2190,7 @@ public void testTableCreationIgnoreExisting() try (Transaction transaction = newTransaction()) { Table table = createSimpleTable(schemaTableName, columns, session, targetPath.suffix("_3"), "q3"); transaction.getMetastore(schemaName) - .createTable(session, table, privileges, Optional.empty(), true); + .createTable(session, table, privileges, Optional.empty(), true, STATISTICS_EMPTY_TABLE); transaction.commit(); } @@ -2084,7 +2199,7 @@ public void testTableCreationIgnoreExisting() try (Transaction transaction = newTransaction()) { Table table = createSimpleTable(schemaTableName, columns, session, targetPath.suffix("_4"), "q4"); transaction.getMetastore(schemaName) - .createTable(session, table, privileges, Optional.empty(), true); + .createTable(session, table, privileges, Optional.empty(), true, STATISTICS_EMPTY_TABLE); transaction.commit(); fail("Expected exception"); } @@ -2200,7 +2315,7 @@ private void doTestBucketSortedTables(SchemaTableName table) } // finish creating table - metadata.finishCreateTable(session, outputHandle, fragments); + metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of()); transaction.commit(); } @@ -2444,6 +2559,307 @@ public void testUpdatePartitionParameters() } } + @Test + public void testUpdateTableColumnStatistics() + throws Exception + { + SchemaTableName tableName = 
temporaryTable("update_table_column_statistics"); + try { + doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); + + ExtendedHiveMetastore metastoreClient = getMetastoreClient(tableName.getSchemaName()); + assertThat(metastoreClient.supportsColumnStatistics()).isTrue(); + + assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) + .isEqualTo(STATISTICS_EMPTY_TABLE); + + metastoreClient.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_EMPTY_TABLE); + return STATISTICS_1_1; + }); + + assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) + .isEqualTo(STATISTICS_1_1); + + metastoreClient.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_1_1); + return STATISTICS_1_2; + }); + + assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) + .isEqualTo(STATISTICS_1_2); + + metastoreClient.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_1_2); + return STATISTICS_2; + }); + + assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) + .isEqualTo(STATISTICS_2); + + metastoreClient.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_2); + return STATISTICS_EMPTY_TABLE; + }); + + assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) + .isEqualTo(STATISTICS_EMPTY_TABLE); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testUpdateTableColumnStatisticsEmptyOptionalFields() + throws Exception + { + SchemaTableName tableName = 
temporaryTable("update_table_column_statistics_empty_optional_fields"); + try { + doCreateEmptyTable(tableName, ORC, STATISTICS_TABLE_COLUMNS); + + ExtendedHiveMetastore metastoreClient = getMetastoreClient(tableName.getSchemaName()); + assertThat(metastoreClient.supportsColumnStatistics()).isTrue(); + metastoreClient.updateTableStatistics( + tableName.getSchemaName(), + tableName.getTableName(), + currentStatistics -> STATISTICS_EMPTY_OPTIONAL_FIELDS); + + assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) + .isEqualTo(STATISTICS_EMPTY_OPTIONAL_FIELDS); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testUpdatePartitionColumnStatistics() + throws Exception + { + SchemaTableName tableName = temporaryTable("update_partition_column_statistics"); + try { + doCreateEmptyTable(tableName, ORC, STATISTICS_PARTITIONED_TABLE_COLUMNS); + + ExtendedHiveMetastore metastoreClient = getMetastoreClient(tableName.getSchemaName()); + assertThat(metastoreClient.supportsColumnStatistics()).isTrue(); + Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) + .orElseThrow(() -> new TableNotFoundException(tableName)); + + List firstPartitionValues = ImmutableList.of("2016-01-01"); + List secondPartitionValues = ImmutableList.of("2016-01-02"); + + String firstPartitionName = makePartName(ImmutableList.of("ds"), firstPartitionValues); + String secondPartitionName = makePartName(ImmutableList.of("ds"), secondPartitionValues); + + List partitions = ImmutableList.of(firstPartitionValues, secondPartitionValues) + .stream() + .map(values -> Partition.builder() + .setDatabaseName(tableName.getSchemaName()) + .setTableName(tableName.getTableName()) + .setColumns(table.getPartitionColumns()) + .setValues(values) + .withStorage(storage -> storage + .setStorageFormat(fromHiveStorageFormat(HiveStorageFormat.ORC)) + .setLocation(table.getStorage().getLocation() + "/" + 
makePartName(ImmutableList.of("ds"), values))) + .build()) + .collect(toImmutableList()); + metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions); + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> STATISTICS_EMPTY_TABLE); + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> STATISTICS_EMPTY_TABLE); + + assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) + .isEqualTo(ImmutableMap.of(firstPartitionName, STATISTICS_EMPTY_TABLE, secondPartitionName, STATISTICS_EMPTY_TABLE)); + + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_EMPTY_TABLE); + return STATISTICS_1_1; + }); + + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_EMPTY_TABLE); + return STATISTICS_1_2; + }); + + assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) + .isEqualTo(ImmutableMap.of(firstPartitionName, STATISTICS_1_1, secondPartitionName, STATISTICS_1_2)); + + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_1_1); + return STATISTICS_1_2; + }); + + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_1_2); + return 
STATISTICS_1_1; + }); + + assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) + .isEqualTo(ImmutableMap.of(firstPartitionName, STATISTICS_1_2, secondPartitionName, STATISTICS_1_1)); + + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_1_2); + return STATISTICS_2; + }); + + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_1_1); + return STATISTICS_2; + }); + + assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) + .isEqualTo(ImmutableMap.of(firstPartitionName, STATISTICS_2, secondPartitionName, STATISTICS_2)); + + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_2); + return STATISTICS_EMPTY_TABLE; + }); + + metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> { + assertThat(currentStatistics).isEqualTo(STATISTICS_2); + return STATISTICS_EMPTY_TABLE; + }); + + assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(firstPartitionName, secondPartitionName))) + .isEqualTo(ImmutableMap.of(firstPartitionName, STATISTICS_EMPTY_TABLE, secondPartitionName, STATISTICS_EMPTY_TABLE)); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() + throws Exception + { + SchemaTableName tableName = 
temporaryTable("update_partition_column_statistics"); + try { + doCreateEmptyTable(tableName, ORC, STATISTICS_PARTITIONED_TABLE_COLUMNS); + + ExtendedHiveMetastore metastoreClient = getMetastoreClient(tableName.getSchemaName()); + assertThat(metastoreClient.supportsColumnStatistics()).isTrue(); + Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) + .orElseThrow(() -> new TableNotFoundException(tableName)); + + metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(Partition.builder() + .setDatabaseName(tableName.getSchemaName()) + .setTableName(tableName.getTableName()) + .setColumns(table.getPartitionColumns()) + .setValues(ImmutableList.of("2016-01-01")) + .withStorage(storage -> storage + .setStorageFormat(fromHiveStorageFormat(HiveStorageFormat.ORC)) + .setLocation(table.getStorage().getLocation() + "/ds=2016-01-01")) + .build())); + + metastoreClient.updatePartitionStatistics( + tableName.getSchemaName(), + tableName.getTableName(), + "ds=2016-01-01", + currentStatistics -> STATISTICS_EMPTY_OPTIONAL_FIELDS); + + assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of("ds=2016-01-01"))) + .isEqualTo(ImmutableMap.of("ds=2016-01-01", STATISTICS_EMPTY_OPTIONAL_FIELDS)); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testMigrateTableColumnStatistics() + { + SchemaTableName tableName = temporaryTable("rename_column"); + try { + createDummyTable(tableName); + + ExtendedHiveMetastore metastoreClient = getMetastoreClient(tableName.getSchemaName()); + assertThat(metastoreClient.supportsColumnStatistics()).isTrue(); + metastoreClient.addColumn(tableName.getSchemaName(), tableName.getTableName(), "dummy_1", HiveType.valueOf("string"), "comment"); + + PartitionStatistics dummyStatistics = new PartitionStatistics( + new HiveBasicStatistics(0, 0, 0, 0), + ImmutableMap.of("dummy_1", 
DUMMY_COLUMN_STATISTICS)); + + metastoreClient.updateTableStatistics(tableName.getSchemaName(), tableName.getTableName(), statistics -> dummyStatistics); + assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) + .isEqualTo(dummyStatistics); + + metastoreClient.renameColumn(tableName.getSchemaName(), tableName.getTableName(), "dummy_1", "dummy_2"); + + assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) + .isEqualTo(new PartitionStatistics( + new HiveBasicStatistics(0, 0, 0, 0), + ImmutableMap.of("dummy_2", DUMMY_COLUMN_STATISTICS))); + + metastoreClient.dropColumn(tableName.getSchemaName(), tableName.getTableName(), "dummy_2"); + + assertThat(metastoreClient.getTableStatistics(tableName.getSchemaName(), tableName.getTableName())) + .isEqualTo(new PartitionStatistics( + new HiveBasicStatistics(0, 0, 0, 0), + ImmutableMap.of())); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testMigratePartitionColumnStatistics() + throws Exception + { + SchemaTableName tableName = temporaryTable("migrate_table_column_statistics"); + try { + doCreateEmptyTable(tableName, ORC, STATISTICS_PARTITIONED_TABLE_COLUMNS); + + ExtendedHiveMetastore metastoreClient = getMetastoreClient(tableName.getSchemaName()); + assertThat(metastoreClient.supportsColumnStatistics()).isTrue(); + metastoreClient.addColumn(tableName.getSchemaName(), tableName.getTableName(), "dummy_1", HiveType.valueOf("string"), "comment"); + + Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()) + .orElseThrow(() -> new TableNotFoundException(tableName)); + + metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(Partition.builder() + .setDatabaseName(tableName.getSchemaName()) + .setTableName(tableName.getTableName()) + .setColumns(table.getPartitionColumns()) + .setValues(ImmutableList.of("2016-01-01")) + 
.withStorage(storage -> storage + .setStorageFormat(fromHiveStorageFormat(HiveStorageFormat.ORC)) + .setLocation(table.getStorage().getLocation() + "/ds=2016-01-01")) + .build())); + + PartitionStatistics dummyStatistics = new PartitionStatistics( + new HiveBasicStatistics(0, 0, 0, 0), + ImmutableMap.of("dummy_1", DUMMY_COLUMN_STATISTICS)); + + metastoreClient.updatePartitionStatistics( + tableName.getSchemaName(), + tableName.getTableName(), + "ds=2016-01-01", + currentStatistics -> dummyStatistics); + + assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of("ds=2016-01-01"))) + .isEqualTo(ImmutableMap.of("ds=2016-01-01", dummyStatistics)); + + metastoreClient.renameColumn(tableName.getSchemaName(), tableName.getTableName(), "dummy_1", "dummy_2"); + + assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of("ds=2016-01-01"))) + .isEqualTo(ImmutableMap.of("ds=2016-01-01", new PartitionStatistics( + new HiveBasicStatistics(0, 0, 0, 0), + ImmutableMap.of("dummy_2", DUMMY_COLUMN_STATISTICS)))); + + metastoreClient.dropColumn(tableName.getSchemaName(), tableName.getTableName(), "dummy_2"); + + assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of("ds=2016-01-01"))) + .isEqualTo(ImmutableMap.of("ds=2016-01-01", new PartitionStatistics( + new HiveBasicStatistics(0, 0, 0, 0), + ImmutableMap.of()))); + } + finally { + dropTable(tableName); + } + } + private void createDummyTable(SchemaTableName tableName) { try (Transaction transaction = newTransaction()) { @@ -2453,7 +2869,7 @@ private void createDummyTable(SchemaTableName tableName) List columns = ImmutableList.of(new ColumnMetadata("dummy", createUnboundedVarcharType())); ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(TEXTFILE)); ConnectorOutputTableHandle handle = 
metadata.beginCreateTable(session, tableMetadata, Optional.empty()); - metadata.finishCreateTable(session, handle, ImmutableList.of()); + metadata.finishCreateTable(session, handle, ImmutableList.of(), ImmutableList.of()); transaction.commit(); } @@ -2547,7 +2963,7 @@ protected void doCreateTable(SchemaTableName tableName, HiveStorageFormat storag } // commit the table - metadata.finishCreateTable(session, outputHandle, fragments); + metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of()); transaction.commit(); } @@ -2573,7 +2989,7 @@ protected void doCreateTable(SchemaTableName tableName, HiveStorageFormat storag assertEquals(table.getParameters().get(PRESTO_QUERY_ID_NAME), queryId); // verify basic statistics - HiveBasicStatistics statistics = HiveBasicStatistics.createFromPartitionParameters(table.getParameters()); + HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); assertEquals(statistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount()); assertEquals(statistics.getFileCount().getAsLong(), 1L); assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); @@ -2635,7 +3051,7 @@ protected void doCreateEmptyTable(SchemaTableName tableName, HiveStorageFormat s // verify basic statistics if (partitionedBy.isEmpty()) { - HiveBasicStatistics statistics = HiveBasicStatistics.createFromPartitionParameters(table.getParameters()); + HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); assertEquals(statistics.getRowCount().getAsLong(), 0L); assertEquals(statistics.getFileCount().getAsLong(), 0L); assertEquals(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); @@ -2671,8 +3087,7 @@ private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName assertEqualsIgnoreOrder(result.getMaterializedRows(), resultBuilder.build().getMaterializedRows()); // statistics - Table table = 
transaction.getMetastore(tableName.getSchemaName()).getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - HiveBasicStatistics tableStatistics = HiveBasicStatistics.createFromPartitionParameters(table.getParameters()); + HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * (i + 1)); assertEquals(tableStatistics.getFileCount().getAsLong(), i + 1L); assertGreaterThan(tableStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); @@ -2700,17 +3115,15 @@ private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName sink.appendPage(CREATE_TABLE_DATA.toPage()); sink.appendPage(CREATE_TABLE_DATA.toPage()); Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments); + metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); // statistics, visible from within transaction - Table table = transaction.getMetastore(tableName.getSchemaName()).getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - HiveBasicStatistics tableStatistics = HiveBasicStatistics.createFromPartitionParameters(table.getParameters()); + HiveBasicStatistics tableStatistics = getBasicStatisticsForTable(transaction, tableName); assertEquals(tableStatistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 5L); try (Transaction otherTransaction = newTransaction()) { // statistics, not visible from outside transaction - Table otherTable = otherTransaction.getMetastore(tableName.getSchemaName()).getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - HiveBasicStatistics otherTableStatistics = HiveBasicStatistics.createFromPartitionParameters(otherTable.getParameters()); + HiveBasicStatistics otherTableStatistics = getBasicStatisticsForTable(otherTransaction, tableName); assertEquals(otherTableStatistics.getRowCount().getAsLong(), 
CREATE_TABLE_DATA.getRowCount() * 3L); } @@ -2750,8 +3163,7 @@ private void doInsert(HiveStorageFormat storageFormat, SchemaTableName tableName // verify statistics unchanged try (Transaction transaction = newTransaction()) { - Table table = transaction.getMetastore(tableName.getSchemaName()).getTable(tableName.getSchemaName(), tableName.getTableName()).get(); - HiveBasicStatistics statistics = HiveBasicStatistics.createFromPartitionParameters(table.getParameters()); + HiveBasicStatistics statistics = getBasicStatisticsForTable(transaction, tableName); assertEquals(statistics.getRowCount().getAsLong(), CREATE_TABLE_DATA.getRowCount() * 3L); assertEquals(statistics.getFileCount().getAsLong(), 3L); } @@ -2893,7 +3305,7 @@ private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTab // test statistics for (String partitionName : partitionNames) { - HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, dsColumn, partitionName); + HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, partitionName); assertEquals(partitionStatistics.getRowCount().getAsLong(), 1L); assertEquals(partitionStatistics.getFileCount().getAsLong(), 1L); assertGreaterThan(partitionStatistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); @@ -2913,7 +3325,7 @@ private void doInsertIntoNewPartition(HiveStorageFormat storageFormat, SchemaTab ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle); sink.appendPage(CREATE_TABLE_PARTITIONED_DATA_2ND.toPage()); Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments); + metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); // verify all temp files start with the unique prefix HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName()); @@ -3000,7 +3412,7 @@ 
private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, Sche // test statistics for (String partitionName : partitionNames) { - HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, dsColumn, partitionName); + HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, partitionName); assertEquals(statistics.getRowCount().getAsLong(), i + 1L); assertEquals(statistics.getFileCount().getAsLong(), i + 1L); assertGreaterThan(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L); @@ -3028,7 +3440,7 @@ private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, Sche sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage()); sink.appendPage(CREATE_TABLE_PARTITIONED_DATA.toPage()); Collection fragments = getFutureValue(sink.finish()); - metadata.finishInsert(session, insertTableHandle, fragments); + metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); // verify all temp files start with the unique prefix HdfsContext context = new HdfsContext(session, tableName.getSchemaName(), tableName.getTableName()); @@ -3042,7 +3454,7 @@ private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, Sche List partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); for (String partitionName : partitionNames) { - HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, dsColumn, partitionName); + HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, partitionName); assertEquals(partitionStatistics.getRowCount().getAsLong(), 5L); } @@ -3071,7 +3483,7 @@ private void doInsertIntoExistingPartition(HiveStorageFormat storageFormat, Sche List partitionNames = 
transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(tableName.getSchemaName(), tableName.getTableName()) .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); for (String partitionName : partitionNames) { - HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, dsColumn, partitionName); + HiveBasicStatistics partitionStatistics = getBasicStatisticsForPartition(transaction, tableName, partitionName); assertEquals(partitionStatistics.getRowCount().getAsLong(), 3L); } } @@ -3092,7 +3504,7 @@ private void doInsertIntoExistingPartitionEmptyStatistics(HiveStorageFormat stor .orElseThrow(() -> new AssertionError("Table does not exist: " + tableName)); for (String partitionName : partitionNames) { - HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, dsColumn, partitionName); + HiveBasicStatistics statistics = getBasicStatisticsForPartition(transaction, tableName, partitionName); assertThat(statistics.getRowCount()).isNotPresent(); assertThat(statistics.getInMemoryDataSizeInBytes()).isNotPresent(); // fileCount and rawSize statistics are computed on the fly by the metastore, thus cannot be erased @@ -3100,30 +3512,32 @@ private void doInsertIntoExistingPartitionEmptyStatistics(HiveStorageFormat stor } } - private static String getPartitionValue(ColumnHandle columnHandle, String partitionName) - { - return partitionName.replaceFirst(((HiveColumnHandle) columnHandle).getName() + "=", ""); - } - - private static HiveBasicStatistics getBasicStatisticsForPartition(Transaction transaction, SchemaTableName table, ColumnHandle handle, String partitionName) + private static HiveBasicStatistics getBasicStatisticsForTable(Transaction transaction, SchemaTableName table) { - return HiveBasicStatistics.createFromPartitionParameters(getPartition(transaction, table, handle, partitionName).getParameters()); + return transaction + .getMetastore(table.getSchemaName()) + 
.getTableStatistics(table.getSchemaName(), table.getTableName()) + .getBasicStatistics(); } - private static Partition getPartition(Transaction transaction, SchemaTableName table, ColumnHandle handle, String partitionName) + private static HiveBasicStatistics getBasicStatisticsForPartition(Transaction transaction, SchemaTableName table, String partitionName) { return transaction .getMetastore(table.getSchemaName()) - .getPartition(table.getSchemaName(), table.getTableName(), ImmutableList.of(getPartitionValue(handle, partitionName))) - .get(); + .getPartitionStatistics(table.getSchemaName(), table.getTableName(), ImmutableSet.of(partitionName)) + .get(partitionName) + .getBasicStatistics(); } private void eraseStatistics(SchemaTableName schemaTableName) { ExtendedHiveMetastore metastoreClient = getMetastoreClient(schemaTableName.getSchemaName()); - metastoreClient.updateTableParameters(schemaTableName.getSchemaName(), schemaTableName.getTableName(), AbstractTestHiveClient::eraseStatistics); + metastoreClient.updateTableStatistics(schemaTableName.getSchemaName(), schemaTableName.getTableName(), statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of())); Table table = metastoreClient.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()) .orElseThrow(() -> new TableNotFoundException(schemaTableName)); + List partitionColumns = table.getPartitionColumns().stream() + .map(Column::getName) + .collect(toImmutableList()); if (!table.getPartitionColumns().isEmpty()) { List partitionNames = metastoreClient.getPartitionNames(schemaTableName.getSchemaName(), schemaTableName.getTableName()) .orElse(ImmutableList.of()); @@ -3136,19 +3550,15 @@ private void eraseStatistics(SchemaTableName schemaTableName) .map(Optional::get) .collect(toImmutableList()); for (Partition partition : partitions) { - metastoreClient.updatePartitionParameters(schemaTableName.getSchemaName(), schemaTableName.getTableName(), partition.getValues(), 
AbstractTestHiveClient::eraseStatistics); + metastoreClient.updatePartitionStatistics( + schemaTableName.getSchemaName(), + schemaTableName.getTableName(), + makePartName(partitionColumns, partition.getValues()), + statistics -> new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of())); } } } - private static Map eraseStatistics(Map tableParameters) - { - return tableParameters.entrySet() - .stream() - .filter(entry -> !STATISTICS_PARAMETERS.contains(entry.getKey())) - .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); - } - /** * @return query id */ @@ -3174,7 +3584,7 @@ private String insertData(SchemaTableName tableName, MaterializedResult data) Collection fragments = getFutureValue(sink.finish()); // commit the insert - metadata.finishInsert(session, insertTableHandle, fragments); + metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); transaction.commit(); } @@ -3867,7 +4277,7 @@ private void createEmptyTable(SchemaTableName schemaTableName, HiveStorageFormat .setSerdeParameters(ImmutableMap.of()); PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(tableOwner); - transaction.getMetastore(schemaName).createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), true); + transaction.getMetastore(schemaName).createTable(session, tableBuilder.build(), principalPrivileges, Optional.empty(), true, STATISTICS_EMPTY_TABLE); transaction.commit(); } @@ -4064,7 +4474,7 @@ private void doTestTransactionDeleteInsert( rollbackIfEquals(tag, ROLLBACK_AFTER_APPEND_PAGE); Collection fragments = getFutureValue(sink.finish()); rollbackIfEquals(tag, ROLLBACK_AFTER_SINK_FINISH); - metadata.finishInsert(session, insertTableHandle, fragments); + metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of()); rollbackIfEquals(tag, ROLLBACK_AFTER_FINISH_INSERT); assertEquals(tag, COMMIT); diff --git 
a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java index 49ec7b8cbe539..aaa65c654f553 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileSystem.java @@ -27,6 +27,7 @@ import com.facebook.presto.hive.metastore.thrift.HiveCluster; import com.facebook.presto.hive.metastore.thrift.TestingHiveCluster; import com.facebook.presto.hive.metastore.thrift.ThriftHiveMetastore; +import com.facebook.presto.hive.util.Statistics; import com.facebook.presto.metadata.MetadataManager; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ColumnMetadata; @@ -180,7 +181,8 @@ protected void setup(String host, int port, String databaseName, Function ((HiveMetadata) transactionManager.get(transactionHandle)).getMetastore(), @@ -373,7 +375,7 @@ private void createTable(SchemaTableName tableName, HiveStorageFormat storageFor Collection fragments = getFutureValue(sink.finish()); // commit the table - metadata.finishCreateTable(session, outputHandle, fragments); + metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of()); transaction.commit(); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/HiveQueryRunner.java b/presto-hive/src/test/java/com/facebook/presto/hive/HiveQueryRunner.java index 6724c794dde74..b63fc4aea1bcc 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/HiveQueryRunner.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/HiveQueryRunner.java @@ -102,6 +102,7 @@ public static DistributedQueryRunner createQueryRunner(Iterable> ta .put("hive.security", security) .put("hive.max-partitions-per-scan", "1000") .put("hive.assume-canonical-partition-keys", "true") + .put("hive.collect-column-statistics-on-write", "ENABLED") .build(); Map hiveBucketedProperties = 
ImmutableMap.builder() .putAll(hiveProperties) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveBasicStatistics.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveBasicStatistics.java deleted file mode 100644 index 7c00f329aba8d..0000000000000 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveBasicStatistics.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.facebook.presto.hive; - -import org.testng.annotations.Test; - -import java.util.OptionalLong; - -import static com.facebook.presto.hive.HiveBasicStatistics.createFromPartitionParameters; -import static org.assertj.core.api.Assertions.assertThat; - -public class TestHiveBasicStatistics -{ - @Test - public void testRoundTrip() - { - testRoundTrip(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty())); - testRoundTrip(new HiveBasicStatistics(OptionalLong.of(1), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.empty())); - testRoundTrip(new HiveBasicStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3), OptionalLong.of(4))); - } - - private static void testRoundTrip(HiveBasicStatistics expected) - { - assertThat(createFromPartitionParameters(expected.toPartitionParameters())) - .isEqualTo(expected); - } -} diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java 
b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java index ef579e059baf4..975cad6f6a9ff 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveClientConfig.java @@ -30,6 +30,8 @@ import java.util.TimeZone; import java.util.concurrent.TimeUnit; +import static com.facebook.presto.hive.HiveClientConfig.CollectColumnStatisticsOnWriteOption.DISABLED; +import static com.facebook.presto.hive.HiveClientConfig.CollectColumnStatisticsOnWriteOption.ENABLED_FOR_MARKED_TABLES; import static com.facebook.presto.hive.TestHiveUtil.nonDefaultTimeZone; public class TestHiveClientConfig @@ -104,7 +106,8 @@ public void testDefaults() .setFileSystemMaxCacheSize(1000) .setTableStatisticsEnabled(true) .setWritesToNonManagedTablesEnabled(false) - .setCreatesOfNonManagedTablesEnabled(true)); + .setCreatesOfNonManagedTablesEnabled(true) + .setCollectColumnStatisticsOnWrite(ENABLED_FOR_MARKED_TABLES)); } @Test @@ -178,6 +181,7 @@ public void testExplicitPropertyMappings() .put("hive.table-statistics-enabled", "false") .put("hive.non-managed-table-writes-enabled", "true") .put("hive.non-managed-table-creates-enabled", "false") + .put("hive.collect-column-statistics-on-write", "DISABLED") .build(); HiveClientConfig expected = new HiveClientConfig() @@ -247,7 +251,8 @@ public void testExplicitPropertyMappings() .setFileSystemMaxCacheSize(1010) .setTableStatisticsEnabled(false) .setWritesToNonManagedTablesEnabled(true) - .setCreatesOfNonManagedTablesEnabled(false); + .setCreatesOfNonManagedTablesEnabled(false) + .setCollectColumnStatisticsOnWrite(DISABLED); ConfigAssertions.assertFullMapping(properties, expected); } diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java index 74d0bccae028a..da79bd3f1ce63 100644 --- 
a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java @@ -15,6 +15,7 @@ import com.facebook.presto.Session; import com.facebook.presto.connector.ConnectorId; +import com.facebook.presto.hive.HiveClientConfig.CollectColumnStatisticsOnWriteOption; import com.facebook.presto.hive.HiveSessionProperties.InsertExistingPartitionsBehavior; import com.facebook.presto.metadata.Metadata; import com.facebook.presto.metadata.QualifiedObjectName; @@ -54,6 +55,9 @@ import static com.facebook.presto.SystemSessionProperties.CONCURRENT_LIFESPANS_PER_NODE; import static com.facebook.presto.SystemSessionProperties.DISTRIBUTED_JOIN; import static com.facebook.presto.SystemSessionProperties.GROUPED_EXECUTION_FOR_AGGREGATION; +import static com.facebook.presto.hive.HiveClientConfig.CollectColumnStatisticsOnWriteOption.DISABLED; +import static com.facebook.presto.hive.HiveClientConfig.CollectColumnStatisticsOnWriteOption.ENABLED; +import static com.facebook.presto.hive.HiveClientConfig.CollectColumnStatisticsOnWriteOption.ENABLED_FOR_MARKED_TABLES; import static com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME; import static com.facebook.presto.hive.HiveColumnHandle.PATH_COLUMN_NAME; import static com.facebook.presto.hive.HiveQueryRunner.HIVE_CATALOG; @@ -773,6 +777,14 @@ private void testCreateInvalidBucketedTable(HiveStorageFormat storageFormat) assertFalse(getQueryRunner().tableExists(getSession(), tableName)); } + @Test + public void testCreatePartitionedUnionAll() + { + assertUpdate("CREATE TABLE test_create_partitioned_union_all (a varchar, ds varchar) WITH (partitioned_by = ARRAY['ds'])"); + assertUpdate("INSERT INTO test_create_partitioned_union_all SELECT 'a', '2013-05-17' UNION ALL SELECT 'b', '2013-05-17'", 2); + assertUpdate("DROP TABLE test_create_partitioned_union_all"); + } + @Test public void testInsertPartitionedBucketedTableFewRows() { 
@@ -2548,6 +2560,47 @@ public void testCurrentUserInView() assertUpdate("DROP TABLE test_accounts"); } + @Test + public void testCollectColumnStatisticsOnWriteSwitches() + { + assertCollectColumnStatisticsOnWrite(ENABLED, false, true); + assertCollectColumnStatisticsOnWrite(DISABLED, true, false); + assertCollectColumnStatisticsOnWrite(ENABLED_FOR_MARKED_TABLES, true, true); + assertCollectColumnStatisticsOnWrite(ENABLED_FOR_MARKED_TABLES, false, false); + } + + public void assertCollectColumnStatisticsOnWrite( + CollectColumnStatisticsOnWriteOption sessionProperty, + boolean tableProperty, + boolean expectStatisticsToBeCollected) + { + Session session = testSessionBuilder() + .setCatalog(getSession().getCatalog().get()) + .setSchema(getSession().getSchema().get()) + .setCatalogSessionProperty(getSession().getCatalog().get(), "collect_column_statistics_on_write", sessionProperty.name()) + .build(); + String tableName = "test_collect_column_statistics_on_write"; + assertUpdate(session, format("" + + "CREATE TABLE %s " + + "WITH (collect_column_statistics_on_write_enabled = %s) " + + "AS " + + "SELECT CAST(null AS BIGINT) as col1", tableName, tableProperty), 1); + assertUpdate(session, format("" + + "INSERT INTO %s " + + "SELECT CAST(null AS BIGINT)", tableName), 1); + + String expectedStatististics = expectStatisticsToBeCollected ? 
+ "SELECT * FROM VALUES ('col1', null, 0.0E0, 1.0E0, null, null, null), (null, null, null, null, 2.0E0, null, null)" : + "SELECT * FROM VALUES ('col1', null, null, null, null, null, null), (null, null, null, null, 2.0E0, null, null)"; + + assertQuery( + session, + format("SHOW STATS FOR (SELECT * FROM %s)", tableName), + expectedStatististics); + + assertUpdate(session, format("DROP TABLE %s", tableName)); + } + private Session getParallelWriteSession() { return Session.builder(getSession()) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/glue/TestHiveClientGlueMetastore.java b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/glue/TestHiveClientGlueMetastore.java index 31db3eaab5350..6e519cc1f9f26 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/glue/TestHiveClientGlueMetastore.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/glue/TestHiveClientGlueMetastore.java @@ -49,4 +49,40 @@ public void testRenameTable() { // rename table is not yet supported by Glue } + + @Override + public void testUpdateTableColumnStatistics() + { + // column level statistics are not supported by the Glue Metastore + } + + @Override + public void testUpdateTableColumnStatisticsEmptyOptionalFields() + { + // column level statistics are not supported by the Glue Metastore + } + + @Override + public void testUpdatePartitionColumnStatistics() + { + // column level statistics are not supported by the Glue Metastore + } + + @Override + public void testUpdatePartitionColumnStatisticsEmptyOptionalFields() + { + // column level statistics are not supported by the Glue Metastore + } + + @Override + public void testMigrateTableColumnStatistics() + { + // column level statistics are not supported by the Glue Metastore + } + + @Override + public void testMigratePartitionColumnStatistics() + { + // column level statistics are not supported by the Glue Metastore + } } diff --git 
a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/InMemoryHiveMetastore.java b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/InMemoryHiveMetastore.java index 4c20e86aaaddb..709a6f08d0afe 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/InMemoryHiveMetastore.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/InMemoryHiveMetastore.java @@ -13,6 +13,7 @@ */ package com.facebook.presto.hive.metastore.thrift; +import com.facebook.presto.hive.PartitionStatistics; import com.facebook.presto.hive.SchemaAlreadyExistsException; import com.facebook.presto.hive.TableAlreadyExistsException; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; @@ -26,7 +27,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Partition; @@ -53,6 +53,7 @@ import java.util.Set; import java.util.function.Function; +import static com.facebook.presto.hive.HiveBasicStatistics.createEmptyStatistics; import static com.facebook.presto.hive.HiveUtil.toPartitionValues; import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP; import static com.facebook.presto.spi.StandardErrorCode.SCHEMA_NOT_EMPTY; @@ -84,9 +85,9 @@ public class InMemoryHiveMetastore @GuardedBy("this") private final Map partitions = new HashMap<>(); @GuardedBy("this") - private final Map> columnStatistics = new HashMap<>(); + private final Map columnStatistics = new HashMap<>(); @GuardedBy("this") - private final Map> partitionColumnStatistics = new HashMap<>(); + private final Map partitionColumnStatistics = new HashMap<>(); @GuardedBy("this") private final Map> roleGrants = new HashMap<>(); 
@GuardedBy("this") @@ -441,55 +442,48 @@ public synchronized Optional
getTable(String databaseName, String tableNa } @Override - public synchronized Set getTableColumnStatistics(String databaseName, String tableName, Set columnNames) + public boolean supportsColumnStatistics() { - SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName); - if (!columnStatistics.containsKey(schemaTableName)) { - return ImmutableSet.of(); - } - - Map columnStatisticsMap = columnStatistics.get(schemaTableName); - return columnNames.stream() - .filter(columnStatisticsMap::containsKey) - .map(columnStatisticsMap::get) - .collect(toImmutableSet()); + return true; } - public synchronized void setColumnStatistics(String databaseName, String tableName, String columnName, ColumnStatisticsObj columnStatisticsObj) + @Override + public synchronized PartitionStatistics getTableStatistics(String databaseName, String tableName) { - checkArgument(columnStatisticsObj.getColName().equals(columnName), "columnName argument and columnStatisticsObj.getColName() must be the same"); SchemaTableName schemaTableName = new SchemaTableName(databaseName, tableName); - columnStatistics.computeIfAbsent(schemaTableName, key -> new HashMap<>()).put(columnName, columnStatisticsObj); + PartitionStatistics statistics = columnStatistics.get(schemaTableName); + if (statistics == null) { + statistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()); + } + return statistics; } @Override - public synchronized Map> getPartitionColumnStatistics(String databaseName, String tableName, Set partitionNames, Set columnNames) + public synchronized Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) { - ImmutableMap.Builder> result = ImmutableMap.builder(); + ImmutableMap.Builder result = ImmutableMap.builder(); for (String partitionName : partitionNames) { PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionName); - if (!partitionColumnStatistics.containsKey(partitionKey)) { - continue; + 
PartitionStatistics statistics = partitionColumnStatistics.get(partitionKey); + if (statistics == null) { + statistics = new PartitionStatistics(createEmptyStatistics(), ImmutableMap.of()); } - - Map columnStatistics = partitionColumnStatistics.get(partitionKey); - result.put( - partitionName, - columnNames.stream() - .filter(columnStatistics::containsKey) - .map(columnStatistics::get) - .collect(toImmutableSet())); + result.put(partitionName, statistics); } return result.build(); } - public synchronized void setPartitionColumnStatistics(String databaseName, String tableName, String partitionName, String columnName, ColumnStatisticsObj columnStatisticsObj) + @Override + public synchronized void updateTableStatistics(String databaseName, String tableName, Function update) + { + columnStatistics.put(new SchemaTableName(databaseName, tableName), update.apply(getTableStatistics(databaseName, tableName))); + } + + @Override + public synchronized void updatePartitionStatistics(String databaseName, String tableName, String partitionName, Function update) { - checkArgument(columnStatisticsObj.getColName().equals(columnName), "columnName argument and columnStatisticsObj.getColName() must be the same"); PartitionName partitionKey = PartitionName.partition(databaseName, tableName, partitionName); - partitionColumnStatistics - .computeIfAbsent(partitionKey, key -> new HashMap<>()) - .put(columnName, columnStatisticsObj); + partitionColumnStatistics.put(partitionKey, update.apply(getPartitionStatistics(databaseName, tableName, ImmutableSet.of(partitionName)).get(partitionName))); } @Override diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/MockHiveMetastoreClient.java b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/MockHiveMetastoreClient.java index 080efa8fd8a74..5f853ee27fd73 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/MockHiveMetastoreClient.java +++ 
b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/MockHiveMetastoreClient.java @@ -136,12 +136,36 @@ public List getTableColumnStatistics(String databaseName, S throw new UnsupportedOperationException(); } + @Override + public void setTableColumnStatistics(String databaseName, String tableName, List statistics) + { + throw new UnsupportedOperationException(); + } + + @Override + public void deleteTableColumnStatistics(String databaseName, String tableName, String columnName) + { + throw new UnsupportedOperationException(); + } + @Override public Map> getPartitionColumnStatistics(String databaseName, String tableName, List partitionNames, List columnNames) { throw new UnsupportedOperationException(); } + @Override + public void setPartitionColumnStatistics(String databaseName, String tableName, String partitionName, List statistics) + { + throw new UnsupportedOperationException(); + } + + @Override + public void deletePartitionColumnStatistics(String databaseName, String tableName, String partitionName, String columnName) + { + throw new UnsupportedOperationException(); + } + @Override public List getTableNamesByFilter(String databaseName, String filter) { diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftMetastoreUtil.java b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftMetastoreUtil.java index 2df6ddcedb9da..ac52382da91c4 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftMetastoreUtil.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftMetastoreUtil.java @@ -13,7 +13,9 @@ */ package com.facebook.presto.hive.metastore.thrift; +import com.facebook.presto.hive.HiveBasicStatistics; import com.facebook.presto.hive.metastore.HiveColumnStatistics; +import com.google.common.collect.ImmutableMap; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import 
org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -34,6 +36,8 @@ import java.util.OptionalLong; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiColumnStatistics; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticParameters; import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.binaryStats; import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.booleanStats; import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.dateStats; @@ -262,6 +266,19 @@ public void testEmptyBinaryStatsToColumnStatistics() assertEmptyColumnStats(actual); } + @Test + public void testBasicStatisticsRoundTrip() + { + testBasicStatisticsRoundTrip(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty())); + testBasicStatisticsRoundTrip(new HiveBasicStatistics(OptionalLong.of(1), OptionalLong.empty(), OptionalLong.of(2), OptionalLong.empty())); + testBasicStatisticsRoundTrip(new HiveBasicStatistics(OptionalLong.of(1), OptionalLong.of(2), OptionalLong.of(3), OptionalLong.of(4))); + } + + private static void testBasicStatisticsRoundTrip(HiveBasicStatistics expected) + { + assertEquals(getHiveBasicStatistics(updateStatisticParameters(ImmutableMap.of(), expected)), expected); + } + private static void assertEmptyColumnStats(HiveColumnStatistics actual) { assertEquals(actual.getLowValue(), Optional.empty()); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestStatistics.java b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestStatistics.java index 1cd09d545f331..11b35e671783c 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestStatistics.java +++ 
b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestStatistics.java @@ -14,21 +14,21 @@ package com.facebook.presto.hive.util; import com.facebook.presto.hive.HiveBasicStatistics; -import com.facebook.presto.hive.metastore.Partition; -import com.facebook.presto.hive.metastore.Table; -import com.facebook.presto.hive.util.Statistics.ReduceOperator; -import com.google.common.collect.ImmutableList; +import com.facebook.presto.hive.metastore.HiveColumnStatistics; +import com.google.common.collect.ImmutableMap; import org.testng.annotations.Test; +import java.util.Map; +import java.util.Optional; +import java.util.OptionalDouble; +import java.util.OptionalLong; + import static com.facebook.presto.hive.HiveBasicStatistics.createEmptyStatistics; -import static com.facebook.presto.hive.HiveBasicStatistics.createFromPartitionParameters; import static com.facebook.presto.hive.HiveBasicStatistics.createZeroStatistics; -import static com.facebook.presto.hive.metastore.glue.TestingMetastoreObjects.getPrestoTestPartition; -import static com.facebook.presto.hive.metastore.glue.TestingMetastoreObjects.getPrestoTestTable; import static com.facebook.presto.hive.util.Statistics.ReduceOperator.ADD; import static com.facebook.presto.hive.util.Statistics.ReduceOperator.SUBTRACT; +import static com.facebook.presto.hive.util.Statistics.merge; import static com.facebook.presto.hive.util.Statistics.reduce; -import static com.facebook.presto.hive.util.Statistics.updateStatistics; import static org.assertj.core.api.Assertions.assertThat; public class TestStatistics @@ -53,74 +53,102 @@ public void testReduce() } @Test - public void testUpdateTableStatistics() + public void testMergeHiveColumnStatistics() { - testUpdateTableStatistics(ADD, createEmptyStatistics(), createEmptyStatistics(), createEmptyStatistics()); - testUpdateTableStatistics(SUBTRACT, createEmptyStatistics(), createEmptyStatistics(), createEmptyStatistics()); - testUpdateTableStatistics(ADD, createZeroStatistics(), 
createEmptyStatistics(), createEmptyStatistics()); - testUpdateTableStatistics(SUBTRACT, createZeroStatistics(), createEmptyStatistics(), createEmptyStatistics()); - testUpdateTableStatistics(ADD, createEmptyStatistics(), createZeroStatistics(), createEmptyStatistics()); - testUpdateTableStatistics(SUBTRACT, createEmptyStatistics(), createZeroStatistics(), createEmptyStatistics()); - testUpdateTableStatistics( - ADD, - new HiveBasicStatistics(1, 2, 3, 4), - new HiveBasicStatistics(2, 3, 4, 5), - new HiveBasicStatistics(3, 5, 7, 9)); - testUpdateTableStatistics( - SUBTRACT, - new HiveBasicStatistics(11, 9, 7, 5), - new HiveBasicStatistics(1, 2, 3, 4), - new HiveBasicStatistics(10, 7, 4, 1)); - } + assertMergeHiveColumnStatistics( + new HiveColumnStatistics( + Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty()), + OptionalLong.empty(), + new HiveColumnStatistics( + Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty()), + OptionalLong.empty(), + new HiveColumnStatistics( + Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty())); - @Test - public void testUpdatePartitionStatistics() - { - testUpdatePartitionStatistics(ADD, createEmptyStatistics(), createEmptyStatistics(), createEmptyStatistics()); - testUpdatePartitionStatistics(SUBTRACT, createEmptyStatistics(), createEmptyStatistics(), createEmptyStatistics()); - testUpdatePartitionStatistics(ADD, createZeroStatistics(), createEmptyStatistics(), createEmptyStatistics()); - testUpdatePartitionStatistics(SUBTRACT, createZeroStatistics(), createEmptyStatistics(), createEmptyStatistics()); - testUpdatePartitionStatistics(ADD, createEmptyStatistics(), createZeroStatistics(), 
createEmptyStatistics()); - testUpdatePartitionStatistics(SUBTRACT, createEmptyStatistics(), createZeroStatistics(), createEmptyStatistics()); - testUpdatePartitionStatistics( - ADD, - new HiveBasicStatistics(1, 2, 3, 4), - new HiveBasicStatistics(2, 3, 4, 5), - new HiveBasicStatistics(3, 5, 7, 9)); - testUpdatePartitionStatistics( - SUBTRACT, - new HiveBasicStatistics(11, 9, 7, 5), - new HiveBasicStatistics(1, 2, 3, 4), - new HiveBasicStatistics(10, 7, 4, 1)); - } + assertMergeHiveColumnStatistics( + new HiveColumnStatistics( + Optional.empty(), Optional.empty(), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty()), + OptionalLong.empty(), + new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(1), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6)), + OptionalLong.empty(), + new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.empty(), OptionalDouble.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty(), OptionalLong.empty())); - private static void testUpdateTableStatistics(ReduceOperator operator, HiveBasicStatistics initial, HiveBasicStatistics update, HiveBasicStatistics expected) - { - Table initialTable = table(initial); - Table updatedTable = updateStatistics(initialTable, update, operator); - HiveBasicStatistics updatedStatistics = createFromPartitionParameters(updatedTable.getParameters()); - assertThat(updatedStatistics).isEqualTo(expected); - } + assertMergeHiveColumnStatistics( + new HiveColumnStatistics( + Optional.of(6L), Optional.of(4L), OptionalLong.of(5), OptionalDouble.of(3), OptionalLong.of(7), OptionalLong.of(9), OptionalLong.of(2), OptionalLong.of(8)), + OptionalLong.of(1), + new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(3), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), 
OptionalLong.of(6)), + OptionalLong.of(1), + new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(5), OptionalDouble.of(2.5), OptionalLong.of(10), OptionalLong.of(13), OptionalLong.of(7), OptionalLong.of(8))); - private static void testUpdatePartitionStatistics(ReduceOperator operator, HiveBasicStatistics initial, HiveBasicStatistics update, HiveBasicStatistics expected) - { - Partition initialPartition = partition(initial); - Partition updatedPartition = updateStatistics(initialPartition, update, operator); - HiveBasicStatistics updatedStatistics = createFromPartitionParameters(updatedPartition.getParameters()); - assertThat(updatedStatistics).isEqualTo(expected); + assertMergeHiveColumnStatistics( + new HiveColumnStatistics( + Optional.of(6L), Optional.of(4L), OptionalLong.of(5), OptionalDouble.of(3), OptionalLong.of(7), OptionalLong.of(9), OptionalLong.of(2), OptionalLong.of(8)), + OptionalLong.empty(), + new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(3), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6)), + OptionalLong.of(1), + new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(5), OptionalDouble.empty(), OptionalLong.of(10), OptionalLong.of(13), OptionalLong.of(7), OptionalLong.of(8))); + + assertMergeHiveColumnStatistics( + new HiveColumnStatistics( + Optional.of(6L), Optional.of(4L), OptionalLong.of(5), OptionalDouble.of(3), OptionalLong.of(7), OptionalLong.of(9), OptionalLong.of(2), OptionalLong.of(8)), + OptionalLong.of(4), + new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(3), OptionalDouble.of(4), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6)), + OptionalLong.of(1), + new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(5), OptionalDouble.of(3.2), OptionalLong.of(10), OptionalLong.of(13), OptionalLong.of(7), OptionalLong.of(8))); + + 
assertMergeHiveColumnStatistics( + new HiveColumnStatistics( + Optional.of("5"), Optional.of("5"), OptionalLong.of(3), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6)), + OptionalLong.of(1), + new HiveColumnStatistics( + Optional.of("6"), Optional.of("4"), OptionalLong.of(5), OptionalDouble.of(3), OptionalLong.of(7), OptionalLong.of(9), OptionalLong.of(2), OptionalLong.of(8)), + OptionalLong.of(1), + new HiveColumnStatistics( + Optional.of("5"), Optional.of("5"), OptionalLong.of(5), OptionalDouble.of(2.5), OptionalLong.of(10), OptionalLong.of(13), OptionalLong.of(7), OptionalLong.of(8))); } - private static Table table(HiveBasicStatistics statistics) + @Test + public void testMergeHiveColumnStatisticsMap() { - return Table.builder(getPrestoTestTable("test_database")) - .setParameters(statistics.toPartitionParameters()) - .build(); + Map first = ImmutableMap.of( + "column1", new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(1), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6)), + "column2", new HiveColumnStatistics( + Optional.of(6L), Optional.of(4L), OptionalLong.of(5), OptionalDouble.of(3), OptionalLong.of(7), OptionalLong.of(9), OptionalLong.of(2), OptionalLong.of(8)), + "column3", new HiveColumnStatistics( + Optional.of("5"), Optional.of("5"), OptionalLong.of(3), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6)), + "column4", new HiveColumnStatistics( + Optional.of("5"), Optional.of("5"), OptionalLong.of(3), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6))); + Map second = ImmutableMap.of( + "column5", new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(1), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6)), + "column2", new HiveColumnStatistics( + 
Optional.of(5L), Optional.of(5L), OptionalLong.of(3), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6)), + "column3", new HiveColumnStatistics( + Optional.of("6"), Optional.of("4"), OptionalLong.of(5), OptionalDouble.of(3), OptionalLong.of(7), OptionalLong.of(9), OptionalLong.of(2), OptionalLong.of(8)), + "column6", new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(1), OptionalDouble.of(2), OptionalLong.of(3), OptionalLong.of(4), OptionalLong.of(5), OptionalLong.of(6))); + Map expected = ImmutableMap.of( + "column2", new HiveColumnStatistics( + Optional.of(5L), Optional.of(5L), OptionalLong.of(5), OptionalDouble.of(2.5), OptionalLong.of(10), OptionalLong.of(13), OptionalLong.of(7), OptionalLong.of(8)), + "column3", new HiveColumnStatistics( + Optional.of("5"), Optional.of("5"), OptionalLong.of(5), OptionalDouble.of(2.5), OptionalLong.of(10), OptionalLong.of(13), OptionalLong.of(7), OptionalLong.of(8))); + assertThat(merge(first, OptionalLong.of(5), second, OptionalLong.of(5))).isEqualTo(expected); + assertThat(merge(ImmutableMap.of(), OptionalLong.empty(), ImmutableMap.of(), OptionalLong.empty())).isEqualTo(ImmutableMap.of()); } - private static Partition partition(HiveBasicStatistics statistics) + private static void assertMergeHiveColumnStatistics( + HiveColumnStatistics first, OptionalLong firstRowCount, HiveColumnStatistics second, OptionalLong secondRowCount, HiveColumnStatistics expected) { - return Partition.builder(getPrestoTestPartition("test_database", "test_table", ImmutableList.of("test_partition"))) - .setParameters(statistics.toPartitionParameters()) - .build(); + assertThat(merge(first, firstRowCount, second, secondRowCount)).isEqualTo(expected); } } diff --git a/presto-main/src/main/java/com/facebook/presto/metadata/Metadata.java b/presto-main/src/main/java/com/facebook/presto/metadata/Metadata.java index 140decfeaf6a0..9358102230b91 100644 --- 
a/presto-main/src/main/java/com/facebook/presto/metadata/Metadata.java +++ b/presto-main/src/main/java/com/facebook/presto/metadata/Metadata.java @@ -29,7 +29,9 @@ import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.security.GrantInfo; import com.facebook.presto.spi.security.Privilege; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.statistics.TableStatistics; +import com.facebook.presto.spi.statistics.TableStatisticsMetadata; import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.TypeManager; import com.facebook.presto.spi.type.TypeSignature; @@ -189,10 +191,20 @@ public interface Metadata /** * Finish a table creation with data after the data is written. */ - Optional finishCreateTable(Session session, OutputTableHandle tableHandle, Collection fragments); + Optional finishCreateTable(Session session, OutputTableHandle tableHandle, Collection fragments, List computedStatistics); Optional getInsertLayout(Session session, TableHandle target); + /** + * Describes statistics that must be collected for a new table + */ + TableStatisticsMetadata getNewTableStatisticsMetadata(Session session, String catalogName, ConnectorTableMetadata tableMetadata); + + /** + * Describes statistics that must be collected for an existing table during the INSERT operation + */ + TableStatisticsMetadata getInsertStatisticsMetadata(Session session, TableHandle tableHandle); + /** * Start a SELECT/UPDATE/INSERT/DELETE query */ @@ -212,7 +224,7 @@ public interface Metadata /** * Finish insert query */ - Optional finishInsert(Session session, InsertTableHandle tableHandle, Collection fragments); + Optional finishInsert(Session session, InsertTableHandle tableHandle, Collection fragments, List computedStatistics); /** * Get the row ID column handle used with UpdatablePageSource. 
diff --git a/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java b/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java index 5366f5d4f01f8..fb7ef1ca5f030 100644 --- a/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java +++ b/presto-main/src/main/java/com/facebook/presto/metadata/MetadataManager.java @@ -44,7 +44,9 @@ import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.security.GrantInfo; import com.facebook.presto.spi.security.Privilege; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.statistics.TableStatistics; +import com.facebook.presto.spi.statistics.TableStatisticsMetadata; import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.TypeManager; import com.facebook.presto.spi.type.TypeSignature; @@ -601,6 +603,24 @@ public Optional getInsertLayout(Session session, TableHandle tab .map(layout -> new NewTableLayout(connectorId, catalogMetadata.getTransactionHandleFor(connectorId), layout)); } + @Override + public TableStatisticsMetadata getNewTableStatisticsMetadata(Session session, String catalogName, ConnectorTableMetadata tableMetadata) + { + CatalogMetadata catalogMetadata = getCatalogMetadataForWrite(session, catalogName); + ConnectorMetadata metadata = catalogMetadata.getMetadata(); + ConnectorId connectorId = catalogMetadata.getConnectorId(); + return metadata.getNewTableStatisticsMetadata(session.toConnectorSession(connectorId), tableMetadata); + } + + @Override + public TableStatisticsMetadata getInsertStatisticsMetadata(Session session, TableHandle table) + { + ConnectorId connectorId = table.getConnectorId(); + CatalogMetadata catalogMetadata = getCatalogMetadataForWrite(session, connectorId); + ConnectorMetadata metadata = catalogMetadata.getMetadata(); + return metadata.getInsertIntoTableStatisticsMetadata(session.toConnectorSession(connectorId), table.getConnectorHandle()); + } + 
@Override public Optional getNewTableLayout(Session session, String catalogName, ConnectorTableMetadata tableMetadata) { @@ -663,11 +683,11 @@ public OutputTableHandle beginCreateTable(Session session, String catalogName, C } @Override - public Optional finishCreateTable(Session session, OutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(Session session, OutputTableHandle tableHandle, Collection fragments, List computedStatistics) { ConnectorId connectorId = tableHandle.getConnectorId(); ConnectorMetadata metadata = getMetadata(session, connectorId); - return metadata.finishCreateTable(session.toConnectorSession(connectorId), tableHandle.getConnectorHandle(), fragments); + return metadata.finishCreateTable(session.toConnectorSession(connectorId), tableHandle.getConnectorHandle(), fragments, computedStatistics); } @Override @@ -682,11 +702,11 @@ public InsertTableHandle beginInsert(Session session, TableHandle tableHandle) } @Override - public Optional finishInsert(Session session, InsertTableHandle tableHandle, Collection fragments) + public Optional finishInsert(Session session, InsertTableHandle tableHandle, Collection fragments, List computedStatistics) { ConnectorId connectorId = tableHandle.getConnectorId(); ConnectorMetadata metadata = getMetadata(session, connectorId); - return metadata.finishInsert(session.toConnectorSession(connectorId), tableHandle.getConnectorHandle(), fragments); + return metadata.finishInsert(session.toConnectorSession(connectorId), tableHandle.getConnectorHandle(), fragments, computedStatistics); } @Override diff --git a/presto-main/src/main/java/com/facebook/presto/operator/NoOpOperator.java b/presto-main/src/main/java/com/facebook/presto/operator/NoOpOperator.java new file mode 100644 index 0000000000000..c854aad07de3f --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/operator/NoOpOperator.java @@ -0,0 +1,96 @@ +/* + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator; + +import com.facebook.presto.spi.Page; +import com.facebook.presto.sql.planner.plan.PlanNodeId; + +import static java.util.Objects.requireNonNull; + +public class NoOpOperator + implements Operator +{ + public static class NoOpOperatorFactory + implements OperatorFactory + { + private final int operatorId; + private final PlanNodeId planNodeId; + + public NoOpOperatorFactory(int operatorId, PlanNodeId planNodeId) + { + this.operatorId = operatorId; + this.planNodeId = requireNonNull(planNodeId, "planNodeId is null"); + } + + @Override + public Operator createOperator(DriverContext driverContext) + { + return new NoOpOperator(driverContext.addOperatorContext(operatorId, planNodeId, NoOpOperator.class.getSimpleName())); + } + + @Override + public void noMoreOperators() + { + } + + @Override + public OperatorFactory duplicate() + { + return new NoOpOperatorFactory(operatorId, planNodeId); + } + } + + private final OperatorContext context; + private boolean finished; + + public NoOpOperator(OperatorContext context) + { + this.context = requireNonNull(context, "context is null"); + } + + @Override + public OperatorContext getOperatorContext() + { + return context; + } + + @Override + public boolean needsInput() + { + return !finished; + } + + @Override + public void addInput(Page page) + { + } + + @Override + public Page getOutput() + { + return null; + } + + @Override + public void finish() + { + finished = true; + } + + 
@Override + public boolean isFinished() + { + return finished; + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/operator/TableFinishOperator.java b/presto-main/src/main/java/com/facebook/presto/operator/TableFinishOperator.java index 92bcef5422a95..739cb46337f57 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/TableFinishOperator.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/TableFinishOperator.java @@ -17,9 +17,12 @@ import com.facebook.presto.spi.PageBuilder; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.connector.ConnectorOutputMetadata; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.type.Type; import com.facebook.presto.sql.planner.plan.PlanNodeId; +import com.facebook.presto.sql.planner.plan.StatisticAggregationsDescriptor; import com.google.common.collect.ImmutableList; +import com.google.common.util.concurrent.ListenableFuture; import io.airlift.slice.Slice; import java.util.Collection; @@ -42,13 +45,22 @@ public static class TableFinishOperatorFactory private final int operatorId; private final PlanNodeId planNodeId; private final TableFinisher tableFinisher; + private final OperatorFactory statisticsAggregation; + private final StatisticAggregationsDescriptor descriptor; private boolean closed; - public TableFinishOperatorFactory(int operatorId, PlanNodeId planNodeId, TableFinisher tableFinisher) + public TableFinishOperatorFactory( + int operatorId, + PlanNodeId planNodeId, + TableFinisher tableFinisher, + OperatorFactory statisticsAggregation, + StatisticAggregationsDescriptor descriptor) { this.operatorId = operatorId; this.planNodeId = requireNonNull(planNodeId, "planNodeId is null"); this.tableFinisher = requireNonNull(tableFinisher, "tableCommitter is null"); + this.statisticsAggregation = requireNonNull(statisticsAggregation, "statisticsAggregation is null"); + this.descriptor = requireNonNull(descriptor, "descriptor is 
null"); } @Override @@ -56,7 +68,7 @@ public Operator createOperator(DriverContext driverContext) { checkState(!closed, "Factory is already closed"); OperatorContext context = driverContext.addOperatorContext(operatorId, planNodeId, TableFinishOperator.class.getSimpleName()); - return new TableFinishOperator(context, tableFinisher); + return new TableFinishOperator(context, tableFinisher, statisticsAggregation.createOperator(driverContext), descriptor); } @Override @@ -68,7 +80,7 @@ public void noMoreOperators() @Override public OperatorFactory duplicate() { - return new TableFinishOperatorFactory(operatorId, planNodeId, tableFinisher); + return new TableFinishOperatorFactory(operatorId, planNodeId, tableFinisher, statisticsAggregation, descriptor); } } @@ -79,6 +91,8 @@ private enum State private final OperatorContext operatorContext; private final TableFinisher tableFinisher; + private final Operator statisticsAggregation; + private final StatisticAggregationsDescriptor descriptor; private State state = State.RUNNING; private long rowCount; @@ -86,10 +100,16 @@ private enum State private Optional outputMetadata = Optional.empty(); private final ImmutableList.Builder fragmentBuilder = ImmutableList.builder(); - public TableFinishOperator(OperatorContext operatorContext, TableFinisher tableFinisher) + public TableFinishOperator( + OperatorContext operatorContext, + TableFinisher tableFinisher, + Operator statisticsAggregation, + StatisticAggregationsDescriptor descriptor) { this.operatorContext = requireNonNull(operatorContext, "operatorContext is null"); this.tableFinisher = requireNonNull(tableFinisher, "tableCommitter is null"); + this.statisticsAggregation = requireNonNull(statisticsAggregation, "statisticsAggregation is null"); + this.descriptor = requireNonNull(descriptor, "descriptor is null"); operatorContext.setInfoSupplier(() -> new TableFinishInfo(outputMetadata)); } @@ -105,6 +125,7 @@ public void finish() { if (state == State.RUNNING) { state = 
State.FINISHING; + statisticsAggregation.finish(); } } @@ -114,10 +135,16 @@ public boolean isFinished() return state == State.FINISHED; } + @Override + public ListenableFuture isBlocked() + { + return statisticsAggregation.isBlocked(); + } + @Override public boolean needsInput() { - return state == State.RUNNING; + return state == State.RUNNING && statisticsAggregation.needsInput(); } @Override @@ -126,6 +153,21 @@ public void addInput(Page page) requireNonNull(page, "page is null"); checkState(state == State.RUNNING, "Operator is %s", state); + if (isStatisticsPage(page)) { + statisticsAggregation.addInput(page); + } + else { + processFragmentPage(page); + } + } + + private boolean isStatisticsPage(Page page) + { + return page.getPositionCount() > 0 && page.getBlock(0).isNull(0); + } + + private void processFragmentPage(Page page) + { Block rowCountBlock = page.getBlock(0); Block fragmentBlock = page.getBlock(1); for (int position = 0; position < page.getPositionCount(); position++) { @@ -141,12 +183,17 @@ public void addInput(Page page) @Override public Page getOutput() { + if (!isBlocked().isDone()) { + return null; + } + if (state != State.FINISHING) { return null; } state = State.FINISHED; - outputMetadata = tableFinisher.finishTable(fragmentBuilder.build()); + List statistics = getComputedStatistics(); + outputMetadata = tableFinisher.finishTable(fragmentBuilder.build(), statistics); // output page will only be constructed once, // so a new PageBuilder is constructed (instead of using PageBuilder.reset) @@ -156,16 +203,51 @@ public Page getOutput() return page.build(); } + private List getComputedStatistics() + { + ImmutableList.Builder statistics = ImmutableList.builder(); + while (!statisticsAggregation.isFinished()) { + Page page = statisticsAggregation.getOutput(); + if (page == null) { + continue; + } + for (int position = 0; position < page.getPositionCount(); position++) { + statistics.add(getComputedStatistics(page, position)); + } + } + return 
statistics.build(); + } + + private ComputedStatistics getComputedStatistics(Page page, int position) + { + ImmutableList.Builder groupingColumns = ImmutableList.builder(); + ImmutableList.Builder groupingValues = ImmutableList.builder(); + descriptor.getGrouping().forEach((channel, column) -> { + groupingColumns.add(column); + groupingValues.add(page.getBlock(channel).getSingleValueBlock(position)); + }); + + ComputedStatistics.Builder statistics = ComputedStatistics.builder(groupingColumns.build(), groupingValues.build()); + + descriptor.getTableStatistics().forEach((channel, type) -> statistics.addTableStatistic(type, page.getBlock(channel).getSingleValueBlock(position))); + + descriptor.getColumnStatistics().forEach((channel, metadata) -> statistics.addColumnStatistic(metadata, page.getBlock(channel).getSingleValueBlock(position))); + + return statistics.build(); + } + @Override public void close() + throws Exception { if (!closed) { closed = true; + statisticsAggregation.close(); } } public interface TableFinisher { - Optional finishTable(Collection fragments); + Optional finishTable(Collection fragments, List computedStatistics); } } diff --git a/presto-main/src/main/java/com/facebook/presto/operator/TableWriterOperator.java b/presto-main/src/main/java/com/facebook/presto/operator/TableWriterOperator.java index a7e1c054ce983..05890b7fa2230 100644 --- a/presto-main/src/main/java/com/facebook/presto/operator/TableWriterOperator.java +++ b/presto-main/src/main/java/com/facebook/presto/operator/TableWriterOperator.java @@ -20,6 +20,7 @@ import com.facebook.presto.spi.PageBuilder; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.block.BlockBuilder; +import com.facebook.presto.spi.block.RunLengthEncodedBlock; import com.facebook.presto.spi.type.Type; import com.facebook.presto.split.PageSinkManager; import com.facebook.presto.sql.planner.plan.PlanNodeId; @@ -51,7 +52,8 @@ public class TableWriterOperator implements Operator { - public 
static final List TYPES = ImmutableList.of(BIGINT, VARBINARY); + private static final int ROW_COUNT_CHANNEL = 0; + private static final int FRAGMENT_CHANNEL = 1; public static class TableWriterOperatorFactory implements OperatorFactory @@ -60,24 +62,30 @@ public static class TableWriterOperatorFactory private final PlanNodeId planNodeId; private final PageSinkManager pageSinkManager; private final WriterTarget target; - private final List inputChannels; + private final List columnChannels; private final Session session; + private final OperatorFactory statisticsAggregation; + private final List types; private boolean closed; public TableWriterOperatorFactory(int operatorId, PlanNodeId planNodeId, PageSinkManager pageSinkManager, WriterTarget writerTarget, - List inputChannels, - Session session) + List columnChannels, + Session session, + OperatorFactory statisticsAggregation, + List types) { this.operatorId = operatorId; this.planNodeId = requireNonNull(planNodeId, "planNodeId is null"); - this.inputChannels = requireNonNull(inputChannels, "inputChannels is null"); + this.columnChannels = requireNonNull(columnChannels, "columnChannels is null"); this.pageSinkManager = requireNonNull(pageSinkManager, "pageSinkManager is null"); checkArgument(writerTarget instanceof CreateHandle || writerTarget instanceof InsertHandle, "writerTarget must be CreateHandle or InsertHandle"); this.target = requireNonNull(writerTarget, "writerTarget is null"); this.session = session; + this.statisticsAggregation = requireNonNull(statisticsAggregation, "statisticsAggregation is null"); + this.types = ImmutableList.copyOf(requireNonNull(types, "types is null")); } @Override @@ -85,7 +93,7 @@ public Operator createOperator(DriverContext driverContext) { checkState(!closed, "Factory is already closed"); OperatorContext context = driverContext.addOperatorContext(operatorId, planNodeId, TableWriterOperator.class.getSimpleName()); - return new TableWriterOperator(context, createPageSink(), 
inputChannels); + return new TableWriterOperator(context, createPageSink(), columnChannels, statisticsAggregation.createOperator(driverContext), types); } private ConnectorPageSink createPageSink() @@ -108,7 +116,7 @@ public void noMoreOperators() @Override public OperatorFactory duplicate() { - return new TableWriterOperatorFactory(operatorId, planNodeId, pageSinkManager, target, inputChannels, session); + return new TableWriterOperatorFactory(operatorId, planNodeId, pageSinkManager, target, columnChannels, session, statisticsAggregation, types); } } @@ -120,8 +128,10 @@ private enum State private final OperatorContext operatorContext; private final LocalMemoryContext pageSinkMemoryContext; private final ConnectorPageSink pageSink; - private final List inputChannels; + private final List columnChannels; private final AtomicLong pageSinkPeakMemoryUsage = new AtomicLong(); + private final Operator statisticAggregation; + private final List types; private ListenableFuture blocked = NOT_BLOCKED; private CompletableFuture> finishFuture; @@ -131,15 +141,20 @@ private enum State private boolean closed; private long writtenBytes; - public TableWriterOperator(OperatorContext operatorContext, + public TableWriterOperator( + OperatorContext operatorContext, ConnectorPageSink pageSink, - List inputChannels) + List columnChannels, + Operator statisticAggregation, + List types) { this.operatorContext = requireNonNull(operatorContext, "operatorContext is null"); this.pageSinkMemoryContext = operatorContext.newLocalSystemMemoryContext(); this.pageSink = requireNonNull(pageSink, "pageSink is null"); - this.inputChannels = requireNonNull(inputChannels, "inputChannels is null"); + this.columnChannels = requireNonNull(columnChannels, "columnChannels is null"); this.operatorContext.setInfoSupplier(this::getInfo); + this.statisticAggregation = requireNonNull(statisticAggregation, "statisticAggregation is null"); + this.types = ImmutableList.copyOf(requireNonNull(types, "types is 
null")); } @Override @@ -156,6 +171,7 @@ public void finish() finishFuture = pageSink.finish(); blocked = toListenableFuture(finishFuture); updateWrittenBytes(); + statisticAggregation.finish(); } } @@ -177,13 +193,13 @@ public ListenableFuture isBlocked() public boolean needsInput() { updateBlockedIfNecessary(); - return state == State.RUNNING && blocked == NOT_BLOCKED; + return state == State.RUNNING && blocked == NOT_BLOCKED && statisticAggregation.needsInput(); } private void updateBlockedIfNecessary() { if (blocked != NOT_BLOCKED && blocked.isDone()) { - blocked = NOT_BLOCKED; + blocked = statisticAggregation.isBlocked(); } } @@ -193,11 +209,12 @@ public void addInput(Page page) requireNonNull(page, "page is null"); checkState(needsInput(), "Operator does not need input"); - Block[] blocks = new Block[inputChannels.size()]; - for (int outputChannel = 0; outputChannel < inputChannels.size(); outputChannel++) { - blocks[outputChannel] = page.getBlock(inputChannels.get(outputChannel)); + Block[] blocks = new Block[columnChannels.size()]; + for (int outputChannel = 0; outputChannel < columnChannels.size(); outputChannel++) { + blocks[outputChannel] = page.getBlock(columnChannels.get(outputChannel)); } + statisticAggregation.addInput(page); CompletableFuture future = pageSink.appendPage(new Page(blocks)); updateMemoryUsage(); if (!future.isDone()) { @@ -210,18 +227,57 @@ public void addInput(Page page) @Override public Page getOutput() { - if (state != State.FINISHING || !blocked.isDone()) { + if (!blocked.isDone()) { return null; } + + if (!statisticAggregation.isFinished()) { + Page aggregationOutput = statisticAggregation.getOutput(); + if (aggregationOutput == null) { + return null; + } + int positionCount = aggregationOutput.getPositionCount(); + Block[] outputBlocks = new Block[types.size()]; + for (int channel = 0; channel < types.size(); channel++) { + if (channel == ROW_COUNT_CHANNEL || channel == FRAGMENT_CHANNEL) { + outputBlocks[channel] = 
RunLengthEncodedBlock.create(types.get(channel), null, positionCount); + } + else { + outputBlocks[channel] = aggregationOutput.getBlock(channel - 2); + } + } + return new Page(positionCount, outputBlocks); + } + + if (state != State.FINISHING) { + return null; + } + state = State.FINISHED; + Page fragmentsPage = createFragmentsPage(); + int positionCount = fragmentsPage.getPositionCount(); + Block[] outputBlocks = new Block[types.size()]; + for (int channel = 0; channel < types.size(); channel++) { + if (channel == ROW_COUNT_CHANNEL || channel == FRAGMENT_CHANNEL) { + outputBlocks[channel] = fragmentsPage.getBlock(channel); + } + else { + outputBlocks[channel] = RunLengthEncodedBlock.create(types.get(channel), null, positionCount); + } + } + return new Page(positionCount, outputBlocks); + } + + private Page createFragmentsPage() + { Collection fragments = getFutureValue(finishFuture); committed = true; updateWrittenBytes(); // output page will only be constructed once, // so a new PageBuilder is constructed (instead of using PageBuilder.reset) - PageBuilder page = new PageBuilder(fragments.size() + 1, TYPES); + PageBuilder page = new PageBuilder(fragments.size() + 1, ImmutableList.of(types.get(ROW_COUNT_CHANNEL), types.get(FRAGMENT_CHANNEL))); BlockBuilder rowsBuilder = page.getBlockBuilder(0); BlockBuilder fragmentBuilder = page.getBlockBuilder(1); @@ -242,12 +298,14 @@ public Page getOutput() @Override public void close() + throws Exception { if (!closed) { closed = true; if (!committed) { pageSink.abort(); } + statisticAggregation.close(); } } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/LocalExecutionPlanner.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/LocalExecutionPlanner.java index 23464a4553016..7284dc892547f 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/LocalExecutionPlanner.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/LocalExecutionPlanner.java @@ -49,6 +49,7 
@@ import com.facebook.presto.operator.MetadataDeleteOperator.MetadataDeleteOperatorFactory; import com.facebook.presto.operator.NestedLoopJoinPagesBridge; import com.facebook.presto.operator.NestedLoopJoinPagesSupplier; +import com.facebook.presto.operator.NoOpOperator.NoOpOperatorFactory; import com.facebook.presto.operator.OperatorFactory; import com.facebook.presto.operator.OrderByOperator.OrderByOperatorFactory; import com.facebook.presto.operator.OutputFactory; @@ -116,6 +117,7 @@ import com.facebook.presto.sql.planner.optimizations.IndexJoinOptimizer; import com.facebook.presto.sql.planner.plan.AggregationNode; import com.facebook.presto.sql.planner.plan.AggregationNode.Aggregation; +import com.facebook.presto.sql.planner.plan.AggregationNode.Step; import com.facebook.presto.sql.planner.plan.AssignUniqueId; import com.facebook.presto.sql.planner.plan.Assignments; import com.facebook.presto.sql.planner.plan.DeleteNode; @@ -141,6 +143,7 @@ import com.facebook.presto.sql.planner.plan.SampleNode; import com.facebook.presto.sql.planner.plan.SemiJoinNode; import com.facebook.presto.sql.planner.plan.SortNode; +import com.facebook.presto.sql.planner.plan.StatisticAggregationsDescriptor; import com.facebook.presto.sql.planner.plan.TableFinishNode; import com.facebook.presto.sql.planner.plan.TableScanNode; import com.facebook.presto.sql.planner.plan.TableWriterNode; @@ -226,6 +229,8 @@ import static com.facebook.presto.sql.planner.SystemPartitioningHandle.FIXED_BROADCAST_DISTRIBUTION; import static com.facebook.presto.sql.planner.SystemPartitioningHandle.SCALED_WRITER_DISTRIBUTION; import static com.facebook.presto.sql.planner.SystemPartitioningHandle.SINGLE_DISTRIBUTION; +import static com.facebook.presto.sql.planner.plan.AggregationNode.Step.FINAL; +import static com.facebook.presto.sql.planner.plan.AggregationNode.Step.PARTIAL; import static com.facebook.presto.sql.planner.plan.ExchangeNode.Scope.LOCAL; import static 
com.facebook.presto.sql.planner.plan.JoinNode.Type.FULL; import static com.facebook.presto.sql.planner.plan.JoinNode.Type.RIGHT; @@ -251,6 +256,7 @@ import static com.google.common.collect.Iterables.concat; import static com.google.common.collect.Iterables.getOnlyElement; import static com.google.common.collect.Range.closedOpen; +import static io.airlift.units.DataSize.Unit.BYTE; import static java.lang.String.format; import static java.util.Collections.emptyList; import static java.util.Objects.requireNonNull; @@ -2082,6 +2088,44 @@ public PhysicalOperation visitTableWriter(TableWriterNode node, LocalExecutionPl // serialize writes by forcing data through a single writer PhysicalOperation source = node.getSource().accept(this, context); + ImmutableMap.Builder outputMapping = ImmutableMap.builder(); + // partialrows + outputMapping.put(node.getOutputSymbols().get(0), 0); + // fragment + outputMapping.put(node.getOutputSymbols().get(1), 1); + + OperatorFactory statisticsAggregation = node.getStatisticsAggregation().map(aggregation -> { + List groupingSymbols = aggregation.getGroupingSymbols(); + if (groupingSymbols.isEmpty()) { + return createAggregationOperatorFactory( + node.getId(), + aggregation.getAggregations(), + PARTIAL, + 2, + outputMapping, + source, + context); + } + else { + return createHashAggregationOperatorFactory( + node.getId(), + aggregation.getAggregations(), + ImmutableList.of(groupingSymbols), + groupingSymbols, + PARTIAL, + Optional.empty(), + Optional.empty(), + source, + false, + false, + false, + new DataSize(0, BYTE), + context, + 2, + outputMapping); + } + }).orElse(new NoOpOperatorFactory(context.getNextOperatorId(), node.getId())); + List inputChannels = node.getColumns().stream() .map(source::symbolToChannel) .collect(toImmutableList()); @@ -2092,14 +2136,11 @@ public PhysicalOperation visitTableWriter(TableWriterNode node, LocalExecutionPl pageSinkManager, node.getTarget(), inputChannels, - session); - - Map layout = 
ImmutableMap.builder() - .put(node.getOutputSymbols().get(0), 0) - .put(node.getOutputSymbols().get(1), 1) - .build(); + session, + statisticsAggregation, + getSymbolTypes(node.getOutputSymbols(), context.getTypes())); - return new PhysicalOperation(operatorFactory, layout, context, source); + return new PhysicalOperation(operatorFactory, outputMapping.build(), context, source); } @Override @@ -2107,7 +2148,51 @@ public PhysicalOperation visitTableFinish(TableFinishNode node, LocalExecutionPl { PhysicalOperation source = node.getSource().accept(this, context); - OperatorFactory operatorFactory = new TableFinishOperatorFactory(context.getNextOperatorId(), node.getId(), createTableFinisher(session, node, metadata)); + ImmutableMap.Builder outputMapping = ImmutableMap.builder(); + + OperatorFactory statisticsAggregation = node.getStatisticsAggregation().map(aggregation -> { + List groupingSymbols = aggregation.getGroupingSymbols(); + if (groupingSymbols.isEmpty()) { + return createAggregationOperatorFactory( + node.getId(), + aggregation.getAggregations(), + FINAL, + 0, + outputMapping, + source, + context); + } + else { + return createHashAggregationOperatorFactory( + node.getId(), + aggregation.getAggregations(), + ImmutableList.of(groupingSymbols), + groupingSymbols, + FINAL, + Optional.empty(), + Optional.empty(), + source, + false, + false, + false, + new DataSize(0, BYTE), + context, + 0, + outputMapping); + } + }).orElse(new NoOpOperatorFactory(context.getNextOperatorId(), node.getId())); + + Map aggregationOutput = outputMapping.build(); + StatisticAggregationsDescriptor descriptor = node.getStatisticsAggregationDescriptor() + .map(desc -> desc.map(aggregationOutput::get)) + .orElse(StatisticAggregationsDescriptor.builder().build()); + + OperatorFactory operatorFactory = new TableFinishOperatorFactory( + context.getNextOperatorId(), + node.getId(), + createTableFinisher(session, node, metadata), + statisticsAggregation, + descriptor); Map layout = 
ImmutableMap.of(node.getOutputSymbols().get(0), 0); return new PhysicalOperation(operatorFactory, layout, context, source); @@ -2298,20 +2383,30 @@ private AccumulatorFactory buildAccumulatorFactory( private PhysicalOperation planGlobalAggregation(AggregationNode node, PhysicalOperation source, LocalExecutionPlanContext context) { - int outputChannel = 0; ImmutableMap.Builder outputMappings = ImmutableMap.builder(); - List accumulatorFactories = new ArrayList<>(); - for (Map.Entry entry : node.getAggregations().entrySet()) { + AggregationOperatorFactory operatorFactory = createAggregationOperatorFactory(node.getId(), node.getAggregations(), node.getStep(), 0, outputMappings, source, context); + return new PhysicalOperation(operatorFactory, outputMappings.build(), context, source); + } + + private AggregationOperatorFactory createAggregationOperatorFactory( + PlanNodeId planNodeId, + Map aggregations, + Step step, + int startOutputChannel, + ImmutableMap.Builder outputMappings, + PhysicalOperation source, + LocalExecutionPlanContext context) + { + int outputChannel = startOutputChannel; + ImmutableList.Builder accumulatorFactories = ImmutableList.builder(); + for (Map.Entry entry : aggregations.entrySet()) { Symbol symbol = entry.getKey(); Aggregation aggregation = entry.getValue(); accumulatorFactories.add(buildAccumulatorFactory(source, aggregation)); outputMappings.put(symbol, outputChannel); // one aggregation per channel outputChannel++; } - - OperatorFactory operatorFactory = new AggregationOperatorFactory(context.getNextOperatorId(), node.getId(), node.getStep(), accumulatorFactories); - - return new PhysicalOperation(operatorFactory, outputMappings.build(), context, source); + return new AggregationOperatorFactory(context.getNextOperatorId(), planNodeId, step, accumulatorFactories.build()); } private PhysicalOperation planGroupByAggregation( @@ -2321,11 +2416,46 @@ private PhysicalOperation planGroupByAggregation( DataSize unspillMemoryLimit, 
LocalExecutionPlanContext context) { - List groupBySymbols = node.getGroupingKeys(); - + ImmutableMap.Builder mappings = ImmutableMap.builder(); + OperatorFactory operatorFactory = createHashAggregationOperatorFactory( + node.getId(), + node.getAggregations(), + node.getGroupingSets(), + node.getGroupingKeys(), + node.getStep(), + node.getHashSymbol(), + node.getGroupIdSymbol(), + source, + node.hasDefaultOutput(), + spillEnabled, + node.isStreamable(), + unspillMemoryLimit, + context, + 0, + mappings); + return new PhysicalOperation(operatorFactory, mappings.build(), context, source); + } + + private OperatorFactory createHashAggregationOperatorFactory( + PlanNodeId planNodeId, + Map aggregations, + List> groupingSets, + List groupBySymbols, + Step step, + Optional hashSymbol, + Optional groupIdSymbol, + PhysicalOperation source, + boolean hasDefaultOutput, + boolean spillEnabled, + boolean isStreamable, + DataSize unspillMemoryLimit, + LocalExecutionPlanContext context, + int startOutputChannel, + ImmutableMap.Builder outputMappings) + { List aggregationOutputSymbols = new ArrayList<>(); List accumulatorFactories = new ArrayList<>(); - for (Map.Entry entry : node.getAggregations().entrySet()) { + for (Map.Entry entry : aggregations.entrySet()) { Symbol symbol = entry.getKey(); Aggregation aggregation = entry.getValue(); @@ -2334,23 +2464,26 @@ private PhysicalOperation planGroupByAggregation( } ImmutableList.Builder globalAggregationGroupIds = ImmutableList.builder(); - for (int i = 0; i < node.getGroupingSets().size(); i++) { - if (node.getGroupingSets().get(i).isEmpty()) { + for (int i = 0; i < groupingSets.size(); i++) { + if (groupingSets.get(i).isEmpty()) { globalAggregationGroupIds.add(i); } } - ImmutableMap.Builder outputMappings = ImmutableMap.builder(); // add group-by key fields each in a separate channel - int channel = 0; + int channel = startOutputChannel; + Optional groupIdChannel = Optional.empty(); for (Symbol symbol : groupBySymbols) { 
outputMappings.put(symbol, channel); + if (groupIdSymbol.isPresent() && groupIdSymbol.get().equals(symbol)) { + groupIdChannel = Optional.of(channel); + } channel++; } // hashChannel follows the group by channels - if (node.getHashSymbol().isPresent()) { - outputMappings.put(node.getHashSymbol().get(), channel++); + if (hashSymbol.isPresent()) { + outputMappings.put(hashSymbol.get(), channel++); } // aggregations go in following channels @@ -2366,32 +2499,30 @@ private PhysicalOperation planGroupByAggregation( Map mappings = outputMappings.build(); - OperatorFactory operatorFactory; - - if (node.isStreamable()) { - operatorFactory = new StreamingAggregationOperatorFactory( + if (isStreamable) { + return new StreamingAggregationOperatorFactory( context.getNextOperatorId(), - node.getId(), + planNodeId, source.getTypes(), groupByTypes, groupByChannels, - node.getStep(), + step, accumulatorFactories, joinCompiler); } else { - Optional hashChannel = node.getHashSymbol().map(channelGetter(source)); - operatorFactory = new HashAggregationOperatorFactory( + Optional hashChannel = hashSymbol.map(channelGetter(source)); + return new HashAggregationOperatorFactory( context.getNextOperatorId(), - node.getId(), + planNodeId, groupByTypes, groupByChannels, globalAggregationGroupIds.build(), - node.getStep(), - node.hasDefaultOutput(), + step, + hasDefaultOutput, accumulatorFactories, hashChannel, - node.getGroupIdSymbol().map(mappings::get), + groupIdChannel, 10_000, maxPartialAggregationMemorySize, spillEnabled, @@ -2399,8 +2530,6 @@ private PhysicalOperation planGroupByAggregation( spillerFactory, joinCompiler); } - - return new PhysicalOperation(operatorFactory, mappings, context, source); } } @@ -2415,12 +2544,12 @@ private static List getTypes(List expressions, Map { + return (fragments, statistics) -> { if (target instanceof CreateHandle) { - return metadata.finishCreateTable(session, ((CreateHandle) target).getHandle(), fragments); + return 
metadata.finishCreateTable(session, ((CreateHandle) target).getHandle(), fragments, statistics); } else if (target instanceof InsertHandle) { - return metadata.finishInsert(session, ((InsertHandle) target).getHandle(), fragments); + return metadata.finishInsert(session, ((InsertHandle) target).getHandle(), fragments, statistics); } else if (target instanceof DeleteHandle) { metadata.finishDelete(session, ((DeleteHandle) target).getHandle(), fragments); diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/LogicalPlanner.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/LogicalPlanner.java index 9b585ec61fe76..bff3b04c2898d 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/LogicalPlanner.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/LogicalPlanner.java @@ -23,6 +23,7 @@ import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorTableMetadata; import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.statistics.TableStatisticsMetadata; import com.facebook.presto.spi.type.Type; import com.facebook.presto.sql.analyzer.Analysis; import com.facebook.presto.sql.analyzer.Field; @@ -30,6 +31,7 @@ import com.facebook.presto.sql.analyzer.RelationType; import com.facebook.presto.sql.analyzer.Scope; import com.facebook.presto.sql.parser.SqlParser; +import com.facebook.presto.sql.planner.StatisticsAggregationPlanner.CreateStatisticAggregationsResult; import com.facebook.presto.sql.planner.optimizations.PlanOptimizer; import com.facebook.presto.sql.planner.plan.Assignments; import com.facebook.presto.sql.planner.plan.DeleteNode; @@ -38,6 +40,7 @@ import com.facebook.presto.sql.planner.plan.OutputNode; import com.facebook.presto.sql.planner.plan.PlanNode; import com.facebook.presto.sql.planner.plan.ProjectNode; +import com.facebook.presto.sql.planner.plan.StatisticAggregations; import com.facebook.presto.sql.planner.plan.TableFinishNode; import 
com.facebook.presto.sql.planner.plan.TableWriterNode; import com.facebook.presto.sql.planner.plan.ValuesNode; @@ -58,10 +61,12 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import java.util.AbstractMap.SimpleImmutableEntry; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import static com.facebook.presto.spi.StandardErrorCode.NOT_FOUND; @@ -73,7 +78,10 @@ import static com.facebook.presto.sql.planner.plan.TableWriterNode.WriterTarget; import static com.facebook.presto.sql.planner.sanity.PlanSanityChecker.DISTRIBUTED_PLAN_SANITY_CHECKER; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Verify.verify; import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.Streams.zip; import static java.lang.String.format; import static java.util.Objects.requireNonNull; @@ -92,6 +100,7 @@ public enum Stage private final SymbolAllocator symbolAllocator = new SymbolAllocator(); private final Metadata metadata; private final SqlParser sqlParser; + private final StatisticsAggregationPlanner statisticsAggregationPlanner; public LogicalPlanner(Session session, List planOptimizers, @@ -122,6 +131,7 @@ public LogicalPlanner(Session session, this.idAllocator = idAllocator; this.metadata = metadata; this.sqlParser = sqlParser; + this.statisticsAggregationPlanner = new StatisticsAggregationPlanner(symbolAllocator, metadata); } public Plan plan(Analysis analysis) @@ -216,12 +226,15 @@ private RelationPlan createTableCreationPlan(Analysis analysis, Query query) .map(ColumnMetadata::getName) .collect(toImmutableList()); + TableStatisticsMetadata statisticsMetadata = metadata.getNewTableStatisticsMetadata(session, destination.getCatalogName(), tableMetadata); 
+ return createTableWriterPlan( analysis, plan, new CreateName(destination.getCatalogName(), tableMetadata, newTableLayout), columnNames, - newTableLayout); + newTableLayout, + statisticsMetadata); } private RelationPlan createInsertPlan(Analysis analysis, Insert insertStatement) @@ -275,13 +288,15 @@ private RelationPlan createInsertPlan(Analysis analysis, Insert insertStatement) plan = new RelationPlan(projectNode, scope, projectNode.getOutputSymbols()); Optional newTableLayout = metadata.getInsertLayout(session, insert.getTarget()); + TableStatisticsMetadata statisticsMetadata = metadata.getInsertStatisticsMetadata(session, insert.getTarget()); return createTableWriterPlan( analysis, plan, new InsertReference(insert.getTarget()), visibleTableColumnNames, - newTableLayout); + newTableLayout, + statisticsMetadata); } private RelationPlan createTableWriterPlan( @@ -289,12 +304,9 @@ private RelationPlan createTableWriterPlan( RelationPlan plan, WriterTarget target, List columnNames, - Optional writeTableLayout) + Optional writeTableLayout, + TableStatisticsMetadata statistics) { - List writerOutputs = ImmutableList.of( - symbolAllocator.newSymbol("partialrows", BIGINT), - symbolAllocator.newSymbol("fragment", VARBINARY)); - PlanNode source = plan.getRoot(); if (!analysis.isCreateTableAsSelectWithData()) { @@ -325,23 +337,80 @@ private RelationPlan createTableWriterPlan( outputLayout)); } - PlanNode writerNode = new TableWriterNode( - idAllocator.getNextId(), - source, - target, - symbols, - columnNames, - writerOutputs, - partitioningScheme); + List writerOutputs = ImmutableList.of( + symbolAllocator.newSymbol("partialrows", BIGINT), + symbolAllocator.newSymbol("fragment", VARBINARY)); + + List commitOutputs = ImmutableList.of(symbolAllocator.newSymbol("rows", BIGINT)); + + if (!statistics.isEmpty()) { + verify(columnNames.size() == symbols.size(), "columnNames.size() != symbols.size(): %s != %s", columnNames.size(), symbols.size()); + Map columnToSymbolMap = 
zip(columnNames.stream(), symbols.stream(), SimpleImmutableEntry::new) + .collect(toImmutableMap(Entry::getKey, Entry::getValue)); + + CreateStatisticAggregationsResult result = statisticsAggregationPlanner.createStatisticsAggregation(statistics, columnToSymbolMap); + + Map projections = result.getProjections(); + Assignments assignments = Assignments.builder() + .putIdentities(source.getOutputSymbols()) + .putAll(projections) + .build(); + ProjectNode projectNode = new ProjectNode( + idAllocator.getNextId(), + source, + assignments); + + StatisticAggregations.Parts aggregations = result.getAggregations().split(symbolAllocator, metadata.getFunctionRegistry()); + + // partial aggregation is run within the TableWriteOperator to calculate the statistics for + // the data consumed by the TableWriteOperator + // final aggregation is run within the TableFinishOperator to summarize collected statistics + // by the partial aggregation from all of the writer nodes + StatisticAggregations partialAggregation = aggregations.getPartialAggregation(); + ImmutableList.Builder writerOutputSymbols = ImmutableList.builder(); + writerOutputSymbols.addAll(writerOutputs); + writerOutputSymbols.addAll(partialAggregation.getGroupingSymbols()); + writerOutputSymbols.addAll(partialAggregation.getAggregations().keySet()); + + PlanNode writerNode = new TableWriterNode( + idAllocator.getNextId(), + projectNode, + target, + symbols, + columnNames, + writerOutputSymbols.build(), + partitioningScheme, + Optional.of(partialAggregation), + Optional.of(result.getDescriptor().map(aggregations.getMappings()::get))); + + TableFinishNode commitNode = new TableFinishNode( + idAllocator.getNextId(), + writerNode, + target, + commitOutputs, + Optional.of(aggregations.getFinalAggregation()), + Optional.of(result.getDescriptor())); + + return new RelationPlan(commitNode, analysis.getRootScope(), commitOutputs); + } - List outputs = ImmutableList.of(symbolAllocator.newSymbol("rows", BIGINT)); TableFinishNode 
commitNode = new TableFinishNode( idAllocator.getNextId(), - writerNode, + new TableWriterNode( + idAllocator.getNextId(), + source, + target, + symbols, + columnNames, + writerOutputs, + partitioningScheme, + Optional.empty(), + Optional.empty()), target, - outputs); - - return new RelationPlan(commitNode, analysis.getRootScope(), outputs); + commitOutputs, + Optional.empty(), + Optional.empty()); + return new RelationPlan(commitNode, analysis.getRootScope(), commitOutputs); } private RelationPlan createDeletePlan(Analysis analysis, Delete node) @@ -350,7 +419,13 @@ private RelationPlan createDeletePlan(Analysis analysis, Delete node) .plan(node); List outputs = ImmutableList.of(symbolAllocator.newSymbol("rows", BIGINT)); - TableFinishNode commitNode = new TableFinishNode(idAllocator.getNextId(), deleteNode, deleteNode.getTarget(), outputs); + TableFinishNode commitNode = new TableFinishNode( + idAllocator.getNextId(), + deleteNode, + deleteNode.getTarget(), + outputs, + Optional.empty(), + Optional.empty()); return new RelationPlan(commitNode, analysis.getScope(node), commitNode.getOutputSymbols()); } @@ -413,7 +488,7 @@ private static List getOutputTableColumns(RelationPlan plan, Opt private static Map, Symbol> buildLambdaDeclarationToSymbolMap(Analysis analysis, SymbolAllocator symbolAllocator) { Map, Symbol> resultMap = new LinkedHashMap<>(); - for (Map.Entry, Type> entry : analysis.getTypes().entrySet()) { + for (Entry, Type> entry : analysis.getTypes().entrySet()) { if (!(entry.getKey().getNode() instanceof LambdaArgumentDeclaration)) { continue; } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/StatisticsAggregationPlanner.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/StatisticsAggregationPlanner.java new file mode 100644 index 0000000000000..fbd5f7ece704f --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/StatisticsAggregationPlanner.java @@ -0,0 +1,264 @@ +/* + * Licensed under the 
Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.sql.planner; + +import com.facebook.presto.metadata.FunctionRegistry; +import com.facebook.presto.metadata.Metadata; +import com.facebook.presto.metadata.Signature; +import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.statistics.ColumnStatisticMetadata; +import com.facebook.presto.spi.statistics.ColumnStatisticType; +import com.facebook.presto.spi.statistics.TableStatisticType; +import com.facebook.presto.spi.statistics.TableStatisticsMetadata; +import com.facebook.presto.spi.type.Type; +import com.facebook.presto.sql.analyzer.TypeSignatureProvider; +import com.facebook.presto.sql.planner.plan.AggregationNode; +import com.facebook.presto.sql.planner.plan.StatisticAggregations; +import com.facebook.presto.sql.planner.plan.StatisticAggregationsDescriptor; +import com.facebook.presto.sql.tree.Cast; +import com.facebook.presto.sql.tree.Expression; +import com.facebook.presto.sql.tree.FunctionCall; +import com.facebook.presto.sql.tree.QualifiedName; +import com.facebook.presto.sql.tree.SymbolReference; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +import java.util.AbstractMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.facebook.presto.metadata.FunctionRegistry.mangleOperatorName; +import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; +import static 
com.facebook.presto.spi.function.OperatorType.HASH_CODE; +import static com.facebook.presto.spi.statistics.TableStatisticType.ROW_COUNT; +import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static com.facebook.presto.spi.type.BooleanType.BOOLEAN; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY; +import static com.facebook.presto.spi.type.Varchars.isVarcharType; +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.Iterables.getOnlyElement; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; + +public class StatisticsAggregationPlanner +{ + private final SymbolAllocator symbolAllocator; + private final Metadata metadata; + + public StatisticsAggregationPlanner(SymbolAllocator symbolAllocator, Metadata metadata) + { + this.symbolAllocator = requireNonNull(symbolAllocator, "symbolAllocator is null"); + this.metadata = requireNonNull(metadata, "metadata is null"); + } + + public CreateStatisticAggregationsResult createStatisticsAggregation(TableStatisticsMetadata statisticsMetadata, Map columnToSymbolMap) + { + StatisticAggregationsDescriptor.Builder descriptor = StatisticAggregationsDescriptor.builder(); + + List groupingColumns = statisticsMetadata.getGroupingColumns(); + List groupingSymbols = groupingColumns.stream() + .map(columnToSymbolMap::get) + .collect(toImmutableList()); + + for (int i = 0; i < groupingSymbols.size(); i++) { + descriptor.addGrouping(groupingSymbols.get(i), groupingColumns.get(i)); + } + + ImmutableMap.Builder aggregations = ImmutableMap.builder(); + + for (TableStatisticType type : statisticsMetadata.getTableStatistics()) { + if (type != ROW_COUNT) { + throw new PrestoException(NOT_SUPPORTED, "Table-wide statistic type not 
supported: " + type); + } + } + + FunctionRegistry functionRegistry = metadata.getFunctionRegistry(); + if (!statisticsMetadata.getTableStatistics().isEmpty()) { + QualifiedName count = QualifiedName.of("count"); + AggregationNode.Aggregation aggregation = new AggregationNode.Aggregation( + new FunctionCall(count, ImmutableList.of()), + functionRegistry.resolveFunction(count, ImmutableList.of()), + Optional.empty()); + Symbol symbol = symbolAllocator.newSymbol("rowCount", BIGINT); + aggregations.put(symbol, aggregation); + descriptor.addTableStatistic(symbol, ROW_COUNT); + } + + ImmutableMap.Builder projections = ImmutableMap.builder(); + for (ColumnStatisticMetadata columnStatisticMetadata : statisticsMetadata.getColumnStatistics()) { + String columnName = columnStatisticMetadata.getColumnName(); + ColumnStatisticType statisticType = columnStatisticMetadata.getStatisticType(); + Symbol inputSymbol = columnToSymbolMap.get(columnName); + verify(inputSymbol != null, "inputSymbol is null"); + Type inputType = requireNonNull(symbolAllocator.getTypes().get(inputSymbol), "inputType is null"); + SingleColumnStatisticAggregation aggregation = createColumnAggregation(statisticType, inputSymbol, inputType); + Symbol symbol = symbolAllocator.newSymbol(statisticType + ":" + columnName, aggregation.getOutputType()); + aggregations.put(symbol, aggregation.getAggregation()); + descriptor.addColumnStatistic(symbol, columnStatisticMetadata); + aggregation.getInputProjection().ifPresent(projections::put); + } + + StatisticAggregations aggregation = new StatisticAggregations(aggregations.build(), groupingSymbols); + return new CreateStatisticAggregationsResult(aggregation, descriptor.build(), projections.build()); + } + + private SingleColumnStatisticAggregation createColumnAggregation(ColumnStatisticType statisticType, Symbol input, Type inputType) + { + switch (statisticType) { + case MIN: { + checkArgument(inputType.isOrderable(), "Input type is not orderable: %s", inputType); + 
return createAggregation(QualifiedName.of("min"), input.toSymbolReference(), inputType, inputType); + } + case MAX: { + checkArgument(inputType.isOrderable(), "Input type is not orderable: %s", inputType); + return createAggregation(QualifiedName.of("max"), input.toSymbolReference(), inputType, inputType); + } + case NUMBER_OF_DISTINCT_VALUES: { + checkArgument(inputType.isComparable(), "Input type is not comparable: %s", inputType); + // TODO: IMPLEMENT GENERIC APPROX DISTINCT + if (!isApproxDistinctAvailable(inputType)) { + FunctionCall hashCode = new FunctionCall(QualifiedName.of(mangleOperatorName(HASH_CODE)), ImmutableList.of(input.toSymbolReference())); + return createAggregation(QualifiedName.of("approx_distinct"), hashCode, BIGINT, BIGINT); + } + return createAggregation(QualifiedName.of("approx_distinct"), input.toSymbolReference(), inputType, BIGINT); + } + case NUMBER_OF_NON_NULL_VALUES: { + return createAggregation(QualifiedName.of("count"), input.toSymbolReference(), inputType, BIGINT); + } + case MAX_VALUE_SIZE_IN_BYTES: { + if (!inputType.equals(VARBINARY) && !isVarcharType(inputType)) { + throw new PrestoException(NOT_SUPPORTED, format("Unsupported statistic %s for type: %s", statisticType, inputType)); + } + Expression expression = new FunctionCall(QualifiedName.of("length"), ImmutableList.of(inputType.equals(VARBINARY) ? input.toSymbolReference() : new Cast(input.toSymbolReference(), "VARBINARY"))); + return createAggregation(QualifiedName.of("max"), expression, BIGINT, BIGINT); + } + case AVERAGE_VALUE_SIZE_IN_BYTES: { + if (!inputType.equals(VARBINARY) && !isVarcharType(inputType)) { + throw new PrestoException(NOT_SUPPORTED, format("Unsupported statistic %s for type: %s", statisticType, inputType)); + } + Expression expression = new FunctionCall(QualifiedName.of("length"), ImmutableList.of(inputType.equals(VARBINARY) ? 
input.toSymbolReference() : new Cast(input.toSymbolReference(), "VARBINARY"))); + return createAggregation(QualifiedName.of("avg"), expression, BIGINT, BIGINT); + } + case NUMBER_OF_TRUE_VALUES: { + checkArgument(BOOLEAN.equals(inputType), "invalid input type %s for statistic type %s", inputType, statisticType); + return createAggregation(QualifiedName.of("count_if"), input.toSymbolReference(), BOOLEAN, BIGINT); + } + default: + throw new IllegalArgumentException("Unsupported statistic type: " + statisticType); + } + } + + private static boolean isApproxDistinctAvailable(Type inputType) + { + return inputType.equals(BIGINT) || inputType.equals(DOUBLE) || isVarcharType(inputType) || inputType.equals(VARBINARY); + } + + private SingleColumnStatisticAggregation createAggregation(QualifiedName functionName, Expression expression, Type inputType, Type outputType) + { + Signature signature = metadata.getFunctionRegistry().resolveFunction(functionName, TypeSignatureProvider.fromTypes(ImmutableList.of(inputType))); + Type resolvedType = metadata.getType(getOnlyElement(signature.getArgumentTypes())); + Expression inputExpression = expression; + if (!resolvedType.equals(inputType)) { + inputExpression = new Cast( + expression, + resolvedType.getTypeSignature().toString(), + false, + metadata.getTypeManager().isTypeOnlyCoercion(inputType, resolvedType)); + } + + SymbolReference inputSymbolReference; + Optional> inputProjection; + if (inputExpression instanceof SymbolReference) { + inputProjection = Optional.empty(); + inputSymbolReference = (SymbolReference) inputExpression; + } + else { + Symbol inputSymbol = symbolAllocator.newSymbol(inputExpression, resolvedType); + inputProjection = Optional.of(new AbstractMap.SimpleImmutableEntry<>(inputSymbol, inputExpression)); + inputSymbolReference = inputSymbol.toSymbolReference(); + } + return new SingleColumnStatisticAggregation( + new AggregationNode.Aggregation( + new FunctionCall(functionName, 
ImmutableList.of(inputSymbolReference)), + signature, + Optional.empty()), + outputType, + inputProjection); + } + + public static class CreateStatisticAggregationsResult + { + private final StatisticAggregations aggregations; + private final StatisticAggregationsDescriptor descriptor; + private final Map projections; + + private CreateStatisticAggregationsResult( + StatisticAggregations aggregations, + StatisticAggregationsDescriptor descriptor, + Map projections) + { + this.aggregations = requireNonNull(aggregations, "statisticAggregations is null"); + this.descriptor = requireNonNull(descriptor, "descriptor is null"); + this.projections = ImmutableMap.copyOf(requireNonNull(projections, "projections is null")); + } + + public StatisticAggregations getAggregations() + { + return aggregations; + } + + public StatisticAggregationsDescriptor getDescriptor() + { + return descriptor; + } + + public Map getProjections() + { + return projections; + } + } + + public static class SingleColumnStatisticAggregation + { + private final AggregationNode.Aggregation aggregation; + private final Type outputType; + private final Optional> inputProjection; + + private SingleColumnStatisticAggregation(AggregationNode.Aggregation aggregation, Type outputType, Optional> inputProjection) + { + this.aggregation = requireNonNull(aggregation, "aggregation is null"); + this.outputType = requireNonNull(outputType, "outputType is null"); + this.inputProjection = requireNonNull(inputProjection, "inputProjection is null").map(AbstractMap.SimpleImmutableEntry::new); + } + + public AggregationNode.Aggregation getAggregation() + { + return aggregation; + } + + public Type getOutputType() + { + return outputType; + } + + public Optional> getInputProjection() + { + return inputProjection; + } + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/PushTableWriteThroughUnion.java 
b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/PushTableWriteThroughUnion.java index 07efa01d5b629..c10a7cf17b03a 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/PushTableWriteThroughUnion.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/iterative/rule/PushTableWriteThroughUnion.java @@ -19,18 +19,24 @@ import com.facebook.presto.matching.Pattern; import com.facebook.presto.sql.planner.Symbol; import com.facebook.presto.sql.planner.iterative.Rule; +import com.facebook.presto.sql.planner.optimizations.SymbolMapper; import com.facebook.presto.sql.planner.plan.PlanNode; import com.facebook.presto.sql.planner.plan.TableWriterNode; import com.facebook.presto.sql.planner.plan.UnionNode; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableListMultimap; +import com.google.common.collect.ImmutableMap; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import static com.facebook.presto.SystemSessionProperties.isPushTableWriteThroughUnion; import static com.facebook.presto.matching.Capture.newCapture; import static com.facebook.presto.sql.planner.plan.Patterns.source; import static com.facebook.presto.sql.planner.plan.Patterns.tableWriterNode; import static com.facebook.presto.sql.planner.plan.Patterns.union; -import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableMap.toImmutableMap; public class PushTableWriteThroughUnion implements Rule @@ -59,31 +65,57 @@ public boolean isEnabled(Session session) } @Override - public Result apply(TableWriterNode tableWriterNode, Captures captures, Context context) + public Result apply(TableWriterNode writerNode, Captures captures, Context context) { UnionNode unionNode = captures.get(CHILD); ImmutableList.Builder rewrittenSources = ImmutableList.builder(); - ImmutableListMultimap.Builder mappings = 
ImmutableListMultimap.builder(); - for (int i = 0; i < unionNode.getSources().size(); i++) { - int index = i; - ImmutableList.Builder newSymbols = ImmutableList.builder(); - for (Symbol outputSymbol : tableWriterNode.getOutputSymbols()) { + List> sourceMappings = new ArrayList<>(); + for (int source = 0; source < unionNode.getSources().size(); source++) { + rewrittenSources.add(rewriteSource(writerNode, unionNode, source, sourceMappings, context)); + } + + ImmutableListMultimap.Builder unionMappings = ImmutableListMultimap.builder(); + sourceMappings.forEach(mappings -> mappings.forEach(unionMappings::put)); + + return Result.ofPlanNode( + new UnionNode( + context.getIdAllocator().getNextId(), + rewrittenSources.build(), + unionMappings.build(), + ImmutableList.copyOf(unionMappings.build().keySet()))); + } + + private static TableWriterNode rewriteSource( + TableWriterNode writerNode, + UnionNode unionNode, + int source, + List> sourceMappings, + Context context) + { + Map inputMappings = getInputSymbolMapping(unionNode, source); + ImmutableMap.Builder mappings = ImmutableMap.builder(); + mappings.putAll(inputMappings); + ImmutableMap.Builder outputMappings = ImmutableMap.builder(); + for (Symbol outputSymbol : writerNode.getOutputSymbols()) { + if (inputMappings.containsKey(outputSymbol)) { + outputMappings.put(outputSymbol, inputMappings.get(outputSymbol)); + } + else { Symbol newSymbol = context.getSymbolAllocator().newSymbol(outputSymbol); - newSymbols.add(newSymbol); + outputMappings.put(outputSymbol, newSymbol); mappings.put(outputSymbol, newSymbol); } - rewrittenSources.add(new TableWriterNode( - context.getIdAllocator().getNextId(), - unionNode.getSources().get(index), - tableWriterNode.getTarget(), - tableWriterNode.getColumns().stream() - .map(column -> unionNode.getSymbolMapping().get(column).get(index)) - .collect(toImmutableList()), - tableWriterNode.getColumnNames(), - newSymbols.build(), - tableWriterNode.getPartitioningScheme())); } + 
sourceMappings.add(outputMappings.build()); + SymbolMapper symbolMapper = new SymbolMapper(mappings.build()); + return symbolMapper.map(writerNode, unionNode.getSources().get(source), context.getIdAllocator().getNextId()); + } - return Result.ofPlanNode(new UnionNode(context.getIdAllocator().getNextId(), rewrittenSources.build(), mappings.build(), ImmutableList.copyOf(mappings.build().keySet()))); + private static Map getInputSymbolMapping(UnionNode node, int source) + { + return node.getSymbolMapping() + .keySet() + .stream() + .collect(toImmutableMap(key -> key, key -> node.getSymbolMapping().get(key).get(source))); } } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/BeginTableWrite.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/BeginTableWrite.java index d3fc4a23150e1..054c871b4c9e3 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/BeginTableWrite.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/BeginTableWrite.java @@ -97,7 +97,9 @@ public PlanNode visitTableWriter(TableWriterNode node, RewriteContext c node.getColumns(), node.getColumnNames(), node.getOutputSymbols(), - node.getPartitioningScheme()); + node.getPartitioningScheme(), + node.getStatisticsAggregation(), + node.getStatisticsAggregationDescriptor()); } @Override @@ -123,7 +125,13 @@ public PlanNode visitTableFinish(TableFinishNode node, RewriteContext c context.get().addMaterializedHandle(originalTarget, newTarget); child = child.accept(this, context); - return new TableFinishNode(node.getId(), child, newTarget, node.getOutputSymbols()); + return new TableFinishNode( + node.getId(), + child, + newTarget, + node.getOutputSymbols(), + node.getStatisticsAggregation(), + node.getStatisticsAggregationDescriptor()); } public TableWriterNode.WriterTarget getTarget(PlanNode node) diff --git 
a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/PruneUnreferencedOutputs.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/PruneUnreferencedOutputs.java index 9e6bf292b355a..c48498ca98e69 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/PruneUnreferencedOutputs.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/PruneUnreferencedOutputs.java @@ -48,6 +48,7 @@ import com.facebook.presto.sql.planner.plan.SetOperationNode; import com.facebook.presto.sql.planner.plan.SimplePlanRewriter; import com.facebook.presto.sql.planner.plan.SortNode; +import com.facebook.presto.sql.planner.plan.StatisticAggregations; import com.facebook.presto.sql.planner.plan.TableFinishNode; import com.facebook.presto.sql.planner.plan.TableScanNode; import com.facebook.presto.sql.planner.plan.TableWriterNode; @@ -605,12 +606,15 @@ public PlanNode visitTableWriter(TableWriterNode node, RewriteContext expectedInputs.addAll(SymbolsExtractor.extractUnique(aggregation.getCall()))); + } PlanNode source = context.rewrite(node.getSource(), expectedInputs.build()); - return new TableWriterNode( node.getId(), source, @@ -618,15 +622,22 @@ public PlanNode visitTableWriter(TableWriterNode node, RewriteContext> context) { - // Maintain the existing inputs needed for TableCommitNode PlanNode source = context.rewrite(node.getSource(), ImmutableSet.copyOf(node.getSource().getOutputSymbols())); - return new TableFinishNode(node.getId(), source, node.getTarget(), node.getOutputSymbols()); + return new TableFinishNode( + node.getId(), + source, + node.getTarget(), + node.getOutputSymbols(), + node.getStatisticsAggregation(), + node.getStatisticsAggregationDescriptor()); } @Override diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/SymbolMapper.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/SymbolMapper.java index 
15c93db71b582..6768336849ac2 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/SymbolMapper.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/SymbolMapper.java @@ -15,12 +15,17 @@ import com.facebook.presto.spi.block.SortOrder; import com.facebook.presto.sql.planner.OrderingScheme; +import com.facebook.presto.sql.planner.PartitioningScheme; import com.facebook.presto.sql.planner.PlanNodeIdAllocator; import com.facebook.presto.sql.planner.Symbol; import com.facebook.presto.sql.planner.plan.AggregationNode; import com.facebook.presto.sql.planner.plan.AggregationNode.Aggregation; import com.facebook.presto.sql.planner.plan.PlanNode; import com.facebook.presto.sql.planner.plan.PlanNodeId; +import com.facebook.presto.sql.planner.plan.StatisticAggregations; +import com.facebook.presto.sql.planner.plan.StatisticAggregationsDescriptor; +import com.facebook.presto.sql.planner.plan.TableFinishNode; +import com.facebook.presto.sql.planner.plan.TableWriterNode; import com.facebook.presto.sql.planner.plan.TopNNode; import com.facebook.presto.sql.tree.Expression; import com.facebook.presto.sql.tree.ExpressionRewriter; @@ -33,9 +38,11 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableMap.toImmutableMap; import static java.util.Objects.requireNonNull; public class SymbolMapper @@ -82,14 +89,8 @@ public AggregationNode map(AggregationNode node, PlanNode source, PlanNodeIdAllo private AggregationNode map(AggregationNode node, PlanNode source, PlanNodeId newNodeId) { ImmutableMap.Builder aggregations = ImmutableMap.builder(); - for (Map.Entry entry : node.getAggregations().entrySet()) { - Symbol symbol = entry.getKey(); - Aggregation aggregation = entry.getValue(); - - aggregations.put(map(symbol), new Aggregation( - 
(FunctionCall) map(aggregation.getCall()), - aggregation.getSignature(), - aggregation.getMask().map(this::map))); + for (Entry entry : node.getAggregations().entrySet()) { + aggregations.put(map(entry.getKey()), map(entry.getValue())); } List> groupingSets = node.getGroupingSets().stream() @@ -107,6 +108,14 @@ private AggregationNode map(AggregationNode node, PlanNode source, PlanNodeId ne node.getGroupIdSymbol().map(this::map)); } + private Aggregation map(Aggregation aggregation) + { + return new Aggregation( + (FunctionCall) map(aggregation.getCall()), + aggregation.getSignature(), + aggregation.getMask().map(this::map)); + } + public TopNNode map(TopNNode node, PlanNode source, PlanNodeId newNodeId) { ImmutableList.Builder symbols = ImmutableList.builder(); @@ -129,6 +138,79 @@ public TopNNode map(TopNNode node, PlanNode source, PlanNodeId newNodeId) node.getStep()); } + public TableWriterNode map(TableWriterNode node, PlanNode source) + { + return map(node, source, node.getId()); + } + + public TableWriterNode map(TableWriterNode node, PlanNode source, PlanNodeId newNodeId) + { + // Intentionally does not use canonicalizeAndDistinct as that would remove columns + ImmutableList columns = node.getColumns().stream() + .map(this::map) + .collect(toImmutableList()); + + return new TableWriterNode( + newNodeId, + source, + node.getTarget(), + columns, + node.getColumnNames(), + map(node.getOutputSymbols()), + node.getPartitioningScheme().map(partitioningScheme -> canonicalizePartitionFunctionBinding(partitioningScheme, source)), + node.getStatisticsAggregation().map(this::map), + node.getStatisticsAggregationDescriptor().map(this::map)); + } + + public TableFinishNode map(TableFinishNode node, PlanNode source) + { + return new TableFinishNode( + node.getId(), + source, + node.getTarget(), + map(node.getOutputSymbols()), + node.getStatisticsAggregation().map(this::map), + node.getStatisticsAggregationDescriptor().map(descriptor -> descriptor.map(this::map))); + } + 
+ private PartitioningScheme canonicalizePartitionFunctionBinding(PartitioningScheme scheme, PlanNode source) + { + Set addedOutputs = new HashSet<>(); + ImmutableList.Builder outputs = ImmutableList.builder(); + for (Symbol symbol : source.getOutputSymbols()) { + Symbol canonicalOutput = map(symbol); + if (addedOutputs.add(canonicalOutput)) { + outputs.add(canonicalOutput); + } + } + + return new PartitioningScheme( + scheme.getPartitioning().translate(this::map), + outputs.build(), + scheme.getHashColumn().map(this::map), + scheme.isReplicateNullsAndAny(), + scheme.getBucketToPartition()); + } + + private StatisticAggregations map(StatisticAggregations statisticAggregations) + { + Map aggregations = statisticAggregations.getAggregations().entrySet().stream() + .collect(toImmutableMap(entry -> map(entry.getKey()), entry -> map(entry.getValue()))); + return new StatisticAggregations(aggregations, map(statisticAggregations.getGroupingSymbols())); + } + + private StatisticAggregationsDescriptor map(StatisticAggregationsDescriptor descriptor) + { + return descriptor.map(this::map); + } + + private List map(List outputs) + { + return outputs.stream() + .map(this::map) + .collect(toImmutableList()); + } + private List mapAndDistinct(List outputs) { Set added = new HashSet<>(); diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/UnaliasSymbolReferences.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/UnaliasSymbolReferences.java index 842acf540cba2..4d174e4fdcedf 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/UnaliasSymbolReferences.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/optimizations/UnaliasSymbolReferences.java @@ -364,7 +364,9 @@ public PlanNode visitDelete(DeleteNode node, RewriteContext context) @Override public PlanNode visitTableFinish(TableFinishNode node, RewriteContext context) { - return context.defaultRewrite(node); + PlanNode 
source = context.rewrite(node.getSource()); + SymbolMapper mapper = new SymbolMapper(mapping); + return mapper.map(node, source); } @Override @@ -548,20 +550,8 @@ private static ImmutableList.Builder rewriteSources(SetOperationNode n public PlanNode visitTableWriter(TableWriterNode node, RewriteContext context) { PlanNode source = context.rewrite(node.getSource()); - - // Intentionally does not use canonicalizeAndDistinct as that would remove columns - ImmutableList columns = node.getColumns().stream() - .map(this::canonicalize) - .collect(toImmutableList()); - - return new TableWriterNode( - node.getId(), - source, - node.getTarget(), - columns, - node.getColumnNames(), - node.getOutputSymbols(), - node.getPartitioningScheme().map(partitioningScheme -> canonicalizePartitionFunctionBinding(partitioningScheme, source))); + SymbolMapper mapper = new SymbolMapper(mapping); + return mapper.map(node, source); } @Override @@ -716,24 +706,5 @@ private ListMultimap canonicalizeSetOperationSymbolMap(ListMulti } return builder.build(); } - - private PartitioningScheme canonicalizePartitionFunctionBinding(PartitioningScheme scheme, PlanNode source) - { - Set addedOutputs = new HashSet<>(); - ImmutableList.Builder outputs = ImmutableList.builder(); - for (Symbol symbol : source.getOutputSymbols()) { - Symbol canonicalOutput = canonicalize(symbol); - if (addedOutputs.add(canonicalOutput)) { - outputs.add(canonicalOutput); - } - } - - return new PartitioningScheme( - scheme.getPartitioning().translate(this::canonicalize), - outputs.build(), - canonicalize(scheme.getHashColumn()), - scheme.isReplicateNullsAndAny(), - scheme.getBucketToPartition()); - } } } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/StatisticAggregations.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/StatisticAggregations.java new file mode 100644 index 0000000000000..b0a63f5aca460 --- /dev/null +++ 
b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/StatisticAggregations.java @@ -0,0 +1,114 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.sql.planner.plan; + +import com.facebook.presto.metadata.FunctionRegistry; +import com.facebook.presto.metadata.Signature; +import com.facebook.presto.operator.aggregation.InternalAggregationFunction; +import com.facebook.presto.sql.planner.Symbol; +import com.facebook.presto.sql.planner.SymbolAllocator; +import com.facebook.presto.sql.planner.plan.AggregationNode.Aggregation; +import com.facebook.presto.sql.tree.FunctionCall; +import com.facebook.presto.sql.tree.QualifiedName; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static java.util.Objects.requireNonNull; + +public class StatisticAggregations +{ + private final Map aggregations; + private final List groupingSymbols; + + @JsonCreator + public StatisticAggregations( + @JsonProperty("aggregations") Map aggregations, + @JsonProperty("groupingSymbols") List groupingSymbols) + { + this.aggregations = ImmutableMap.copyOf(requireNonNull(aggregations, "aggregations is null")); + this.groupingSymbols = ImmutableList.copyOf(requireNonNull(groupingSymbols, "groupingSymbols is 
null")); + } + + @JsonProperty + public Map getAggregations() + { + return aggregations; + } + + @JsonProperty + public List getGroupingSymbols() + { + return groupingSymbols; + } + + public Parts split(SymbolAllocator symbolAllocator, FunctionRegistry functionRegistry) + { + ImmutableMap.Builder intermediateAggregation = ImmutableMap.builder(); + ImmutableMap.Builder finalAggregation = ImmutableMap.builder(); + ImmutableMap.Builder mappings = ImmutableMap.builder(); + for (Map.Entry entry : aggregations.entrySet()) { + Aggregation originalAggregation = entry.getValue(); + Signature signature = originalAggregation.getSignature(); + InternalAggregationFunction function = functionRegistry.getAggregateFunctionImplementation(signature); + Symbol intermediateSymbol = symbolAllocator.newSymbol(signature.getName(), function.getIntermediateType()); + mappings.put(entry.getKey(), intermediateSymbol); + intermediateAggregation.put(intermediateSymbol, new Aggregation(originalAggregation.getCall(), signature, originalAggregation.getMask())); + finalAggregation.put(entry.getKey(), + new Aggregation( + new FunctionCall(QualifiedName.of(signature.getName()), ImmutableList.of(intermediateSymbol.toSymbolReference())), + signature, + Optional.empty())); + } + groupingSymbols.forEach(symbol -> mappings.put(symbol, symbol)); + return new Parts( + new StatisticAggregations(intermediateAggregation.build(), groupingSymbols), + new StatisticAggregations(finalAggregation.build(), groupingSymbols), + mappings.build()); + } + + public static class Parts + { + private final StatisticAggregations partialAggregation; + private final StatisticAggregations finalAggregation; + private final Map mappings; + + public Parts(StatisticAggregations partialAggregation, StatisticAggregations finalAggregation, Map mappings) + { + this.partialAggregation = requireNonNull(partialAggregation, "partialAggregation is null"); + this.finalAggregation = requireNonNull(finalAggregation, "finalAggregation is null"); 
+ this.mappings = ImmutableMap.copyOf(requireNonNull(mappings, "mappings is null")); + } + + public StatisticAggregations getPartialAggregation() + { + return partialAggregation; + } + + public StatisticAggregations getFinalAggregation() + { + return finalAggregation; + } + + public Map getMappings() + { + return mappings; + } + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/StatisticAggregationsDescriptor.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/StatisticAggregationsDescriptor.java new file mode 100644 index 0000000000000..428a41b87329a --- /dev/null +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/StatisticAggregationsDescriptor.java @@ -0,0 +1,109 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.sql.planner.plan; + +import com.facebook.presto.spi.statistics.ColumnStatisticMetadata; +import com.facebook.presto.spi.statistics.TableStatisticType; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableMap; + +import java.util.Map; +import java.util.function.Function; + +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static java.util.Objects.requireNonNull; + +public class StatisticAggregationsDescriptor +{ + private final Map grouping; + private final Map tableStatistics; + private final Map columnStatistics; + + @JsonCreator + public StatisticAggregationsDescriptor( + @JsonProperty("grouping") Map grouping, + @JsonProperty("tableStatistics") Map tableStatistics, + @JsonProperty("columnStatistics") Map columnStatistics) + { + this.grouping = ImmutableMap.copyOf(requireNonNull(grouping, "grouping is null")); + this.tableStatistics = ImmutableMap.copyOf(requireNonNull(tableStatistics, "tableStatistics is null")); + this.columnStatistics = ImmutableMap.copyOf(requireNonNull(columnStatistics, "columnStatistics is null")); + } + + @JsonProperty + public Map getGrouping() + { + return grouping; + } + + @JsonProperty + public Map getTableStatistics() + { + return tableStatistics; + } + + @JsonProperty + public Map getColumnStatistics() + { + return columnStatistics; + } + + public static Builder builder() + { + return new Builder<>(); + } + + public StatisticAggregationsDescriptor map(Function mapper) + { + return new StatisticAggregationsDescriptor<>( + map(this.getGrouping(), mapper), + map(this.getTableStatistics(), mapper), + map(this.getColumnStatistics(), mapper)); + } + + private static Map map(Map input, Function mapper) + { + return input.entrySet() + .stream() + .collect(toImmutableMap(entry -> mapper.apply(entry.getKey()), Map.Entry::getValue)); + } + + public static class Builder + { + private 
final ImmutableMap.Builder grouping = ImmutableMap.builder(); + private final ImmutableMap.Builder tableStatistics = ImmutableMap.builder(); + private final ImmutableMap.Builder columnStatistics = ImmutableMap.builder(); + + public void addGrouping(T key, String column) + { + grouping.put(key, column); + } + + public void addTableStatistic(T key, TableStatisticType type) + { + tableStatistics.put(key, type); + } + + public void addColumnStatistic(T key, ColumnStatisticMetadata statisticMetadata) + { + columnStatistics.put(key, statisticMetadata); + } + + public StatisticAggregationsDescriptor build() + { + return new StatisticAggregationsDescriptor<>(grouping.build(), tableStatistics.build(), columnStatistics.build()); + } + } +} diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/TableFinishNode.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/TableFinishNode.java index 86d4bb2f29f90..a7e617e431661 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/TableFinishNode.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/TableFinishNode.java @@ -22,6 +22,7 @@ import javax.annotation.concurrent.Immutable; import java.util.List; +import java.util.Optional; import static com.facebook.presto.sql.planner.plan.TableWriterNode.WriterTarget; import static com.google.common.base.Preconditions.checkArgument; @@ -34,13 +35,17 @@ public class TableFinishNode private final PlanNode source; private final WriterTarget target; private final List outputs; + private final Optional statisticsAggregation; + private final Optional> statisticsAggregationDescriptor; @JsonCreator public TableFinishNode( @JsonProperty("id") PlanNodeId id, @JsonProperty("source") PlanNode source, @JsonProperty("target") WriterTarget target, - @JsonProperty("outputs") List outputs) + @JsonProperty("outputs") List outputs, + @JsonProperty("statisticsAggregation") Optional statisticsAggregation, + 
@JsonProperty("statisticsAggregationDescriptor") Optional> statisticsAggregationDescriptor) { super(id); @@ -48,6 +53,9 @@ public TableFinishNode( this.source = requireNonNull(source, "source is null"); this.target = requireNonNull(target, "target is null"); this.outputs = ImmutableList.copyOf(requireNonNull(outputs, "outputs is null")); + this.statisticsAggregation = requireNonNull(statisticsAggregation, "statisticsAggregation is null"); + this.statisticsAggregationDescriptor = requireNonNull(statisticsAggregationDescriptor, "statisticsAggregationDescriptor is null"); + checkArgument(statisticsAggregation.isPresent() == statisticsAggregationDescriptor.isPresent(), "statisticsAggregation and statisticsAggregationDescriptor must be either present or absent"); } @JsonProperty @@ -69,6 +77,18 @@ public List getOutputSymbols() return outputs; } + @JsonProperty + public Optional getStatisticsAggregation() + { + return statisticsAggregation; + } + + @JsonProperty + public Optional> getStatisticsAggregationDescriptor() + { + return statisticsAggregationDescriptor; + } + @Override public List getSources() { @@ -84,6 +104,12 @@ public R accept(PlanVisitor visitor, C context) @Override public PlanNode replaceChildren(List newChildren) { - return new TableFinishNode(getId(), Iterables.getOnlyElement(newChildren), target, outputs); + return new TableFinishNode( + getId(), + Iterables.getOnlyElement(newChildren), + target, + outputs, + statisticsAggregation, + statisticsAggregationDescriptor); } } diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/TableWriterNode.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/TableWriterNode.java index 06a31633288f6..c9e0ce16c5482 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/TableWriterNode.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/plan/TableWriterNode.java @@ -46,6 +46,8 @@ public class TableWriterNode private final List columns; private 
final List columnNames; private final Optional partitioningScheme; + private final Optional statisticsAggregation; + private final Optional> statisticsAggregationDescriptor; @JsonCreator public TableWriterNode( @@ -55,7 +57,9 @@ public TableWriterNode( @JsonProperty("columns") List columns, @JsonProperty("columnNames") List columnNames, @JsonProperty("outputs") List outputs, - @JsonProperty("partitioningScheme") Optional partitioningScheme) + @JsonProperty("partitioningScheme") Optional partitioningScheme, + @JsonProperty("statisticsAggregation") Optional statisticsAggregation, + @JsonProperty("statisticsAggregationDescriptor") Optional> statisticsAggregationDescriptor) { super(id); @@ -69,6 +73,9 @@ public TableWriterNode( this.columnNames = ImmutableList.copyOf(columnNames); this.outputs = ImmutableList.copyOf(requireNonNull(outputs, "outputs is null")); this.partitioningScheme = requireNonNull(partitioningScheme, "partitioningScheme is null"); + this.statisticsAggregation = requireNonNull(statisticsAggregation, "statisticsAggregation is null"); + this.statisticsAggregationDescriptor = requireNonNull(statisticsAggregationDescriptor, "statisticsAggregationDescriptor is null"); + checkArgument(statisticsAggregation.isPresent() == statisticsAggregationDescriptor.isPresent(), "statisticsAggregation and statisticsAggregationDescriptor must be either present or absent"); } @JsonProperty @@ -108,6 +115,18 @@ public Optional getPartitioningScheme() return partitioningScheme; } + @JsonProperty + public Optional getStatisticsAggregation() + { + return statisticsAggregation; + } + + @JsonProperty + public Optional> getStatisticsAggregationDescriptor() + { + return statisticsAggregationDescriptor; + } + @Override public List getSources() { @@ -123,7 +142,7 @@ public R accept(PlanVisitor visitor, C context) @Override public PlanNode replaceChildren(List newChildren) { - return new TableWriterNode(getId(), Iterables.getOnlyElement(newChildren), target, columns, columnNames, 
outputs, partitioningScheme); + return new TableWriterNode(getId(), Iterables.getOnlyElement(newChildren), target, columns, columnNames, outputs, partitioningScheme, statisticsAggregation, statisticsAggregationDescriptor); } @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "@type") diff --git a/presto-main/src/main/java/com/facebook/presto/sql/planner/planPrinter/PlanPrinter.java b/presto-main/src/main/java/com/facebook/presto/sql/planner/planPrinter/PlanPrinter.java index 64bcce9b5a89e..9e2ddd3d44441 100644 --- a/presto-main/src/main/java/com/facebook/presto/sql/planner/planPrinter/PlanPrinter.java +++ b/presto-main/src/main/java/com/facebook/presto/sql/planner/planPrinter/PlanPrinter.java @@ -35,6 +35,8 @@ import com.facebook.presto.spi.predicate.NullableValue; import com.facebook.presto.spi.predicate.Range; import com.facebook.presto.spi.predicate.TupleDomain; +import com.facebook.presto.spi.statistics.ColumnStatisticMetadata; +import com.facebook.presto.spi.statistics.TableStatisticType; import com.facebook.presto.spi.type.Type; import com.facebook.presto.sql.FunctionInvoker; import com.facebook.presto.sql.planner.OrderingScheme; @@ -77,6 +79,8 @@ import com.facebook.presto.sql.planner.plan.SampleNode; import com.facebook.presto.sql.planner.plan.SemiJoinNode; import com.facebook.presto.sql.planner.plan.SortNode; +import com.facebook.presto.sql.planner.plan.StatisticAggregations; +import com.facebook.presto.sql.planner.plan.StatisticAggregationsDescriptor; import com.facebook.presto.sql.planner.plan.TableFinishNode; import com.facebook.presto.sql.planner.plan.TableScanNode; import com.facebook.presto.sql.planner.plan.TableWriterNode; @@ -127,6 +131,7 @@ import static com.google.common.base.CaseFormat.UPPER_UNDERSCORE; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Verify.verify; import static 
com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Iterables.getOnlyElement; import static io.airlift.units.DataSize.succinctBytes; @@ -134,6 +139,7 @@ import static java.lang.Double.isNaN; import static java.lang.String.format; import static java.util.Objects.requireNonNull; +import static java.util.stream.Collectors.joining; import static java.util.stream.Collectors.toList; public class PlanPrinter @@ -1150,6 +1156,11 @@ public Void visitTableWriter(TableWriterNode node, Integer indent) print(indent + 2, "%s := %s", name, symbol); } + if (node.getStatisticsAggregation().isPresent()) { + verify(node.getStatisticsAggregationDescriptor().isPresent(), "statisticsAggregationDescriptor is not present"); + printStatisticAggregations(node.getStatisticsAggregation().get(), node.getStatisticsAggregationDescriptor().get(), indent + 2); + } + return processChildren(node, indent + 1); } @@ -1160,9 +1171,55 @@ public Void visitTableFinish(TableFinishNode node, Integer indent) printPlanNodesStatsAndCost(indent + 2, node); printStats(indent + 2, node.getId()); + if (node.getStatisticsAggregation().isPresent()) { + verify(node.getStatisticsAggregationDescriptor().isPresent(), "statisticsAggregationDescriptor is not present"); + printStatisticAggregations(node.getStatisticsAggregation().get(), node.getStatisticsAggregationDescriptor().get(), indent + 2); + } + return processChildren(node, indent + 1); } + private void printStatisticAggregations(StatisticAggregations aggregations, StatisticAggregationsDescriptor descriptor, int indent) + { + print(indent, "Collected statistics:"); + printStatisticAggregationsInfo(descriptor.getTableStatistics(), descriptor.getColumnStatistics(), aggregations.getAggregations(), indent + 1); + print(indent + 1, "grouped by => [%s]", getStatisticGroupingSetsInfo(aggregations.getGroupingSymbols(), descriptor.getGrouping())); + } + + private String getStatisticGroupingSetsInfo(List groupingSymbols, Map 
columnMappings) + { + return groupingSymbols.stream() + .map(symbol -> format("%s := %s", symbol, columnMappings.get(symbol))) + .collect(joining(", ")); + } + + private void printStatisticAggregationsInfo( + Map tableStatistics, + Map columnStatistics, + Map aggregations, + int indent) + { + print(indent, "aggregations =>"); + for (Map.Entry tableStatistic : tableStatistics.entrySet()) { + print( + indent + 1, + "%s => [%s := %s]", + tableStatistic.getValue(), + tableStatistic.getKey(), + aggregations.get(tableStatistic.getKey()).getCall()); + } + + for (Map.Entry columnStatistic : columnStatistics.entrySet()) { + print( + indent + 1, + "%s[%s] => [%s := %s]", + columnStatistic.getValue().getStatisticType(), + columnStatistic.getValue().getColumnName(), + columnStatistic.getKey(), + aggregations.get(columnStatistic.getKey()).getCall()); + } + } + @Override public Void visitSample(SampleNode node, Integer indent) { diff --git a/presto-main/src/main/java/com/facebook/presto/testing/TestingMetadata.java b/presto-main/src/main/java/com/facebook/presto/testing/TestingMetadata.java index f3ee9117318ff..8865966edf255 100644 --- a/presto-main/src/main/java/com/facebook/presto/testing/TestingMetadata.java +++ b/presto-main/src/main/java/com/facebook/presto/testing/TestingMetadata.java @@ -33,6 +33,7 @@ import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorOutputMetadata; import com.facebook.presto.spi.security.Privilege; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.type.Type; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; @@ -231,7 +232,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con } @Override - public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(ConnectorSession 
session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { return Optional.empty(); } @@ -243,7 +244,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto } @Override - public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments) + public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { return Optional.empty(); } diff --git a/presto-main/src/test/java/com/facebook/presto/metadata/AbstractMockMetadata.java b/presto-main/src/test/java/com/facebook/presto/metadata/AbstractMockMetadata.java index c541ada18f6ff..fffa21ec223bb 100644 --- a/presto-main/src/test/java/com/facebook/presto/metadata/AbstractMockMetadata.java +++ b/presto-main/src/test/java/com/facebook/presto/metadata/AbstractMockMetadata.java @@ -28,7 +28,9 @@ import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.security.GrantInfo; import com.facebook.presto.spi.security.Privilege; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.statistics.TableStatistics; +import com.facebook.presto.spi.statistics.TableStatisticsMetadata; import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.TypeManager; import com.facebook.presto.spi.type.TypeSignature; @@ -243,7 +245,7 @@ public OutputTableHandle beginCreateTable(Session session, String catalogName, C } @Override - public Optional finishCreateTable(Session session, OutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(Session session, OutputTableHandle tableHandle, Collection fragments, List computedStatistics) { throw new UnsupportedOperationException(); } @@ -254,6 +256,18 @@ public Optional getInsertLayout(Session session, TableHandle tar throw new UnsupportedOperationException(); } + @Override + public 
TableStatisticsMetadata getNewTableStatisticsMetadata(Session session, String catalogName, ConnectorTableMetadata tableMetadata) + { + throw new UnsupportedOperationException(); + } + + @Override + public TableStatisticsMetadata getInsertStatisticsMetadata(Session session, TableHandle tableHandle) + { + throw new UnsupportedOperationException(); + } + @Override public void beginQuery(Session session, Set connectors) { @@ -273,7 +287,7 @@ public InsertTableHandle beginInsert(Session session, TableHandle tableHandle) } @Override - public Optional finishInsert(Session session, InsertTableHandle tableHandle, Collection fragments) + public Optional finishInsert(Session session, InsertTableHandle tableHandle, Collection fragments, List computedStatistics) { throw new UnsupportedOperationException(); } diff --git a/presto-main/src/test/java/com/facebook/presto/operator/TestTableWriterOperator.java b/presto-main/src/test/java/com/facebook/presto/operator/TestTableWriterOperator.java index 6eda3282c7862..4b58bbc5b98ef 100644 --- a/presto-main/src/test/java/com/facebook/presto/operator/TestTableWriterOperator.java +++ b/presto-main/src/test/java/com/facebook/presto/operator/TestTableWriterOperator.java @@ -15,9 +15,14 @@ import com.facebook.presto.RowPagesBuilder; import com.facebook.presto.connector.ConnectorId; +import com.facebook.presto.metadata.MetadataManager; import com.facebook.presto.metadata.OutputTableHandle; +import com.facebook.presto.metadata.Signature; +import com.facebook.presto.operator.AggregationOperator.AggregationOperatorFactory; +import com.facebook.presto.operator.NoOpOperator.NoOpOperatorFactory; import com.facebook.presto.operator.TableWriterOperator.TableWriterInfo; import com.facebook.presto.operator.TableWriterOperator.TableWriterOperatorFactory; +import com.facebook.presto.operator.aggregation.InternalAggregationFunction; import com.facebook.presto.spi.ConnectorInsertTableHandle; import com.facebook.presto.spi.ConnectorOutputTableHandle; import 
com.facebook.presto.spi.ConnectorPageSink; @@ -26,7 +31,9 @@ import com.facebook.presto.spi.SchemaTableName; import com.facebook.presto.spi.connector.ConnectorPageSinkProvider; import com.facebook.presto.spi.connector.ConnectorTransactionHandle; +import com.facebook.presto.spi.type.Type; import com.facebook.presto.split.PageSinkManager; +import com.facebook.presto.sql.planner.plan.AggregationNode; import com.facebook.presto.sql.planner.plan.PlanNodeId; import com.facebook.presto.sql.planner.plan.TableWriterNode; import com.google.common.collect.ImmutableList; @@ -38,14 +45,17 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.ScheduledExecutorService; import static com.facebook.presto.RowPagesBuilder.rowPagesBuilder; import static com.facebook.presto.SessionTestUtils.TEST_SESSION; +import static com.facebook.presto.metadata.FunctionKind.AGGREGATE; import static com.facebook.presto.operator.PageAssertions.assertPageEquals; import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY; import static com.facebook.presto.testing.TestingTaskContext.createTaskContext; import static io.airlift.concurrent.Threads.daemonThreadsNamed; import static java.util.concurrent.CompletableFuture.completedFuture; @@ -59,6 +69,8 @@ public class TestTableWriterOperator { private static final ConnectorId CONNECTOR_ID = new ConnectorId("testConnectorId"); + private static final InternalAggregationFunction LONG_MAX = MetadataManager.createTestMetadataManager().getFunctionRegistry().getAggregateFunctionImplementation( + new Signature("max", AGGREGATE, BIGINT.getTypeSignature(), BIGINT.getTypeSignature())); private ExecutorService executor; private ScheduledExecutorService scheduledExecutor; @@ -119,10 +131,10 @@ public void testBlockedPageSink() 
// complete previously blocked future blockingPageSink.complete(); // and getOutput which actually finishes the operator - assertPageEquals( - TableWriterOperator.TYPES, + List expectedTypes = ImmutableList.of(BIGINT, VARBINARY); + assertPageEquals(expectedTypes, operator.getOutput(), - rowPagesBuilder(TableWriterOperator.TYPES).row(2, null).build().get(0)); + rowPagesBuilder(expectedTypes).row(2, null).build().get(0)); assertTrue(operator.isBlocked().isDone()); assertTrue(operator.isFinished()); @@ -147,7 +159,10 @@ public void testTableWriterInfo() { PageSinkManager pageSinkManager = new PageSinkManager(); pageSinkManager.addConnectorPageSinkProvider(CONNECTOR_ID, new ConstantPageSinkProvider(new MemoryAccountingTestPageSink())); - TableWriterOperator tableWriterOperator = (TableWriterOperator) createTableWriterOperator(pageSinkManager); + TableWriterOperator tableWriterOperator = (TableWriterOperator) createTableWriterOperator( + pageSinkManager, + new NoOpOperatorFactory(1, new PlanNodeId("test")), + ImmutableList.of(BIGINT, VARBINARY)); RowPagesBuilder rowPagesBuilder = rowPagesBuilder(BIGINT); for (int i = 0; i < 100; i++) { @@ -165,14 +180,51 @@ public void testTableWriterInfo() } } + @Test + public void testStatisticsAggregation() + { + PageSinkManager pageSinkManager = new PageSinkManager(); + pageSinkManager.addConnectorPageSinkProvider(CONNECTOR_ID, new ConstantPageSinkProvider(new MemoryAccountingTestPageSink())); + ImmutableList outputTypes = ImmutableList.of(BIGINT, VARBINARY, BIGINT); + TableWriterOperator operator = (TableWriterOperator) createTableWriterOperator( + pageSinkManager, + new AggregationOperatorFactory( + 1, + new PlanNodeId("test"), + AggregationNode.Step.SINGLE, + ImmutableList.of(LONG_MAX.bind(ImmutableList.of(0), Optional.empty()))), + outputTypes); + + operator.addInput(rowPagesBuilder(BIGINT).row(42).build().get(0)); + operator.addInput(rowPagesBuilder(BIGINT).row(43).build().get(0)); + + assertTrue(operator.isBlocked().isDone()); 
+ assertTrue(operator.needsInput()); + + operator.finish(); + assertFalse(operator.isFinished()); + + assertPageEquals(outputTypes, operator.getOutput(), + rowPagesBuilder(outputTypes) + .row(null, null, 43).build().get(0)); + + assertPageEquals(outputTypes, operator.getOutput(), + rowPagesBuilder(outputTypes) + .row(2, null, null).build().get(0)); + + assertTrue(operator.isBlocked().isDone()); + assertFalse(operator.needsInput()); + assertTrue(operator.isFinished()); + } + private Operator createTableWriterOperator(BlockingPageSink blockingPageSink) { PageSinkManager pageSinkManager = new PageSinkManager(); pageSinkManager.addConnectorPageSinkProvider(CONNECTOR_ID, new ConstantPageSinkProvider(blockingPageSink)); - return createTableWriterOperator(pageSinkManager); + return createTableWriterOperator(pageSinkManager, new NoOpOperatorFactory(1, new PlanNodeId("test")), ImmutableList.of(BIGINT, VARBINARY)); } - private Operator createTableWriterOperator(PageSinkManager pageSinkManager) + private Operator createTableWriterOperator(PageSinkManager pageSinkManager, OperatorFactory statisticsAggregation, List outputTypes) { TableWriterOperatorFactory factory = new TableWriterOperatorFactory( 0, @@ -184,7 +236,9 @@ private Operator createTableWriterOperator(PageSinkManager pageSinkManager) new ConnectorOutputTableHandle() {}), new SchemaTableName("testSchema", "testTable")), ImmutableList.of(0), - TEST_SESSION); + TEST_SESSION, + statisticsAggregation, + outputTypes); return factory.createOperator(createTaskContext(executor, scheduledExecutor, TEST_SESSION) .addPipelineContext(0, true, true) diff --git a/presto-main/src/test/java/com/facebook/presto/sql/planner/iterative/rule/test/PlanBuilder.java b/presto-main/src/test/java/com/facebook/presto/sql/planner/iterative/rule/test/PlanBuilder.java index 92f003e074348..a6841045543f0 100644 --- a/presto-main/src/test/java/com/facebook/presto/sql/planner/iterative/rule/test/PlanBuilder.java +++ 
b/presto-main/src/test/java/com/facebook/presto/sql/planner/iterative/rule/test/PlanBuilder.java @@ -423,7 +423,9 @@ public TableFinishNode tableDelete(SchemaTableName schemaTableName, PlanNode del .addInputsSet(deleteRowId) .singleDistributionPartitioningScheme(deleteRowId)), deleteHandle, - ImmutableList.of(deleteRowId)); + ImmutableList.of(deleteRowId), + Optional.empty(), + Optional.empty()); } public ExchangeNode gatheringExchange(ExchangeNode.Scope scope, PlanNode child) @@ -666,6 +668,8 @@ public TableWriterNode tableWriter(List columns, List columnName columns, columnNames, ImmutableList.of(symbol("partialrows", BIGINT), symbol("fragment", VARBINARY)), + Optional.empty(), + Optional.empty(), Optional.empty()); } diff --git a/presto-memory/src/main/java/com/facebook/presto/plugin/memory/MemoryMetadata.java b/presto-memory/src/main/java/com/facebook/presto/plugin/memory/MemoryMetadata.java index 4f1ff3124a982..91eb10a464dbd 100644 --- a/presto-memory/src/main/java/com/facebook/presto/plugin/memory/MemoryMetadata.java +++ b/presto-memory/src/main/java/com/facebook/presto/plugin/memory/MemoryMetadata.java @@ -37,6 +37,7 @@ import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorOutputMetadata; import com.facebook.presto.spi.predicate.TupleDomain; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import io.airlift.slice.Slice; @@ -184,7 +185,7 @@ public synchronized void renameTable(ConnectorSession session, ConnectorTableHan public synchronized void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) { ConnectorOutputTableHandle outputTableHandle = beginCreateTable(session, tableMetadata, Optional.empty()); - finishCreateTable(session, outputTableHandle, ImmutableList.of()); + finishCreateTable(session, outputTableHandle, ImmutableList.of(), 
ImmutableList.of()); } @Override @@ -227,7 +228,7 @@ private void checkTableNotExists(SchemaTableName tableName) } @Override - public synchronized Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + public synchronized Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { requireNonNull(tableHandle, "tableHandle is null"); MemoryOutputTableHandle memoryOutputHandle = (MemoryOutputTableHandle) tableHandle; @@ -244,7 +245,7 @@ public synchronized MemoryInsertTableHandle beginInsert(ConnectorSession session } @Override - public synchronized Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments) + public synchronized Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { requireNonNull(insertHandle, "insertHandle is null"); MemoryInsertTableHandle memoryInsertHandle = (MemoryInsertTableHandle) insertHandle; diff --git a/presto-memory/src/test/java/com/facebook/presto/plugin/memory/TestMemoryMetadata.java b/presto-memory/src/test/java/com/facebook/presto/plugin/memory/TestMemoryMetadata.java index 5a83f3bb33604..17a78acc3d942 100644 --- a/presto-memory/src/test/java/com/facebook/presto/plugin/memory/TestMemoryMetadata.java +++ b/presto-memory/src/test/java/com/facebook/presto/plugin/memory/TestMemoryMetadata.java @@ -69,7 +69,7 @@ public void tableIsCreatedAfterCommits() new ConnectorTableMetadata(schemaTableName, ImmutableList.of(), ImmutableMap.of()), Optional.empty()); - metadata.finishCreateTable(SESSION, table, ImmutableList.of()); + metadata.finishCreateTable(SESSION, table, ImmutableList.of(), ImmutableList.of()); List tables = metadata.listTables(SESSION, null); assertTrue(tables.size() == 1, "Expected only one table"); @@ -154,7 +154,7 @@ public void 
testReadTableBeforeCreationCompleted() assertTrue(tableLayoutHandle instanceof MemoryTableLayoutHandle); assertTrue(((MemoryTableLayoutHandle) tableLayoutHandle).getDataFragments().isEmpty(), "Data fragments should be empty"); - metadata.finishCreateTable(SESSION, table, ImmutableList.of()); + metadata.finishCreateTable(SESSION, table, ImmutableList.of(), ImmutableList.of()); } @Test @@ -311,7 +311,7 @@ public void testRenameTable() SESSION, new ConnectorTableMetadata(tableName, ImmutableList.of(), ImmutableMap.of()), Optional.empty()); - metadata.finishCreateTable(SESSION, table, ImmutableList.of()); + metadata.finishCreateTable(SESSION, table, ImmutableList.of(), ImmutableList.of()); // rename table to schema which does not exist SchemaTableName invalidSchemaTableName = new SchemaTableName("test_schema_not_exist", "test_table_renamed"); diff --git a/presto-mongodb/src/main/java/com/facebook/presto/mongodb/MongoMetadata.java b/presto-mongodb/src/main/java/com/facebook/presto/mongodb/MongoMetadata.java index b337b1de890f7..0039130312a3c 100644 --- a/presto-mongodb/src/main/java/com/facebook/presto/mongodb/MongoMetadata.java +++ b/presto-mongodb/src/main/java/com/facebook/presto/mongodb/MongoMetadata.java @@ -35,6 +35,7 @@ import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.connector.ConnectorOutputMetadata; import com.facebook.presto.spi.predicate.TupleDomain; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.log.Logger; @@ -229,7 +230,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con } @Override - public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { 
clearRollback(); return Optional.empty(); @@ -247,7 +248,7 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto } @Override - public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments) + public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { return Optional.empty(); } diff --git a/presto-product-tests/conf/presto/etc/catalog/hive.properties b/presto-product-tests/conf/presto/etc/catalog/hive.properties index 677b8927f49e0..8bba6bab4c454 100644 --- a/presto-product-tests/conf/presto/etc/catalog/hive.properties +++ b/presto-product-tests/conf/presto/etc/catalog/hive.properties @@ -16,3 +16,4 @@ hive.allow-rename-table=true hive.metastore-cache-ttl=0s hive.fs.cache.max-size=10 hive.max-partitions-per-scan=100 +hive.collect-column-statistics-on-write=ENABLED diff --git a/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-hdfs-impersonation/hive.properties b/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-hdfs-impersonation/hive.properties index 740a5a826a7cc..0ec63b9464cfb 100644 --- a/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-hdfs-impersonation/hive.properties +++ b/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-hdfs-impersonation/hive.properties @@ -18,3 +18,4 @@ hive.hdfs.authentication.type=NONE hive.hdfs.impersonation.enabled=true hive.fs.cache.max-size=10 hive.max-partitions-per-scan=100 +hive.collect-column-statistics-on-write=ENABLED diff --git a/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-kerberos-hdfs-impersonation/hive.properties b/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-kerberos-hdfs-impersonation/hive.properties index ecdb284842561..30784a9c570ed 100644 --- 
a/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-kerberos-hdfs-impersonation/hive.properties +++ b/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-kerberos-hdfs-impersonation/hive.properties @@ -9,6 +9,7 @@ connector.name=hive-hadoop2 hive.metastore.uri=thrift://hadoop-master:9083 hive.metastore.thrift.client.socks-proxy=hadoop-master:1180 hive.metastore-cache-ttl=0s +hive.collect-column-statistics-on-write=ENABLED hive.metastore.authentication.type=KERBEROS hive.metastore.service.principal=hive/hadoop-master@LABS.TERADATA.COM diff --git a/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-kerberos-hdfs-no-impersonation/hive.properties b/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-kerberos-hdfs-no-impersonation/hive.properties index e8ec596ca7759..981ce055c3343 100644 --- a/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-kerberos-hdfs-no-impersonation/hive.properties +++ b/presto-product-tests/conf/presto/etc/environment-specific-catalogs/singlenode-kerberos-hdfs-no-impersonation/hive.properties @@ -14,6 +14,7 @@ hive.metastore-cache-ttl=0s hive.allow-add-column=true hive.allow-drop-column=true hive.allow-rename-column=true +hive.collect-column-statistics-on-write=ENABLED hive.metastore.authentication.type=KERBEROS hive.metastore.service.principal=hive/hadoop-master@LABS.TERADATA.COM diff --git a/presto-product-tests/src/main/java/com/facebook/presto/tests/hive/TestHiveTableStatistics.java b/presto-product-tests/src/main/java/com/facebook/presto/tests/hive/TestHiveTableStatistics.java index 4b7e7dff160a9..65b435f66bc67 100644 --- a/presto-product-tests/src/main/java/com/facebook/presto/tests/hive/TestHiveTableStatistics.java +++ b/presto-product-tests/src/main/java/com/facebook/presto/tests/hive/TestHiveTableStatistics.java @@ -13,11 +13,13 @@ */ package com.facebook.presto.tests.hive; +import 
com.google.common.collect.ImmutableList; import io.prestodb.tempto.ProductTest; import io.prestodb.tempto.Requirement; import io.prestodb.tempto.Requirements; import io.prestodb.tempto.RequirementsProvider; import io.prestodb.tempto.Requires; +import io.prestodb.tempto.assertions.QueryAssert.Row; import io.prestodb.tempto.configuration.Configuration; import io.prestodb.tempto.fulfillment.table.MutableTableRequirement; import io.prestodb.tempto.fulfillment.table.hive.HiveTableDefinition; @@ -25,6 +27,8 @@ import io.prestodb.tempto.query.QueryExecutor; import org.testng.annotations.Test; +import java.util.List; + import static com.facebook.presto.tests.TestGroups.HIVE_CONNECTOR; import static com.facebook.presto.tests.TestGroups.SKIP_ON_CDH; import static com.facebook.presto.tests.hive.AllSimpleTypesTableDefinitions.ALL_HIVE_SIMPLE_TYPES_TEXTFILE; @@ -37,6 +41,7 @@ import static io.prestodb.tempto.fulfillment.table.TableRequirements.mutableTable; import static io.prestodb.tempto.fulfillment.table.hive.tpch.TpchTableDefinitions.NATION; import static io.prestodb.tempto.query.QueryExecutor.query; +import static java.lang.String.format; public class TestHiveTableStatistics extends ProductTest @@ -69,6 +74,7 @@ public Requirement getRequirements(Configuration configuration) private static final String ALL_TYPES_TABLE_NAME = "all_types"; private static final String EMPTY_ALL_TYPES_TABLE_NAME = "empty_all_types"; + private static final String ALL_TYPES_ALL_NULL_TABLE_NAME = "all_types_all_null"; private static final HiveTableDefinition ALL_TYPES_TABLE = HiveTableDefinition.like(ALL_HIVE_SIMPLE_TYPES_TEXTFILE) .setDataSource(InlineDataSource.createStringDataSource( @@ -78,6 +84,67 @@ public Requirement getRequirements(Configuration configuration) "127|32767|2147483647|9223372036854775807|123.345|235.567|345.678|345.678|2015-05-10 12:15:35.123456|2015-06-10|ala ma kota|ala ma kot|ala ma |true|a290IGJpbmFybnk=|\n")) .build(); + private static final HiveTableDefinition 
ALL_TYPES_ALL_NULL_TABLE = HiveTableDefinition.like(ALL_HIVE_SIMPLE_TYPES_TEXTFILE) + .setDataSource(InlineDataSource.createStringDataSource( + "all_analyzable_types_all_null", + "", + "\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\\N|\n")) + .build(); + + private static final List ALL_TYPES_TABLE_STATISTICS = ImmutableList.of( + row("c_tinyint", null, 2.0, 0.0, null, "121", "127"), + row("c_smallint", null, 2.0, 0.0, null, "32761", "32767"), + row("c_int", null, 2.0, 0.0, null, "2147483641", "2147483647"), + row("c_bigint", null, 2.0, 0.0, null, "9223372036854775801", "9223372036854775807"), + row("c_float", null, 2.0, 0.0, null, "123.341", "123.345"), + row("c_double", null, 2.0, 0.0, null, "234.561", "235.567"), + row("c_decimal", null, 2.0, 0.0, null, "345", "346"), + row("c_decimal_w_params", null, 2.0, 0.0, null, "345.67100", "345.67800"), + row("c_timestamp", null, 2.0, 0.0, null, "2015-05-10 12:15:31.000", "2015-05-10 12:15:35.000"), + row("c_date", null, 2.0, 0.0, null, "2015-05-09", "2015-06-10"), + row("c_string", null, 2.0, 0.0, null, null, null), + row("c_varchar", null, 2.0, 0.0, null, null, null), + row("c_char", null, 2.0, 0.0, null, null, null), + row("c_boolean", null, 2.0, 0.0, null, null, null), + row("c_binary", null, null, 0.0, null, null, null), + row(null, null, null, null, 2.0, null, null)); + + private static final List ALL_TYPES_ALL_NULL_TABLE_STATISTICS = ImmutableList.of( + row("c_tinyint", null, 0.0, 1.0, null, null, null), + row("c_smallint", null, 0.0, 1.0, null, null, null), + row("c_int", null, 0.0, 1.0, null, null, null), + row("c_bigint", null, 0.0, 1.0, null, null, null), + row("c_float", null, 0.0, 1.0, null, null, null), + row("c_double", null, 0.0, 1.0, null, null, null), + row("c_decimal", null, 0.0, 1.0, null, null, null), + row("c_decimal_w_params", null, 0.0, 1.0, null, null, null), + row("c_timestamp", null, 0.0, 1.0, null, null, null), + row("c_date", null, 0.0, 1.0, null, null, null), + row("c_string", null, 
0.0, 1.0, null, null, null), + row("c_varchar", null, 0.0, 1.0, null, null, null), + row("c_char", null, 0.0, 1.0, null, null, null), + row("c_boolean", null, 0.0, 1.0, null, null, null), + row("c_binary", null, null, 1.0, null, null, null), + row(null, null, null, null, 1.0, null, null)); + + private static final List ALL_TYPES_EMPTY_TABLE_STATISTICS = ImmutableList.of( + row("c_tinyint", null, 0.0, 0.0, null, null, null), + row("c_smallint", null, 0.0, 0.0, null, null, null), + row("c_int", null, 0.0, 0.0, null, null, null), + row("c_bigint", null, 0.0, 0.0, null, null, null), + row("c_float", null, 0.0, 0.0, null, null, null), + row("c_double", null, 0.0, 0.0, null, null, null), + row("c_decimal", null, 0.0, 0.0, null, null, null), + row("c_decimal_w_params", null, 0.0, 0.0, null, null, null), + row("c_timestamp", null, 0.0, 0.0, null, null, null), + row("c_date", null, 0.0, 0.0, null, null, null), + row("c_string", null, 0.0, 0.0, null, null, null), + row("c_varchar", null, 0.0, 0.0, null, null, null), + row("c_char", null, 0.0, 0.0, null, null, null), + row("c_boolean", null, 0.0, 0.0, null, null, null), + row("c_binary", null, null, 0.0, null, null, null), + row(null, null, null, null, 0.0, null, null)); + private static final class AllTypesTable implements RequirementsProvider { @@ -86,6 +153,7 @@ public Requirement getRequirements(Configuration configuration) { return Requirements.compose( mutableTable(ALL_TYPES_TABLE, ALL_TYPES_TABLE_NAME, MutableTableRequirement.State.LOADED), + mutableTable(ALL_TYPES_ALL_NULL_TABLE, ALL_TYPES_ALL_NULL_TABLE_NAME, MutableTableRequirement.State.LOADED), mutableTable(ALL_TYPES_TABLE, EMPTY_ALL_TYPES_TABLE_NAME, MutableTableRequirement.State.CREATED)); } } @@ -401,6 +469,321 @@ public void testStatisticsForAllDataTypesOnlyNulls() row(null, null, null, null, 1.0, null, null)); } + @Test + @Requires(AllTypesTable.class) + public void testComputeTableStatisticsOnCreateTable() + { + String allTypesTable = 
mutableTablesState().get(ALL_TYPES_TABLE_NAME).getNameInDatabase(); + String emptyAllTypesTable = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase(); + String allTypesAllNullTable = mutableTablesState().get(ALL_TYPES_ALL_NULL_TABLE_NAME).getNameInDatabase(); + + assertComputeTableStatisticsOnCreateTable(allTypesTable, ALL_TYPES_TABLE_STATISTICS); + assertComputeTableStatisticsOnCreateTable(emptyAllTypesTable, ALL_TYPES_EMPTY_TABLE_STATISTICS); + assertComputeTableStatisticsOnCreateTable(allTypesAllNullTable, ALL_TYPES_ALL_NULL_TABLE_STATISTICS); + } + + @Test + @Requires(AllTypesTable.class) + public void testComputeTableStatisticsOnInsert() + { + String allTypesTable = mutableTablesState().get(ALL_TYPES_TABLE_NAME).getNameInDatabase(); + String emptyAllTypesTable = mutableTablesState().get(EMPTY_ALL_TYPES_TABLE_NAME).getNameInDatabase(); + String allTypesAllNullTable = mutableTablesState().get(ALL_TYPES_ALL_NULL_TABLE_NAME).getNameInDatabase(); + + assertComputeTableStatisticsOnInsert(allTypesTable, ALL_TYPES_TABLE_STATISTICS); + assertComputeTableStatisticsOnInsert(emptyAllTypesTable, ALL_TYPES_EMPTY_TABLE_STATISTICS); + assertComputeTableStatisticsOnInsert(allTypesAllNullTable, ALL_TYPES_ALL_NULL_TABLE_STATISTICS); + + String tableName = "test_update_table_statistics"; + query(format("DROP TABLE IF EXISTS %s", tableName)); + try { + query(format("CREATE TABLE %s AS SELECT * FROM %s WITH NO DATA", tableName, allTypesTable)); + query(format("INSERT INTO %s SELECT * FROM %s", tableName, allTypesTable)); + query(format("INSERT INTO %s SELECT * FROM %s", tableName, allTypesAllNullTable)); + query(format("INSERT INTO %s SELECT * FROM %s", tableName, allTypesAllNullTable)); + assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(ImmutableList.of( + row("c_tinyint", null, 2.0, 0.5, null, "121", "127"), + row("c_smallint", null, 2.0, 0.5, null, "32761", "32767"), + row("c_int", null, 2.0, 0.5, null, "2147483641", "2147483647"), + 
row("c_bigint", null, 2.0, 0.5, null, "9223372036854775801", "9223372036854775807"), + row("c_float", null, 2.0, 0.5, null, "123.341", "123.345"), + row("c_double", null, 2.0, 0.5, null, "234.561", "235.567"), + row("c_decimal", null, 2.0, 0.5, null, "345", "346"), + row("c_decimal_w_params", null, 2.0, 0.5, null, "345.67100", "345.67800"), + row("c_timestamp", null, 2.0, 0.5, null, "2015-05-10 12:15:31.000", "2015-05-10 12:15:35.000"), + row("c_date", null, 2.0, 0.5, null, "2015-05-09", "2015-06-10"), + row("c_string", null, 2.0, 0.5, null, null, null), + row("c_varchar", null, 2.0, 0.5, null, null, null), + row("c_char", null, 2.0, 0.5, null, null, null), + row("c_boolean", null, 2.0, 0.5, null, null, null), + row("c_binary", null, null, 0.5, null, null, null), + row(null, null, null, null, 4.0, null, null))); + + query(format("INSERT INTO %s VALUES( " + + "TINYINT '120', " + + "SMALLINT '32760', " + + "INTEGER '2147483640', " + + "BIGINT '9223372036854775800', " + + "REAL '123.340', " + + "DOUBLE '234.560', " + + "CAST(343.0 AS DECIMAL(10, 0)), " + + "CAST(345.670 AS DECIMAL(10, 5)), " + + "TIMESTAMP '2015-05-10 12:15:30', " + + "DATE '2015-05-08', " + + "CAST('ela ma kot' AS VARCHAR), " + + "CAST('ela ma ko' AS VARCHAR(10)), " + + "CAST('ela m ' AS CHAR(10)), " + + "false, " + + "CAST('cGllcyBiaW5hcm54' as VARBINARY))", tableName)); + + assertThat(query("SHOW STATS FOR " + tableName)).containsOnly(ImmutableList.of( + row("c_tinyint", null, 2.0, 0.4, null, "120", "127"), + row("c_smallint", null, 2.0, 0.4, null, "32760", "32767"), + row("c_int", null, 2.0, 0.4, null, "2147483640", "2147483647"), + row("c_bigint", null, 2.0, 0.4, null, "9223372036854775800", "9223372036854775807"), + row("c_float", null, 2.0, 0.4, null, "123.34", "123.345"), + row("c_double", null, 2.0, 0.4, null, "234.56", "235.567"), + row("c_decimal", null, 2.0, 0.4, null, "343", "346"), + row("c_decimal_w_params", null, 2.0, 0.4, null, "345.67000", "345.67800"), + row("c_timestamp", null, 
2.0, 0.4, null, "2015-05-10 12:15:30.000", "2015-05-10 12:15:35.000"), + row("c_date", null, 2.0, 0.4, null, "2015-05-08", "2015-06-10"), + row("c_string", null, 2.0, 0.4, null, null, null), + row("c_varchar", null, 2.0, 0.4, null, null, null), + row("c_char", null, 2.0, 0.4, null, null, null), + row("c_boolean", null, 2.0, 0.4, null, null, null), + row("c_binary", null, null, 0.4, null, null, null), + row(null, null, null, null, 5.0, null, null))); + } + finally { + query(format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + @Requires(AllTypesTable.class) + public void testComputePartitionStatisticsOnCreateTable() + { + String tableName = "test_compute_partition_statistics_on_create_table"; + query(format("DROP TABLE IF EXISTS %s", tableName)); + try { + query(format("CREATE TABLE %s WITH ( " + + " partitioned_by = ARRAY['p_bigint', 'p_varchar']" + + ") AS " + + "SELECT * FROM ( " + + " VALUES " + + " (TINYINT '120', SMALLINT '32760', INTEGER '2147483640', BIGINT '9223372036854775800', REAL '123.340', DOUBLE '234.560', CAST(343.0 AS DECIMAL(10, 0)), CAST(345.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:15:30', DATE '2015-05-08', CAST('p1 varchar' AS VARCHAR), CAST('p1 varchar10' AS VARCHAR(10)), CAST('p1 char10' AS CHAR(10)), false, CAST('p1 binary' as VARBINARY), BIGINT '1', CAST('partition1' AS VARCHAR)), " + + " (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '1', 'partition1'), " + + " (TINYINT '99', SMALLINT '333', INTEGER '444', BIGINT '555', REAL '666.340', DOUBLE '777.560', CAST(888.0 AS DECIMAL(10, 0)), CAST(999.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:45:30', DATE '2015-05-09', CAST('p2 varchar' AS VARCHAR), CAST('p2 varchar10' AS VARCHAR(10)), CAST('p2 char10' AS CHAR(10)), true, CAST('p2 binary' as VARBINARY), BIGINT '2', CAST('partition2' AS VARCHAR)), " + + " (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '2', 'partition2') " 
+ + "" + + ") AS t (c_tinyint, c_smallint, c_int, c_bigint, c_float, c_double, c_decimal, c_decimal_w_params, c_timestamp, c_date, c_string, c_varchar, c_char, c_boolean, c_binary, p_bigint, p_varchar)", tableName)); + + assertThat(query(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_bigint = 1 AND p_varchar = 'partition1')", tableName))).containsOnly(ImmutableList.of( + row("c_tinyint", null, 1.0, 0.5, null, "120", "120"), + row("c_smallint", null, 1.0, 0.5, null, "32760", "32760"), + row("c_int", null, 1.0, 0.5, null, "2147483640", "2147483640"), + row("c_bigint", null, 1.0, 0.5, null, "9223372036854775800", "9223372036854775800"), + row("c_float", null, 1.0, 0.5, null, "123.34", "123.34"), + row("c_double", null, 1.0, 0.5, null, "234.56", "234.56"), + row("c_decimal", null, 1.0, 0.5, null, "343", "343"), + row("c_decimal_w_params", null, 1.0, 0.5, null, "345.67000", "345.67000"), + row("c_timestamp", null, 1.0, 0.5, null, "2015-05-10 12:15:30.000", "2015-05-10 12:15:30.000"), + row("c_date", null, 1.0, 0.5, null, "2015-05-08", "2015-05-08"), + row("c_string", null, 1.0, 0.5, null, null, null), + row("c_varchar", null, 1.0, 0.5, null, null, null), + row("c_char", null, 1.0, 0.5, null, null, null), + row("c_boolean", null, 1.0, 0.5, null, null, null), + row("c_binary", null, null, 0.5, null, null, null), + row("p_bigint", null, 1.0, 0.0, null, "1", "1"), + row("p_varchar", null, 1.0, 0.0, null, null, null), + row(null, null, null, null, 2.0, null, null))); + + assertThat(query(format("SHOW STATS FOR (SELECT * FROM %s WHERE p_bigint = 2 AND p_varchar = 'partition2')", tableName))).containsOnly(ImmutableList.of( + row("c_tinyint", null, 1.0, 0.5, null, "99", "99"), + row("c_smallint", null, 1.0, 0.5, null, "333", "333"), + row("c_int", null, 1.0, 0.5, null, "444", "444"), + row("c_bigint", null, 1.0, 0.5, null, "555", "555"), + row("c_float", null, 1.0, 0.5, null, "666.34", "666.34"), + row("c_double", null, 1.0, 0.5, null, "777.56", "777.56"), + row("c_decimal", 
null, 1.0, 0.5, null, "888", "888"), + row("c_decimal_w_params", null, 1.0, 0.5, null, "999.67000", "999.67000"), + row("c_timestamp", null, 1.0, 0.5, null, "2015-05-10 12:45:30.000", "2015-05-10 12:45:30.000"), + row("c_date", null, 1.0, 0.5, null, "2015-05-09", "2015-05-09"), + row("c_string", null, 1.0, 0.5, null, null, null), + row("c_varchar", null, 1.0, 0.5, null, null, null), + row("c_char", null, 1.0, 0.5, null, null, null), + row("c_boolean", null, 1.0, 0.5, null, null, null), + row("c_binary", null, null, 0.5, null, null, null), + row("p_bigint", null, 1.0, 0.0, null, "2", "2"), + row("p_varchar", null, 1.0, 0.0, null, null, null), + row(null, null, null, null, 2.0, null, null))); + } + finally { + query(format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + @Test + @Requires(AllTypesTable.class) + public void testComputePartitionStatisticsOnInsert() + { + String tableName = "test_compute_partition_statistics_on_insert"; + + query(format("DROP TABLE IF EXISTS %s", tableName)); + try { + query(format("CREATE TABLE %s(" + + "c_tinyint TINYINT, " + + "c_smallint SMALLINT, " + + "c_int INT, " + + "c_bigint BIGINT, " + + "c_float REAL, " + + "c_double DOUBLE, " + + "c_decimal DECIMAL(10,0), " + + "c_decimal_w_params DECIMAL(10,5), " + + "c_timestamp TIMESTAMP, " + + "c_date DATE, " + + "c_string VARCHAR, " + + "c_varchar VARCHAR(10), " + + "c_char CHAR(10), " + + "c_boolean BOOLEAN, " + + "c_binary VARBINARY, " + + "" + + "p_bigint BIGINT, " + + "p_varchar VARCHAR " + + ") WITH ( " + + " partitioned_by = ARRAY['p_bigint', 'p_varchar']" + + ")", tableName)); + + query(format("INSERT INTO %s VALUES " + + "(TINYINT '120', SMALLINT '32760', INTEGER '2147483640', BIGINT '9223372036854775800', REAL '123.340', DOUBLE '234.560', CAST(343.0 AS DECIMAL(10, 0)), CAST(345.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:15:30', DATE '2015-05-08', 'p1 varchar', CAST('p1 varchar10' AS VARCHAR(10)), CAST('p1 char10' AS CHAR(10)), false, CAST('p1 binary' as VARBINARY), 
BIGINT '1', 'partition1')," + + "(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '1', 'partition1')", tableName)); + + query(format("INSERT INTO %s VALUES " + + "(TINYINT '99', SMALLINT '333', INTEGER '444', BIGINT '555', REAL '666.340', DOUBLE '777.560', CAST(888.0 AS DECIMAL(10, 0)), CAST(999.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:45:30', DATE '2015-05-09', 'p2 varchar', CAST('p2 varchar10' AS VARCHAR(10)), CAST('p2 char10' AS CHAR(10)), true, CAST('p2 binary' as VARBINARY), BIGINT '2', 'partition2')," + + "(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '2', 'partition2')", tableName)); + + String showStatsPartitionOne = format("SHOW STATS FOR (SELECT * FROM %s WHERE p_bigint = 1 AND p_varchar = 'partition1')", tableName); + String showStatsPartitionTwo = format("SHOW STATS FOR (SELECT * FROM %s WHERE p_bigint = 2 AND p_varchar = 'partition2')", tableName); + + assertThat(query(showStatsPartitionOne)).containsOnly(ImmutableList.of( + row("c_tinyint", null, 1.0, 0.5, null, "120", "120"), + row("c_smallint", null, 1.0, 0.5, null, "32760", "32760"), + row("c_int", null, 1.0, 0.5, null, "2147483640", "2147483640"), + row("c_bigint", null, 1.0, 0.5, null, "9223372036854775800", "9223372036854775800"), + row("c_float", null, 1.0, 0.5, null, "123.34", "123.34"), + row("c_double", null, 1.0, 0.5, null, "234.56", "234.56"), + row("c_decimal", null, 1.0, 0.5, null, "343", "343"), + row("c_decimal_w_params", null, 1.0, 0.5, null, "345.67000", "345.67000"), + row("c_timestamp", null, 1.0, 0.5, null, "2015-05-10 12:15:30.000", "2015-05-10 12:15:30.000"), + row("c_date", null, 1.0, 0.5, null, "2015-05-08", "2015-05-08"), + row("c_string", null, 1.0, 0.5, null, null, null), + row("c_varchar", null, 1.0, 0.5, null, null, null), + row("c_char", null, 1.0, 0.5, null, null, null), + row("c_boolean", null, 1.0, 0.5, null, null, null), + row("c_binary", null, null, 0.5, 
null, null, null), + row("p_bigint", null, 1.0, 0.0, null, "1", "1"), + row("p_varchar", null, 1.0, 0.0, null, null, null), + row(null, null, null, null, 2.0, null, null))); + + assertThat(query(showStatsPartitionTwo)).containsOnly(ImmutableList.of( + row("c_tinyint", null, 1.0, 0.5, null, "99", "99"), + row("c_smallint", null, 1.0, 0.5, null, "333", "333"), + row("c_int", null, 1.0, 0.5, null, "444", "444"), + row("c_bigint", null, 1.0, 0.5, null, "555", "555"), + row("c_float", null, 1.0, 0.5, null, "666.34", "666.34"), + row("c_double", null, 1.0, 0.5, null, "777.56", "777.56"), + row("c_decimal", null, 1.0, 0.5, null, "888", "888"), + row("c_decimal_w_params", null, 1.0, 0.5, null, "999.67000", "999.67000"), + row("c_timestamp", null, 1.0, 0.5, null, "2015-05-10 12:45:30.000", "2015-05-10 12:45:30.000"), + row("c_date", null, 1.0, 0.5, null, "2015-05-09", "2015-05-09"), + row("c_string", null, 1.0, 0.5, null, null, null), + row("c_varchar", null, 1.0, 0.5, null, null, null), + row("c_char", null, 1.0, 0.5, null, null, null), + row("c_boolean", null, 1.0, 0.5, null, null, null), + row("c_binary", null, null, 0.5, null, null, null), + row("p_bigint", null, 1.0, 0.0, null, "2", "2"), + row("p_varchar", null, 1.0, 0.0, null, null, null), + row(null, null, null, null, 2.0, null, null))); + + query(format("INSERT INTO %s VALUES( TINYINT '119', SMALLINT '32759', INTEGER '2147483639', BIGINT '9223372036854775799', REAL '122.340', DOUBLE '233.560', CAST(342.0 AS DECIMAL(10, 0)), CAST(344.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:15:29', DATE '2015-05-07', 'p1 varchar', CAST('p1 varchar10' AS VARCHAR(10)), CAST('p1 char10' AS CHAR(10)), true, CAST('p1 binary' as VARBINARY), BIGINT '1', 'partition1')", tableName)); + query(format("INSERT INTO %s VALUES( null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '1', 'partition1')", tableName)); + + assertThat(query(showStatsPartitionOne)).containsOnly(ImmutableList.of( + 
row("c_tinyint", null, 1.0, 0.5, null, "119", "120"), + row("c_smallint", null, 1.0, 0.5, null, "32759", "32760"), + row("c_int", null, 1.0, 0.5, null, "2147483639", "2147483640"), + row("c_bigint", null, 1.0, 0.5, null, "9223372036854775799", "9223372036854775800"), + row("c_float", null, 1.0, 0.5, null, "122.34", "123.34"), + row("c_double", null, 1.0, 0.5, null, "233.56", "234.56"), + row("c_decimal", null, 1.0, 0.5, null, "342", "343"), + row("c_decimal_w_params", null, 1.0, 0.5, null, "344.67000", "345.67000"), + row("c_timestamp", null, 1.0, 0.5, null, "2015-05-10 12:15:29.000", "2015-05-10 12:15:30.000"), + row("c_date", null, 1.0, 0.5, null, "2015-05-07", "2015-05-08"), + row("c_string", null, 1.0, 0.5, null, null, null), + row("c_varchar", null, 1.0, 0.5, null, null, null), + row("c_char", null, 1.0, 0.5, null, null, null), + row("c_boolean", null, 2.0, 0.5, null, null, null), + row("c_binary", null, null, 0.5, null, null, null), + row("p_bigint", null, 1.0, 0.0, null, "1", "1"), + row("p_varchar", null, 1.0, 0.0, null, null, null), + row(null, null, null, null, 4.0, null, null))); + + query(format("INSERT INTO %s VALUES( TINYINT '100', SMALLINT '334', INTEGER '445', BIGINT '556', REAL '667.340', DOUBLE '778.560', CAST(889.0 AS DECIMAL(10, 0)), CAST(1000.670 AS DECIMAL(10, 5)), TIMESTAMP '2015-05-10 12:45:31', DATE '2015-05-10', CAST('p2 varchar' AS VARCHAR), CAST('p2 varchar10' AS VARCHAR(10)), CAST('p2 char10' AS CHAR(10)), true, CAST('p2 binary' as VARBINARY), BIGINT '2', 'partition2')", tableName)); + query(format("INSERT INTO %s VALUES( null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, BIGINT '2', 'partition2')", tableName)); + + assertThat(query(showStatsPartitionTwo)).containsOnly(ImmutableList.of( + row("c_tinyint", null, 1.0, 0.5, null, "99", "100"), + row("c_smallint", null, 1.0, 0.5, null, "333", "334"), + row("c_int", null, 1.0, 0.5, null, "444", "445"), + row("c_bigint", null, 1.0, 0.5, null, "555", 
"556"), + row("c_float", null, 1.0, 0.5, null, "666.34", "667.34"), + row("c_double", null, 1.0, 0.5, null, "777.56", "778.56"), + row("c_decimal", null, 1.0, 0.5, null, "888", "889"), + row("c_decimal_w_params", null, 1.0, 0.5, null, "999.67000", "1000.67000"), + row("c_timestamp", null, 1.0, 0.5, null, "2015-05-10 12:45:30.000", "2015-05-10 12:45:31.000"), + row("c_date", null, 1.0, 0.5, null, "2015-05-09", "2015-05-10"), + row("c_string", null, 1.0, 0.5, null, null, null), + row("c_varchar", null, 1.0, 0.5, null, null, null), + row("c_char", null, 1.0, 0.5, null, null, null), + row("c_boolean", null, 1.0, 0.5, null, null, null), + row("c_binary", null, null, 0.5, null, null, null), + row("p_bigint", null, 1.0, 0.0, null, "2", "2"), + row("p_varchar", null, 1.0, 0.0, null, null, null), + row(null, null, null, null, 4.0, null, null))); + } + finally { + query(format("DROP TABLE IF EXISTS %s", tableName)); + } + } + + private static void assertComputeTableStatisticsOnCreateTable(String sourceTableName, List expectedStatistics) + { + String copiedTableName = "assert_compute_table_statistics_on_create_table_" + sourceTableName; + query(format("DROP TABLE IF EXISTS %s", copiedTableName)); + try { + query(format("CREATE TABLE %s AS SELECT * FROM %s", copiedTableName, sourceTableName)); + assertThat(query("SHOW STATS FOR " + copiedTableName)).containsOnly(expectedStatistics); + } + finally { + query(format("DROP TABLE IF EXISTS %s", copiedTableName)); + } + } + + private static void assertComputeTableStatisticsOnInsert(String sourceTableName, List expectedStatistics) + { + String copiedTableName = "assert_compute_table_statistics_on_insert_" + sourceTableName; + query(format("DROP TABLE IF EXISTS %s", copiedTableName)); + try { + query(format("CREATE TABLE %s AS SELECT * FROM %s WITH NO DATA", copiedTableName, sourceTableName)); + assertThat(query("SHOW STATS FOR " + copiedTableName)).containsOnly(ALL_TYPES_EMPTY_TABLE_STATISTICS); + query(format("INSERT INTO %s SELECT 
* FROM %s", copiedTableName, sourceTableName)); + assertThat(query("SHOW STATS FOR " + copiedTableName)).containsOnly(expectedStatistics); + } + finally { + query(format("DROP TABLE IF EXISTS %s", copiedTableName)); + } + } + private static QueryExecutor onHive() { return testContext().getDependency(QueryExecutor.class, "hive"); diff --git a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorMetadata.java b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorMetadata.java index ef5d31ae70a5d..6927e68832a5d 100644 --- a/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorMetadata.java +++ b/presto-raptor/src/main/java/com/facebook/presto/raptor/RaptorMetadata.java @@ -46,6 +46,7 @@ import com.facebook.presto.spi.connector.ConnectorOutputMetadata; import com.facebook.presto.spi.connector.ConnectorPartitioningHandle; import com.facebook.presto.spi.predicate.TupleDomain; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.type.Type; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableListMultimap; @@ -424,7 +425,7 @@ private Distribution getOrCreateDistribution(String name, List columnTypes public void createTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, boolean ignoreExisting) { Optional layout = getNewTableLayout(session, tableMetadata); - finishCreateTable(session, beginCreateTable(session, tableMetadata, layout), ImmutableList.of()); + finishCreateTable(session, beginCreateTable(session, tableMetadata, layout), ImmutableList.of(), ImmutableList.of()); } @Override @@ -626,7 +627,7 @@ private static List getBucketColumnHandles(List buck } @Override - public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle outputTableHandle, Collection fragments) + public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle outputTableHandle, Collection fragments, List computedStatistics) { 
RaptorOutputTableHandle table = (RaptorOutputTableHandle) outputTableHandle; long transactionId = table.getTransactionId(); @@ -729,7 +730,7 @@ private List getBucketColumnHandles(long tableId) } @Override - public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments) + public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { RaptorInsertTableHandle handle = (RaptorInsertTableHandle) insertHandle; long transactionId = handle.getTransactionId(); diff --git a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorMetadata.java b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorMetadata.java index 12f329780234b..7cf5f65a85ab8 100644 --- a/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorMetadata.java +++ b/presto-raptor/src/test/java/com/facebook/presto/raptor/metadata/TestRaptorMetadata.java @@ -391,7 +391,7 @@ public void testCreateBucketedTableAsSelect() assertEquals(partitioning.getDistributionId(), 1); ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(SESSION, ordersTable, Optional.of(layout)); - metadata.finishCreateTable(SESSION, outputHandle, ImmutableList.of()); + metadata.finishCreateTable(SESSION, outputHandle, ImmutableList.of(), ImmutableList.of()); ConnectorTableHandle tableHandle = metadata.getTableHandle(SESSION, DEFAULT_TEST_ORDERS); assertInstanceOf(tableHandle, RaptorTableHandle.class); @@ -717,7 +717,7 @@ public void testTransactionTableWrite() assertNull(transactionSuccessful(transactionId)); // commit table creation - metadata.finishCreateTable(SESSION, outputHandle, ImmutableList.of()); + metadata.finishCreateTable(SESSION, outputHandle, ImmutableList.of(), ImmutableList.of()); assertTrue(transactionExists(transactionId)); assertTrue(transactionSuccessful(transactionId)); } @@ -740,7 +740,7 @@ public void 
testTransactionInsert() assertNull(transactionSuccessful(transactionId)); // commit insert - metadata.finishInsert(SESSION, insertHandle, ImmutableList.of()); + metadata.finishInsert(SESSION, insertHandle, ImmutableList.of(), ImmutableList.of()); assertTrue(transactionExists(transactionId)); assertTrue(transactionSuccessful(transactionId)); } @@ -805,7 +805,7 @@ public void testTransactionAbort() // commit table creation try { - metadata.finishCreateTable(SESSION, outputHandle, ImmutableList.of()); + metadata.finishCreateTable(SESSION, outputHandle, ImmutableList.of(), ImmutableList.of()); fail("expected exception"); } catch (PrestoException e) { diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/connector/ConnectorMetadata.java b/presto-spi/src/main/java/com/facebook/presto/spi/connector/ConnectorMetadata.java index d831471751340..aab864f0eaf62 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/connector/ConnectorMetadata.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/connector/ConnectorMetadata.java @@ -36,7 +36,9 @@ import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.security.GrantInfo; import com.facebook.presto.spi.security.Privilege; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.statistics.TableStatistics; +import com.facebook.presto.spi.statistics.TableStatisticsMetadata; import io.airlift.slice.Slice; import java.util.Collection; @@ -264,6 +266,22 @@ default Optional getInsertLayout(ConnectorSession sessi return Optional.of(new ConnectorNewTableLayout(partitioningHandle, partitionColumns)); } + /** + * Describes statistics that must be collected for a new table. 
+ */ + default TableStatisticsMetadata getNewTableStatisticsMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata) + { + return TableStatisticsMetadata.empty(); + } + + /** + * Describes statistics that must be collected for an existing table during the insert operation. + */ + default TableStatisticsMetadata getInsertIntoTableStatisticsMetadata(ConnectorSession session, ConnectorTableHandle tableHandle) + { + return TableStatisticsMetadata.empty(); + } + /** * Begin the atomic creation of a table with data. */ @@ -275,7 +293,7 @@ default ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Co /** * Finish a table creation with data after the data is written. */ - default Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + default Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { throw new PrestoException(GENERIC_INTERNAL_ERROR, "ConnectorMetadata beginCreateTable() is implemented without finishCreateTable()"); } @@ -302,7 +320,7 @@ default ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connect /** * Finish insert query */ - default Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments) + default Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { throw new PrestoException(GENERIC_INTERNAL_ERROR, "ConnectorMetadata beginInsert() is implemented without finishInsert()"); } diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/connector/classloader/ClassLoaderSafeConnectorMetadata.java b/presto-spi/src/main/java/com/facebook/presto/spi/connector/classloader/ClassLoaderSafeConnectorMetadata.java index 3766af4e45daa..5c73c27204615 100644 --- 
a/presto-spi/src/main/java/com/facebook/presto/spi/connector/classloader/ClassLoaderSafeConnectorMetadata.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/connector/classloader/ClassLoaderSafeConnectorMetadata.java @@ -38,7 +38,9 @@ import com.facebook.presto.spi.predicate.TupleDomain; import com.facebook.presto.spi.security.GrantInfo; import com.facebook.presto.spi.security.Privilege; +import com.facebook.presto.spi.statistics.ComputedStatistics; import com.facebook.presto.spi.statistics.TableStatistics; +import com.facebook.presto.spi.statistics.TableStatisticsMetadata; import io.airlift.slice.Slice; import java.util.Collection; @@ -98,6 +100,22 @@ public Optional getInsertLayout(ConnectorSession sessio } } + @Override + public TableStatisticsMetadata getNewTableStatisticsMetadata(ConnectorSession session, ConnectorTableMetadata tableMetadata) + { + try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { + return delegate.getNewTableStatisticsMetadata(session, tableMetadata); + } + } + + @Override + public TableStatisticsMetadata getInsertIntoTableStatisticsMetadata(ConnectorSession session, ConnectorTableHandle tableHandle) + { + try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { + return delegate.getInsertIntoTableStatisticsMetadata(session, tableHandle); + } + } + @Override public boolean schemaExists(ConnectorSession session, String schemaName) { @@ -267,10 +285,10 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con } @Override - public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments) + public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, List computedStatistics) { try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { - return delegate.finishCreateTable(session, tableHandle, fragments); + 
return delegate.finishCreateTable(session, tableHandle, fragments, computedStatistics); } } @@ -299,10 +317,10 @@ public ConnectorInsertTableHandle beginInsert(ConnectorSession session, Connecto } @Override - public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments) + public Optional finishInsert(ConnectorSession session, ConnectorInsertTableHandle insertHandle, Collection fragments, List computedStatistics) { try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) { - return delegate.finishInsert(session, insertHandle, fragments); + return delegate.finishInsert(session, insertHandle, fragments, computedStatistics); } } diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatisticMetadata.java b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatisticMetadata.java new file mode 100644 index 0000000000000..f473209a19bfe --- /dev/null +++ b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatisticMetadata.java @@ -0,0 +1,77 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.spi.statistics; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.util.Objects; + +import static java.util.Objects.requireNonNull; + +public class ColumnStatisticMetadata +{ + private final String columnName; + private final ColumnStatisticType statisticType; + + @JsonCreator + public ColumnStatisticMetadata( + @JsonProperty("columnName") String columnName, + @JsonProperty("statisticType") ColumnStatisticType statisticType) + { + this.columnName = requireNonNull(columnName, "columnName is null"); + this.statisticType = requireNonNull(statisticType, "statisticType is null"); + } + + @JsonProperty + public String getColumnName() + { + return columnName; + } + + @JsonProperty + public ColumnStatisticType getStatisticType() + { + return statisticType; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ColumnStatisticMetadata that = (ColumnStatisticMetadata) o; + return Objects.equals(columnName, that.columnName) && + statisticType == that.statisticType; + } + + @Override + public int hashCode() + { + return Objects.hash(columnName, statisticType); + } + + @Override + public String toString() + { + return "ColumnStatisticMetadata{" + + "columnName='" + columnName + '\'' + + ", statisticType=" + statisticType + + '}'; + } +} diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatisticType.java b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatisticType.java new file mode 100644 index 0000000000000..82cc08bd99e91 --- /dev/null +++ b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ColumnStatisticType.java @@ -0,0 +1,25 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.spi.statistics; + +public enum ColumnStatisticType +{ + MIN, + MAX, + NUMBER_OF_DISTINCT_VALUES, + NUMBER_OF_NON_NULL_VALUES, + MAX_VALUE_SIZE_IN_BYTES, + AVERAGE_VALUE_SIZE_IN_BYTES, + NUMBER_OF_TRUE_VALUES, +} diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ComputedStatistics.java b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ComputedStatistics.java new file mode 100644 index 0000000000000..b016bc7310af9 --- /dev/null +++ b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/ComputedStatistics.java @@ -0,0 +1,102 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.facebook.presto.spi.statistics; + +import com.facebook.presto.spi.block.Block; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static java.util.Collections.unmodifiableList; +import static java.util.Collections.unmodifiableMap; +import static java.util.Objects.requireNonNull; + +public class ComputedStatistics +{ + private final List groupingColumns; + private final List gropingValues; + private final Map tableStatistics; + private final Map columnStatistics; + + public ComputedStatistics( + List groupingColumns, + List gropingValues, + Map tableStatistics, + Map columnStatistics) + { + this.groupingColumns = unmodifiableList(requireNonNull(groupingColumns, "groupingColumns is null")); + this.gropingValues = unmodifiableList(requireNonNull(gropingValues, "gropingValues is null")); + this.tableStatistics = unmodifiableMap(requireNonNull(tableStatistics, "tableStatistics is null")); + this.columnStatistics = unmodifiableMap(requireNonNull(columnStatistics, "columnStatistics is null")); + } + + public List getGroupingColumns() + { + return groupingColumns; + } + + public List getGropingValues() + { + return gropingValues; + } + + public Map getTableStatistics() + { + return tableStatistics; + } + + public Map getColumnStatistics() + { + return columnStatistics; + } + + public static Builder builder(List groupingColumns, List gropingValues) + { + return new Builder(groupingColumns, gropingValues); + } + + public static class Builder + { + private final List groupingColumns; + private final List gropingValues; + private final Map tableStatistics = new HashMap<>(); + private final Map columnStatistics = new HashMap<>(); + + public Builder(List groupingColumns, List gropingValues) + { + this.groupingColumns = requireNonNull(groupingColumns, "groupingColumns is null"); + this.gropingValues = requireNonNull(gropingValues, "gropingValues is null"); + } + + public void addTableStatistic(TableStatisticType type, Block 
value) + { + tableStatistics.put(type, value); + } + + public void addColumnStatistic(ColumnStatisticMetadata columnStatisticMetadata, Block value) + { + columnStatistics.put(columnStatisticMetadata, value); + } + + public ComputedStatistics build() + { + return new ComputedStatistics( + unmodifiableList(groupingColumns), + unmodifiableList(gropingValues), + unmodifiableMap(tableStatistics), + unmodifiableMap(columnStatistics)); + } + } +} diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/TableStatisticType.java b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/TableStatisticType.java new file mode 100644 index 0000000000000..e47367ba1e500 --- /dev/null +++ b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/TableStatisticType.java @@ -0,0 +1,19 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.spi.statistics; + +public enum TableStatisticType +{ + ROW_COUNT, +} diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/statistics/TableStatisticsMetadata.java b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/TableStatisticsMetadata.java new file mode 100644 index 0000000000000..79d098b487604 --- /dev/null +++ b/presto-spi/src/main/java/com/facebook/presto/spi/statistics/TableStatisticsMetadata.java @@ -0,0 +1,69 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.spi.statistics; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static java.util.Collections.emptyList; +import static java.util.Collections.emptySet; +import static java.util.Collections.unmodifiableList; +import static java.util.Collections.unmodifiableSet; +import static java.util.Objects.requireNonNull; + +public class TableStatisticsMetadata +{ + private static final TableStatisticsMetadata EMPTY_STATISTICS_METADATA = new TableStatisticsMetadata(emptySet(), emptySet(), emptyList()); + + private final Set columnStatistics; + private final Set tableStatistics; + private final List groupingColumns; + + public static TableStatisticsMetadata empty() + { + return EMPTY_STATISTICS_METADATA; + } + + public TableStatisticsMetadata( + Set columnStatistics, + Set tableStatistics, + List groupingColumns) + { + this.columnStatistics = unmodifiableSet(new HashSet<>(requireNonNull(columnStatistics, "columnStatistics is null"))); + this.tableStatistics = unmodifiableSet(new HashSet<>(requireNonNull(tableStatistics, "tableStatistics is null"))); + this.groupingColumns = unmodifiableList(new ArrayList<>(requireNonNull(groupingColumns, "groupingColumns is null"))); + } + + public Set getColumnStatistics() + { + return columnStatistics; + } + + public Set getTableStatistics() + { + return tableStatistics; + } + + public List getGroupingColumns() + { + return groupingColumns; + } + + public boolean isEmpty() + { + return tableStatistics.isEmpty() && 
columnStatistics.isEmpty(); + } +}