diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/MetastoreUtil.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/MetastoreUtil.java index bf7202e2e9918..3540d53caa2ed 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/MetastoreUtil.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/MetastoreUtil.java @@ -15,6 +15,7 @@ import com.facebook.presto.hive.HdfsEnvironment; import com.facebook.presto.hive.HdfsEnvironment.HdfsContext; +import com.facebook.presto.hive.HiveBasicStatistics; import com.facebook.presto.hive.PartitionOfflineException; import com.facebook.presto.hive.TableOfflineException; import com.facebook.presto.spi.ErrorCodeSupplier; @@ -25,6 +26,8 @@ import com.facebook.presto.spi.TableNotFoundException; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.predicate.Domain; +import com.facebook.presto.spi.statistics.ColumnStatisticType; +import com.facebook.presto.spi.type.ArrayType; import com.facebook.presto.spi.type.BigintType; import com.facebook.presto.spi.type.BooleanType; import com.facebook.presto.spi.type.CharType; @@ -33,7 +36,9 @@ import com.facebook.presto.spi.type.Decimals; import com.facebook.presto.spi.type.DoubleType; import com.facebook.presto.spi.type.IntegerType; +import com.facebook.presto.spi.type.MapType; import com.facebook.presto.spi.type.RealType; +import com.facebook.presto.spi.type.RowType; import com.facebook.presto.spi.type.SmallintType; import com.facebook.presto.spi.type.StandardTypes; import com.facebook.presto.spi.type.TimestampType; @@ -43,6 +48,9 @@ import com.facebook.presto.spi.type.VarcharType; import com.google.common.base.CharMatcher; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.primitives.Longs; import io.airlift.slice.Slice; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -55,6 +63,8 @@ import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; +import javax.annotation.Nullable; + import java.io.IOException; import java.math.BigInteger; import java.sql.Date; @@ -67,13 +77,34 @@ import java.util.Map.Entry; import java.util.Objects; import java.util.Optional; +import java.util.OptionalLong; import java.util.Properties; +import java.util.Set; import java.util.concurrent.TimeUnit; import static com.facebook.presto.hive.MetastoreErrorCode.HIVE_FILESYSTEM_ERROR; import static com.facebook.presto.hive.MetastoreErrorCode.HIVE_INVALID_PARTITION_VALUE; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE_SIZE_IN_BYTES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.MIN_VALUE; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_TRUE_VALUES; +import static com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES; +import static com.facebook.presto.spi.type.BigintType.BIGINT; +import static com.facebook.presto.spi.type.BooleanType.BOOLEAN; +import static com.facebook.presto.spi.type.Chars.isCharType; import static com.facebook.presto.spi.type.Chars.padSpaces; +import static com.facebook.presto.spi.type.DateType.DATE; +import static com.facebook.presto.spi.type.DoubleType.DOUBLE; +import static com.facebook.presto.spi.type.IntegerType.INTEGER; +import static com.facebook.presto.spi.type.RealType.REAL; +import static com.facebook.presto.spi.type.SmallintType.SMALLINT; +import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP; +import static com.facebook.presto.spi.type.TinyintType.TINYINT; +import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY; +import static com.facebook.presto.spi.type.Varchars.isVarcharType; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.isNullOrEmpty; import static com.google.common.base.Strings.padEnd; @@ -108,7 +139,13 @@ public class MetastoreUtil public static final String HIVE_DEFAULT_DYNAMIC_PARTITION = "__HIVE_DEFAULT_PARTITION__"; @SuppressWarnings("OctalInteger") public static final FsPermission ALL_PERMISSIONS = new FsPermission((short) 0777); + private static final String PARTITION_VALUE_WILDCARD = ""; + private static final String NUM_FILES = "numFiles"; + private static final String NUM_ROWS = "numRows"; + private static final String RAW_DATA_SIZE = "rawDataSize"; + private static final String TOTAL_SIZE = "totalSize"; + private static final Set STATS_PROPERTIES = ImmutableSet.of(NUM_FILES, NUM_ROWS, RAW_DATA_SIZE, TOTAL_SIZE); private MetastoreUtil() { @@ -693,4 +730,103 @@ else if (type instanceof TinyintType } return val; } + + /** + * Hive calculates NDV considering null as a distinct value + */ + public static OptionalLong fromMetastoreDistinctValuesCount(OptionalLong distinctValuesCount, OptionalLong nullsCount, OptionalLong rowCount) + { + if (distinctValuesCount.isPresent() && nullsCount.isPresent() && rowCount.isPresent()) { + return OptionalLong.of(fromMetastoreDistinctValuesCount(distinctValuesCount.getAsLong(), nullsCount.getAsLong(), rowCount.getAsLong())); + } + return OptionalLong.empty(); + } + + public static long fromMetastoreDistinctValuesCount(long distinctValuesCount, long nullsCount, long rowCount) + { + long nonNullsCount = rowCount - nullsCount; + if (nullsCount > 0 && distinctValuesCount > 0) { + distinctValuesCount--; + } + + // normalize distinctValuesCount in case there is a non null element + if (nonNullsCount > 0 && distinctValuesCount == 0) { + distinctValuesCount = 1; + } + + // the metastore may store an estimate, so the value stored may be higher than the total number of rows + if (distinctValuesCount > nonNullsCount) { + return nonNullsCount; + } + return distinctValuesCount; + } + + public static boolean isNumericType(Type type) + { + return type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT) || + type.equals(DOUBLE) || type.equals(REAL) || + type instanceof DecimalType; + } + + public static Set getSupportedColumnStatistics(Type type) + { + if (type.equals(BOOLEAN)) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, NUMBER_OF_TRUE_VALUES); + } + if (isNumericType(type) || type.equals(DATE) || type.equals(TIMESTAMP)) { + // TODO #7122 support non-legacy TIMESTAMP + return ImmutableSet.of(MIN_VALUE, MAX_VALUE, NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES); + } + if (isVarcharType(type) || isCharType(type)) { + // TODO Collect MIN,MAX once it is used by the optimizer + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, NUMBER_OF_DISTINCT_VALUES, TOTAL_SIZE_IN_BYTES, MAX_VALUE_SIZE_IN_BYTES); + } + if (type.equals(VARBINARY)) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, TOTAL_SIZE_IN_BYTES, MAX_VALUE_SIZE_IN_BYTES); + } + if (type instanceof ArrayType || type instanceof RowType || type instanceof MapType) { + return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, TOTAL_SIZE_IN_BYTES); + } + // Throwing here to make sure this method is updated when a new type is added in Hive connector + throw new IllegalArgumentException("Unsupported type: " + type); + } + + public static HiveBasicStatistics getHiveBasicStatistics(Map parameters) + { + OptionalLong numFiles = parse(parameters.get(NUM_FILES)); + OptionalLong numRows = parse(parameters.get(NUM_ROWS)); + OptionalLong inMemoryDataSizeInBytes = parse(parameters.get(RAW_DATA_SIZE)); + OptionalLong onDiskDataSizeInBytes = parse(parameters.get(TOTAL_SIZE)); + return new HiveBasicStatistics(numFiles, numRows, inMemoryDataSizeInBytes, onDiskDataSizeInBytes); + } + + private static OptionalLong parse(@Nullable String parameterValue) + { + if (parameterValue == null) { + return OptionalLong.empty(); + } + Long longValue = Longs.tryParse(parameterValue); + if (longValue == null || longValue < 0) { + return OptionalLong.empty(); + } + return OptionalLong.of(longValue); + } + + public static Map updateStatisticsParameters(Map parameters, HiveBasicStatistics statistics) + { + ImmutableMap.Builder result = ImmutableMap.builder(); + + parameters.forEach((key, value) -> { + if (!STATS_PROPERTIES.contains(key)) { + result.put(key, value); + } + }); + + statistics.getFileCount().ifPresent(count -> result.put(NUM_FILES, Long.toString(count))); + statistics.getRowCount().ifPresent(count -> result.put(NUM_ROWS, Long.toString(count))); + statistics.getInMemoryDataSizeInBytes().ifPresent(size -> result.put(RAW_DATA_SIZE, Long.toString(size))); + statistics.getOnDiskDataSizeInBytes().ifPresent(size -> result.put(TOTAL_SIZE, Long.toString(size))); + + return result.build(); + } } diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/alluxio/AlluxioHiveMetastore.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/alluxio/AlluxioHiveMetastore.java index 5b26561495bf5..621ee903aa402 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/alluxio/AlluxioHiveMetastore.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/alluxio/AlluxioHiveMetastore.java @@ -15,6 +15,7 @@ import alluxio.client.table.TableMasterClient; import alluxio.exception.status.AlluxioStatusException; +import alluxio.grpc.table.ColumnStatisticsInfo; import alluxio.grpc.table.Constraint; import alluxio.grpc.table.layout.hive.PartitionInfo; import com.facebook.presto.hive.HiveBasicStatistics; @@ -22,16 +23,19 @@ import com.facebook.presto.hive.metastore.Column; import com.facebook.presto.hive.metastore.Database; import com.facebook.presto.hive.metastore.ExtendedHiveMetastore; +import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; +import com.facebook.presto.hive.metastore.MetastoreUtil; import com.facebook.presto.hive.metastore.Partition; import com.facebook.presto.hive.metastore.PartitionStatistics; import com.facebook.presto.hive.metastore.PartitionWithStatistics; import com.facebook.presto.hive.metastore.PrincipalPrivileges; import com.facebook.presto.hive.metastore.Table; import com.facebook.presto.hive.metastore.thrift.HiveMetastore; -import com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil; import com.facebook.presto.spi.NotFoundException; import com.facebook.presto.spi.PrestoException; +import com.facebook.presto.spi.SchemaTableName; +import com.facebook.presto.spi.TableNotFoundException; import com.facebook.presto.spi.predicate.Domain; import com.facebook.presto.spi.security.PrestoPrincipal; import com.facebook.presto.spi.security.RoleGrant; @@ -45,15 +49,16 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalLong; import java.util.Set; import java.util.function.Function; import static com.facebook.presto.hive.MetastoreErrorCode.HIVE_METASTORE_ERROR; import static com.facebook.presto.hive.metastore.MetastoreUtil.convertPredicateToParts; +import static com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; import static java.util.Objects.requireNonNull; -import static java.util.function.Function.identity; /** * Implementation of the {@link HiveMetastore} interface through Alluxio. @@ -108,19 +113,26 @@ public Optional getTable(String databaseName, String tableName) @Override public Set getSupportedColumnStatistics(Type type) { - throw new UnsupportedOperationException("getSupportedColumnStatistics is not supported in AlluxioHiveMetastore"); + return MetastoreUtil.getSupportedColumnStatistics(type); + } + + private Map groupStatisticsByColumn(List statistics, OptionalLong rowCount) + { + return statistics.stream() + .collect(toImmutableMap(ColumnStatisticsInfo::getColName, statisticsInfo -> AlluxioProtoUtils.fromProto(statisticsInfo.getData(), rowCount))); } @Override public PartitionStatistics getTableStatistics(String databaseName, String tableName) { try { - Table table = getTable(databaseName, tableName).orElseThrow(() -> new PrestoException( - HIVE_METASTORE_ERROR, - String.format("Could not retrieve table %s.%s", databaseName, tableName))); - HiveBasicStatistics basicStats = ThriftMetastoreUtil.getHiveBasicStatistics(table.getParameters()); - // TODO implement logic to populate Map - return new PartitionStatistics(basicStats, ImmutableMap.of()); + Table table = getTable(databaseName, tableName).orElseThrow( + () -> new PrestoException(HIVE_METASTORE_ERROR, String.format("Could not retrieve table %s.%s", databaseName, tableName))); + HiveBasicStatistics basicStatistics = getHiveBasicStatistics(table.getParameters()); + List columns = table.getPartitionColumns(); + List columnNames = columns.stream().map(Column::getName).collect(toImmutableList()); + List columnStatistics = client.getTableColumnStatistics(table.getDatabaseName(), table.getTableName(), columnNames); + return new PartitionStatistics(basicStatistics, groupStatisticsByColumn(columnStatistics, basicStatistics.getRowCount())); } catch (Exception e) { throw new PrestoException(HIVE_METASTORE_ERROR, e); @@ -130,9 +142,45 @@ public PartitionStatistics getTableStatistics(String databaseName, String tableN @Override public Map getPartitionStatistics(String databaseName, String tableName, Set partitionNames) { - // TODO implement partition statistics - // currently returns a map of partitionName to empty statistics to satisfy presto requirements - return partitionNames.stream().collect(toImmutableMap(identity(), (p) -> PartitionStatistics.empty())); + Table table = getTable(databaseName, tableName).orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + + Map partitionBasicStatistics = getPartitionsByNames(databaseName, tableName, ImmutableList.copyOf(partitionNames)).entrySet().stream() + .filter(entry -> entry.getValue().isPresent()) + .collect(toImmutableMap( + entry -> MetastoreUtil.makePartName(table.getPartitionColumns(), entry.getValue().get().getValues()), + entry -> getHiveBasicStatistics(entry.getValue().get().getParameters()))); + + Map partitionRowCounts = partitionBasicStatistics.entrySet().stream() + .collect(toImmutableMap(Map.Entry::getKey, entry -> entry.getValue().getRowCount())); + + List dataColumns = table.getDataColumns().stream() + .map(Column::getName) + .collect(toImmutableList()); + Map> columnStatisticss; + try { + columnStatisticss = client.getPartitionColumnStatistics( + table.getDatabaseName(), + table.getTableName(), + partitionBasicStatistics.keySet().stream().collect(toImmutableList()), + dataColumns); + } + catch (AlluxioStatusException e) { + throw new PrestoException(HIVE_METASTORE_ERROR, e); + } + + Map> partitionColumnStatistics = columnStatisticss.entrySet().stream() + .filter(entry -> !entry.getValue().isEmpty()) + .collect(toImmutableMap( + Map.Entry::getKey, + entry -> groupStatisticsByColumn(entry.getValue(), partitionRowCounts.getOrDefault(entry.getKey(), OptionalLong.empty())))); + + ImmutableMap.Builder result = ImmutableMap.builder(); + for (String partitionName : partitionBasicStatistics.keySet()) { + HiveBasicStatistics basicStatistics = partitionBasicStatistics.get(partitionName); + Map columnStatistics = partitionColumnStatistics.getOrDefault(partitionName, ImmutableMap.of()); + result.put(partitionName, new PartitionStatistics(basicStatistics, columnStatistics)); + } + return result.build(); } @Override diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/alluxio/AlluxioProtoUtils.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/alluxio/AlluxioProtoUtils.java index 4f05fccc00858..33cba3d9ea917 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/alluxio/AlluxioProtoUtils.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/alluxio/AlluxioProtoUtils.java @@ -13,26 +13,56 @@ */ package com.facebook.presto.hive.metastore.alluxio; +import alluxio.grpc.table.BinaryColumnStatsData; +import alluxio.grpc.table.BooleanColumnStatsData; +import alluxio.grpc.table.ColumnStatisticsData; +import alluxio.grpc.table.Date; +import alluxio.grpc.table.DateColumnStatsData; +import alluxio.grpc.table.Decimal; +import alluxio.grpc.table.DecimalColumnStatsData; +import alluxio.grpc.table.DoubleColumnStatsData; import alluxio.grpc.table.FieldSchema; import alluxio.grpc.table.Layout; +import alluxio.grpc.table.LongColumnStatsData; +import alluxio.grpc.table.StringColumnStatsData; import alluxio.grpc.table.layout.hive.PartitionInfo; import alluxio.shaded.client.com.google.protobuf.InvalidProtocolBufferException; import com.facebook.presto.hive.HiveBucketProperty; import com.facebook.presto.hive.HiveType; import com.facebook.presto.hive.metastore.Column; import com.facebook.presto.hive.metastore.Database; +import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.Partition; import com.facebook.presto.hive.metastore.PrestoTableType; import com.facebook.presto.hive.metastore.SortingColumn; import com.facebook.presto.hive.metastore.StorageFormat; import com.facebook.presto.hive.metastore.Table; +import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.security.PrincipalType; import com.google.common.collect.Lists; +import javax.annotation.Nullable; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.time.LocalDate; import java.util.List; import java.util.Optional; +import java.util.OptionalDouble; +import java.util.OptionalLong; import java.util.Set; +import static com.facebook.presto.hive.MetastoreErrorCode.HIVE_INVALID_METADATA; +import static com.facebook.presto.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics; +import static com.facebook.presto.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics; +import static com.facebook.presto.hive.metastore.HiveColumnStatistics.createDateColumnStatistics; +import static com.facebook.presto.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics; +import static com.facebook.presto.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics; +import static com.facebook.presto.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics; +import static com.facebook.presto.hive.metastore.HiveColumnStatistics.createStringColumnStatistics; +import static com.facebook.presto.hive.metastore.MetastoreUtil.fromMetastoreDistinctValuesCount; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreNullsCount; +import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getTotalSizeInBytes; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSet.toImmutableSet; @@ -92,25 +122,76 @@ public static Table fromProto(alluxio.grpc.table.TableInfo table) } } - private static SortingColumn fromProto(alluxio.grpc.table.layout.hive.SortingColumn column) + public static HiveColumnStatistics fromProto(ColumnStatisticsData columnStatistics, OptionalLong rowCount) { - if (column.getOrder().equals(alluxio.grpc.table.layout.hive.SortingColumn.SortingOrder.ASCENDING)) { - return new SortingColumn(column.getColumnName(), SortingColumn.Order.ASCENDING); + if (columnStatistics.hasLongStats()) { + LongColumnStatsData longStatsData = columnStatistics.getLongStats(); + OptionalLong min = longStatsData.hasLowValue() ? OptionalLong.of(longStatsData.getLowValue()) : OptionalLong.empty(); + OptionalLong max = longStatsData.hasHighValue() ? OptionalLong.of(longStatsData.getHighValue()) : OptionalLong.empty(); + OptionalLong nullsCount = longStatsData.hasNumNulls() ? fromMetastoreNullsCount(longStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong distinctValuesCount = longStatsData.hasNumDistincts() ? OptionalLong.of(longStatsData.getNumDistincts()) : OptionalLong.empty(); + return createIntegerColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); } - if (column.getOrder().equals(alluxio.grpc.table.layout.hive.SortingColumn.SortingOrder.DESCENDING)) { - return new SortingColumn(column.getColumnName(), SortingColumn.Order.DESCENDING); + if (columnStatistics.hasDoubleStats()) { + DoubleColumnStatsData doubleStatsData = columnStatistics.getDoubleStats(); + OptionalDouble min = doubleStatsData.hasLowValue() ? OptionalDouble.of(doubleStatsData.getLowValue()) : OptionalDouble.empty(); + OptionalDouble max = doubleStatsData.hasHighValue() ? OptionalDouble.of(doubleStatsData.getHighValue()) : OptionalDouble.empty(); + OptionalLong nullsCount = doubleStatsData.hasNumNulls() ? fromMetastoreNullsCount(doubleStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong distinctValuesCount = doubleStatsData.hasNumDistincts() ? OptionalLong.of(doubleStatsData.getNumDistincts()) : OptionalLong.empty(); + return createDoubleColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); } - throw new IllegalArgumentException("Invalid sort order: " + column.getOrder()); - } - - private static Optional fromProto(alluxio.grpc.table.layout.hive.HiveBucketProperty property) - { - // must return empty if buckets <= 0 - if (!property.hasBucketCount() || property.getBucketCount() <= 0) { - return Optional.empty(); + if (columnStatistics.hasDecimalStats()) { + DecimalColumnStatsData decimalStatsData = columnStatistics.getDecimalStats(); + Optional min = decimalStatsData.hasLowValue() ? fromMetastoreDecimal(decimalStatsData.getLowValue()) : Optional.empty(); + Optional max = decimalStatsData.hasHighValue() ? fromMetastoreDecimal(decimalStatsData.getHighValue()) : Optional.empty(); + OptionalLong nullsCount = decimalStatsData.hasNumNulls() ? fromMetastoreNullsCount(decimalStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong distinctValuesCount = decimalStatsData.hasNumDistincts() ? OptionalLong.of(decimalStatsData.getNumDistincts()) : OptionalLong.empty(); + return createDecimalColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); } - List sortedBy = property.getSortedByList().stream().map(AlluxioProtoUtils::fromProto).collect(toImmutableList()); - return Optional.of(new HiveBucketProperty(property.getBucketedByList(), (int) property.getBucketCount(), sortedBy)); + if (columnStatistics.hasDateStats()) { + DateColumnStatsData dateStatsData = columnStatistics.getDateStats(); + Optional min = dateStatsData.hasLowValue() ? fromMetastoreDate(dateStatsData.getLowValue()) : Optional.empty(); + Optional max = dateStatsData.hasHighValue() ? fromMetastoreDate(dateStatsData.getHighValue()) : Optional.empty(); + OptionalLong nullsCount = dateStatsData.hasNumNulls() ? fromMetastoreNullsCount(dateStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong distinctValuesCount = dateStatsData.hasNumDistincts() ? OptionalLong.of(dateStatsData.getNumDistincts()) : OptionalLong.empty(); + return createDateColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); + } + if (columnStatistics.hasBooleanStats()) { + BooleanColumnStatsData booleanStatsData = columnStatistics.getBooleanStats(); + OptionalLong trueCount = OptionalLong.empty(); + OptionalLong falseCount = OptionalLong.empty(); + if (booleanStatsData.hasNumTrues() && booleanStatsData.hasNumFalses() && (booleanStatsData.getNumFalses() != -1)) { + trueCount = OptionalLong.of(booleanStatsData.getNumTrues()); + falseCount = OptionalLong.of(booleanStatsData.getNumFalses()); + } + return createBooleanColumnStatistics( + trueCount, + falseCount, + booleanStatsData.hasNumNulls() ? fromMetastoreNullsCount(booleanStatsData.getNumNulls()) : OptionalLong.empty()); + } + if (columnStatistics.hasStringStats()) { + StringColumnStatsData stringStatsData = columnStatistics.getStringStats(); + OptionalLong maxColumnLength = stringStatsData.hasMaxColLen() ? OptionalLong.of(stringStatsData.getMaxColLen()) : OptionalLong.empty(); + OptionalDouble averageColumnLength = stringStatsData.hasAvgColLen() ? OptionalDouble.of(stringStatsData.getAvgColLen()) : OptionalDouble.empty(); + OptionalLong nullsCount = stringStatsData.hasNumNulls() ? fromMetastoreNullsCount(stringStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong distinctValuesCount = stringStatsData.hasNumDistincts() ? OptionalLong.of(stringStatsData.getNumDistincts()) : OptionalLong.empty(); + return createStringColumnStatistics( + maxColumnLength, + getTotalSizeInBytes(averageColumnLength, rowCount, nullsCount), + nullsCount, + fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); + } + if (columnStatistics.hasBinaryStats()) { + BinaryColumnStatsData binaryStatsData = columnStatistics.getBinaryStats(); + OptionalLong maxColumnLength = binaryStatsData.hasMaxColLen() ? OptionalLong.of(binaryStatsData.getMaxColLen()) : OptionalLong.empty(); + OptionalDouble averageColumnLength = binaryStatsData.hasAvgColLen() ? OptionalDouble.of(binaryStatsData.getAvgColLen()) : OptionalDouble.empty(); + OptionalLong nullsCount = binaryStatsData.hasNumNulls() ? fromMetastoreNullsCount(binaryStatsData.getNumNulls()) : OptionalLong.empty(); + return createBinaryColumnStatistics( + maxColumnLength, + getTotalSizeInBytes(averageColumnLength, rowCount, nullsCount), + nullsCount); + } + throw new PrestoException(HIVE_INVALID_METADATA, "Invalid column statistics data: " + columnStatistics); } private static Column fromProto(alluxio.grpc.table.FieldSchema column) @@ -138,7 +219,7 @@ public static Partition fromProto(alluxio.grpc.table.layout.hive.PartitionInfo i return builder.build(); } - static StorageFormat fromProto(alluxio.grpc.table.layout.hive.StorageFormat format) + public static StorageFormat fromProto(alluxio.grpc.table.layout.hive.StorageFormat format) { return StorageFormat.create(format.getSerde(), format.getInputFormat(), format.getOutputFormat()); } @@ -157,4 +238,41 @@ public static List toPartitionInfo { return parts.stream().map(AlluxioProtoUtils::toPartitionInfo).collect(toImmutableList()); } + + private static SortingColumn fromProto(alluxio.grpc.table.layout.hive.SortingColumn column) + { + if (column.getOrder().equals(alluxio.grpc.table.layout.hive.SortingColumn.SortingOrder.ASCENDING)) { + return new SortingColumn(column.getColumnName(), SortingColumn.Order.ASCENDING); + } + if (column.getOrder().equals(alluxio.grpc.table.layout.hive.SortingColumn.SortingOrder.DESCENDING)) { + return new SortingColumn(column.getColumnName(), SortingColumn.Order.DESCENDING); + } + throw new IllegalArgumentException("Invalid sort order: " + column.getOrder()); + } + + private static Optional fromProto(alluxio.grpc.table.layout.hive.HiveBucketProperty property) + { + // must return empty if buckets <= 0 + if (!property.hasBucketCount() || property.getBucketCount() <= 0) { + return Optional.empty(); + } + List sortedBy = property.getSortedByList().stream().map(AlluxioProtoUtils::fromProto).collect(toImmutableList()); + return Optional.of(new HiveBucketProperty(property.getBucketedByList(), (int) property.getBucketCount(), sortedBy)); + } + + private static Optional fromMetastoreDecimal(@Nullable Decimal decimal) + { + if (decimal == null) { + return Optional.empty(); + } + return Optional.of(new BigDecimal(new BigInteger(decimal.getUnscaled().toByteArray()), decimal.getScale())); + } + + private static Optional fromMetastoreDate(@Nullable Date date) + { + if (date == null) { + return Optional.empty(); + } + return Optional.of(LocalDate.ofEpochDay(date.getDaysSinceEpoch())); + } } diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java index bda0fb4f77286..1ddeebca3d227 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/FileHiveMetastore.java @@ -26,12 +26,12 @@ import com.facebook.presto.hive.metastore.ExtendedHiveMetastore; import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; +import com.facebook.presto.hive.metastore.MetastoreUtil; import com.facebook.presto.hive.metastore.Partition; import com.facebook.presto.hive.metastore.PartitionStatistics; import com.facebook.presto.hive.metastore.PartitionWithStatistics; import com.facebook.presto.hive.metastore.PrincipalPrivileges; import com.facebook.presto.hive.metastore.Table; -import com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil; import com.facebook.presto.spi.ColumnNotFoundException; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.SchemaNotFoundException; @@ -79,15 +79,15 @@ import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP; import static com.facebook.presto.hive.metastore.MetastoreUtil.convertPredicateToParts; import static com.facebook.presto.hive.metastore.MetastoreUtil.extractPartitionValues; +import static com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics; import static com.facebook.presto.hive.metastore.MetastoreUtil.makePartName; import static com.facebook.presto.hive.metastore.MetastoreUtil.toPartitionValues; +import static com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.hive.metastore.MetastoreUtil.verifyCanDropColumn; import static com.facebook.presto.hive.metastore.PrestoTableType.EXTERNAL_TABLE; import static com.facebook.presto.hive.metastore.PrestoTableType.MANAGED_TABLE; import static com.facebook.presto.hive.metastore.PrestoTableType.TEMPORARY_TABLE; import static com.facebook.presto.hive.metastore.PrestoTableType.VIRTUAL_VIEW; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.facebook.presto.spi.security.PrincipalType.ROLE; @@ -282,7 +282,7 @@ public synchronized Optional
getTable(String databaseName, String tableNa @Override public Set getSupportedColumnStatistics(Type type) { - return ThriftMetastoreUtil.getSupportedColumnStatistics(type); + return MetastoreUtil.getSupportedColumnStatistics(type); } @Override diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/PartitionMetadata.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/PartitionMetadata.java index 28e65fc06a715..18e324a5a520f 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/PartitionMetadata.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/file/PartitionMetadata.java @@ -33,9 +33,9 @@ import java.util.Map; import java.util.Optional; +import static com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.hive.metastore.PrestoTableType.EXTERNAL_TABLE; import static com.facebook.presto.hive.metastore.StorageFormat.VIEW_STORAGE_FORMAT; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters; import static java.util.Objects.requireNonNull; public class PartitionMetadata diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/glue/GlueHiveMetastore.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/glue/GlueHiveMetastore.java index 7794b54709a06..770e3ba561197 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/glue/GlueHiveMetastore.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/glue/GlueHiveMetastore.java @@ -109,16 +109,16 @@ import static com.facebook.presto.hive.MetastoreErrorCode.HIVE_PARTITION_DROPPED_DURING_QUERY; import static com.facebook.presto.hive.metastore.MetastoreUtil.convertPredicateToParts; import static com.facebook.presto.hive.metastore.MetastoreUtil.createDirectory; +import static com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics; import static com.facebook.presto.hive.metastore.MetastoreUtil.makePartName; import static com.facebook.presto.hive.metastore.MetastoreUtil.toPartitionValues; +import static com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.hive.metastore.MetastoreUtil.verifyCanDropColumn; import static com.facebook.presto.hive.metastore.PrestoTableType.MANAGED_TABLE; import static com.facebook.presto.hive.metastore.PrestoTableType.VIRTUAL_VIEW; import static com.facebook.presto.hive.metastore.glue.GlueExpressionUtil.buildGlueExpression; import static com.facebook.presto.hive.metastore.glue.converter.GlueInputConverter.convertColumn; import static com.facebook.presto.hive.metastore.glue.converter.GlueInputConverter.toTableInput; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.facebook.presto.spi.security.PrincipalType.USER; diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/glue/converter/GlueInputConverter.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/glue/converter/GlueInputConverter.java index d325f6d05cad1..62a85e73d97b5 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/glue/converter/GlueInputConverter.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/glue/converter/GlueInputConverter.java @@ -31,10 +31,10 @@ import java.util.EnumSet; import java.util.List; +import static com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.hive.metastore.PrestoTableType.EXTERNAL_TABLE; import static com.facebook.presto.hive.metastore.PrestoTableType.MANAGED_TABLE; import static com.facebook.presto.hive.metastore.PrestoTableType.VIRTUAL_VIEW; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.google.common.base.Preconditions.checkArgument; import static java.util.stream.Collectors.toList; diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastore.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastore.java index b41e3e4112003..9bb723e90c165 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastore.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftHiveMetastore.java @@ -23,6 +23,7 @@ import com.facebook.presto.hive.metastore.Column; import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; +import com.facebook.presto.hive.metastore.MetastoreUtil; import com.facebook.presto.hive.metastore.PartitionStatistics; import com.facebook.presto.hive.metastore.PartitionWithStatistics; import com.facebook.presto.spi.PrestoException; @@ -81,15 +82,15 @@ import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.OWNERSHIP; import static com.facebook.presto.hive.metastore.MetastoreUtil.PRESTO_VIEW_FLAG; import static com.facebook.presto.hive.metastore.MetastoreUtil.convertPredicateToParts; +import static com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics; +import static com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.createMetastoreColumnStatistics; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiPrincipalType; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromPrestoPrincipalType; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromRolePrincipalGrants; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.parsePrivilege; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.toMetastoreApiPartition; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.spi.StandardErrorCode.ALREADY_EXISTS; import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED; import static com.facebook.presto.spi.security.PrincipalType.USER; @@ -250,7 +251,7 @@ public Optional
getTable(String databaseName, String tableName) @Override public Set getSupportedColumnStatistics(Type type) { - return ThriftMetastoreUtil.getSupportedColumnStatistics(type); + return MetastoreUtil.getSupportedColumnStatistics(type); } private static boolean isPrestoView(Table table) diff --git a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java index 84675d1097104..ae25ab708d75a 100644 --- a/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java +++ b/presto-hive-metastore/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java @@ -13,13 +13,13 @@ */ package com.facebook.presto.hive.metastore.thrift; -import com.facebook.presto.hive.HiveBasicStatistics; import com.facebook.presto.hive.HiveBucketProperty; import com.facebook.presto.hive.HiveType; import com.facebook.presto.hive.metastore.Column; import com.facebook.presto.hive.metastore.Database; import com.facebook.presto.hive.metastore.HiveColumnStatistics; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; +import com.facebook.presto.hive.metastore.MetastoreUtil; import com.facebook.presto.hive.metastore.Partition; import com.facebook.presto.hive.metastore.PartitionWithStatistics; import com.facebook.presto.hive.metastore.PrestoTableType; @@ -34,17 +34,10 @@ import com.facebook.presto.spi.security.PrincipalType; import com.facebook.presto.spi.security.RoleGrant; import com.facebook.presto.spi.security.SelectedRole; -import com.facebook.presto.spi.statistics.ColumnStatisticType; -import com.facebook.presto.spi.type.ArrayType; -import com.facebook.presto.spi.type.DecimalType; -import com.facebook.presto.spi.type.MapType; -import com.facebook.presto.spi.type.RowType; -import com.facebook.presto.spi.type.Type; import com.google.common.collect.AbstractIterator; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Streams; -import com.google.common.primitives.Longs; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -103,31 +96,13 @@ import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.SELECT; import static com.facebook.presto.hive.metastore.HivePrivilegeInfo.HivePrivilege.UPDATE; import static com.facebook.presto.hive.metastore.MetastoreUtil.AVRO_SCHEMA_URL_KEY; +import static com.facebook.presto.hive.metastore.MetastoreUtil.fromMetastoreDistinctValuesCount; import static com.facebook.presto.hive.metastore.PrestoTableType.EXTERNAL_TABLE; import static com.facebook.presto.hive.metastore.PrestoTableType.MANAGED_TABLE; import static com.facebook.presto.hive.metastore.PrestoTableType.OTHER; import static com.facebook.presto.hive.metastore.PrestoTableType.VIRTUAL_VIEW; import static com.facebook.presto.spi.security.PrincipalType.ROLE; import static com.facebook.presto.spi.security.PrincipalType.USER; -import static com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE; -import static com.facebook.presto.spi.statistics.ColumnStatisticType.MAX_VALUE_SIZE_IN_BYTES; -import static com.facebook.presto.spi.statistics.ColumnStatisticType.MIN_VALUE; -import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES; -import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES; -import static com.facebook.presto.spi.statistics.ColumnStatisticType.NUMBER_OF_TRUE_VALUES; -import static com.facebook.presto.spi.statistics.ColumnStatisticType.TOTAL_SIZE_IN_BYTES; -import static com.facebook.presto.spi.type.BigintType.BIGINT; -import static com.facebook.presto.spi.type.BooleanType.BOOLEAN; -import static com.facebook.presto.spi.type.Chars.isCharType; -import static com.facebook.presto.spi.type.DateType.DATE; -import static com.facebook.presto.spi.type.DoubleType.DOUBLE; -import static com.facebook.presto.spi.type.IntegerType.INTEGER; -import static com.facebook.presto.spi.type.RealType.REAL; -import static com.facebook.presto.spi.type.SmallintType.SMALLINT; -import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP; -import static com.facebook.presto.spi.type.TinyintType.TINYINT; -import static com.facebook.presto.spi.type.VarbinaryType.VARBINARY; -import static com.facebook.presto.spi.type.Varchars.isVarcharType; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.emptyToNull; import static com.google.common.base.Strings.nullToEmpty; @@ -150,11 +125,6 @@ public final class ThriftMetastoreUtil { private static final String PUBLIC_ROLE_NAME = "public"; private static final String ADMIN_ROLE_NAME = "admin"; - private static final String NUM_FILES = "numFiles"; - private static final String NUM_ROWS = "numRows"; - private static final String RAW_DATA_SIZE = "rawDataSize"; - private static final String TOTAL_SIZE = "totalSize"; - private static final Set STATS_PROPERTIES = ImmutableSet.of(NUM_FILES, NUM_ROWS, RAW_DATA_SIZE, TOTAL_SIZE); private ThriftMetastoreUtil() {} @@ -372,7 +342,7 @@ public static Stream listEnabledRoles(ConnectorIdentity identity, Functi public static org.apache.hadoop.hive.metastore.api.Partition toMetastoreApiPartition(PartitionWithStatistics partitionWithStatistics) { org.apache.hadoop.hive.metastore.api.Partition partition = toMetastoreApiPartition(partitionWithStatistics.getPartition()); - partition.setParameters(updateStatisticsParameters(partition.getParameters(), partitionWithStatistics.getStatistics().getBasicStatistics())); + partition.setParameters(MetastoreUtil.updateStatisticsParameters(partition.getParameters(), partitionWithStatistics.getStatistics().getBasicStatistics())); return partition; } @@ -596,36 +566,6 @@ public static OptionalLong getTotalSizeInBytes(OptionalDouble averageColumnLengt return OptionalLong.empty(); } - /** - * Hive calculates NDV considering null as a distinct value - */ - private static OptionalLong fromMetastoreDistinctValuesCount(OptionalLong distinctValuesCount, OptionalLong nullsCount, OptionalLong rowCount) - { - if (distinctValuesCount.isPresent() && nullsCount.isPresent() && rowCount.isPresent()) { - return OptionalLong.of(fromMetastoreDistinctValuesCount(distinctValuesCount.getAsLong(), nullsCount.getAsLong(), rowCount.getAsLong())); - } - return OptionalLong.empty(); - } - - private static long fromMetastoreDistinctValuesCount(long distinctValuesCount, long nullsCount, long rowCount) - { - long nonNullsCount = rowCount - nullsCount; - if (nullsCount > 0 && distinctValuesCount > 0) { - distinctValuesCount--; - } - - // normalize distinctValuesCount in case there is a non null element - if (nonNullsCount > 0 && distinctValuesCount == 0) { - distinctValuesCount = 1; - } - - // the metastore may store an estimate, so the value stored may be higher than the total number of rows - if (distinctValuesCount > nonNullsCount) { - return nonNullsCount; - } - return distinctValuesCount; - } - public static Set fromRolePrincipalGrants(Collection grants) { return ImmutableSet.copyOf(grants.stream().map(ThriftMetastoreUtil::fromRolePrincipalGrant).collect(toList())); @@ -747,45 +687,6 @@ public static Set parsePrivilege(PrivilegeGrantInfo userGrant } } - public static HiveBasicStatistics getHiveBasicStatistics(Map parameters) - { - OptionalLong numFiles = parse(parameters.get(NUM_FILES)); - OptionalLong numRows = parse(parameters.get(NUM_ROWS)); - OptionalLong inMemoryDataSizeInBytes = parse(parameters.get(RAW_DATA_SIZE)); - OptionalLong onDiskDataSizeInBytes = parse(parameters.get(TOTAL_SIZE)); - return new HiveBasicStatistics(numFiles, numRows, inMemoryDataSizeInBytes, onDiskDataSizeInBytes); - } - - private static OptionalLong parse(@Nullable String parameterValue) - { - if (parameterValue == null) { - return OptionalLong.empty(); - } - Long longValue = Longs.tryParse(parameterValue); - if (longValue == null || longValue < 0) { - return OptionalLong.empty(); - } - return OptionalLong.of(longValue); - } - - public static Map updateStatisticsParameters(Map parameters, HiveBasicStatistics statistics) - { - ImmutableMap.Builder result = ImmutableMap.builder(); - - parameters.forEach((key, value) -> { - if (!STATS_PROPERTIES.contains(key)) { - result.put(key, value); - } - }); - - statistics.getFileCount().ifPresent(count -> result.put(NUM_FILES, Long.toString(count))); - statistics.getRowCount().ifPresent(count -> result.put(NUM_ROWS, Long.toString(count))); - statistics.getInMemoryDataSizeInBytes().ifPresent(size -> result.put(RAW_DATA_SIZE, Long.toString(size))); - statistics.getOnDiskDataSizeInBytes().ifPresent(size -> result.put(TOTAL_SIZE, Long.toString(size))); - - return result.build(); - } - public static ColumnStatisticsObj createMetastoreColumnStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics, OptionalLong rowCount) { TypeInfo typeInfo = columnType.getTypeInfo(); @@ -926,34 +827,4 @@ private static OptionalDouble getAverageColumnLength(OptionalLong totalSizeInByt } return OptionalDouble.empty(); } - - public static Set getSupportedColumnStatistics(Type type) - { - if (type.equals(BOOLEAN)) { - return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, NUMBER_OF_TRUE_VALUES); - } - if (isNumericType(type) || type.equals(DATE) || type.equals(TIMESTAMP)) { - // TODO #7122 support non-legacy TIMESTAMP - return ImmutableSet.of(MIN_VALUE, MAX_VALUE, NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES); - } - if (isVarcharType(type) || isCharType(type)) { - // TODO Collect MIN,MAX once it is used by the optimizer - return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, NUMBER_OF_DISTINCT_VALUES, TOTAL_SIZE_IN_BYTES, MAX_VALUE_SIZE_IN_BYTES); - } - if (type.equals(VARBINARY)) { - return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, TOTAL_SIZE_IN_BYTES, MAX_VALUE_SIZE_IN_BYTES); - } - if (type instanceof ArrayType || type instanceof RowType || type instanceof MapType) { - return ImmutableSet.of(NUMBER_OF_NON_NULL_VALUES, TOTAL_SIZE_IN_BYTES); - } - // Throwing here to make sure this method is updated when a new type is added in Hive connector - throw new IllegalArgumentException("Unsupported type: " + type); - } - - private static boolean isNumericType(Type type) - { - return type.equals(BIGINT) || type.equals(INTEGER) || type.equals(SMALLINT) || type.equals(TINYINT) || - type.equals(DOUBLE) || type.equals(REAL) || - type instanceof DecimalType; - } } diff --git a/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/InMemoryHiveMetastore.java b/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/InMemoryHiveMetastore.java index 3d8dc5a0c95bb..24cf727a784a1 100644 --- a/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/InMemoryHiveMetastore.java +++ b/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/InMemoryHiveMetastore.java @@ -17,6 +17,7 @@ import com.facebook.presto.hive.TableAlreadyExistsException; import com.facebook.presto.hive.metastore.Column; import com.facebook.presto.hive.metastore.HivePrivilegeInfo; +import com.facebook.presto.hive.metastore.MetastoreUtil; import com.facebook.presto.hive.metastore.PartitionStatistics; import com.facebook.presto.hive.metastore.PartitionWithStatistics; import com.facebook.presto.spi.PrestoException; @@ -417,7 +418,7 @@ public synchronized Optional
getTable(String databaseName, String tableNa @Override public Set getSupportedColumnStatistics(Type type) { - return ThriftMetastoreUtil.getSupportedColumnStatistics(type); + return MetastoreUtil.getSupportedColumnStatistics(type); } @Override diff --git a/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftHiveMetastoreUtil.java b/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftHiveMetastoreUtil.java index bf9a93a02d8c2..3a7c097d2204a 100644 --- a/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftHiveMetastoreUtil.java +++ b/presto-hive-metastore/src/test/java/com/facebook/presto/hive/metastore/thrift/TestThriftHiveMetastoreUtil.java @@ -40,9 +40,9 @@ import java.util.OptionalDouble; import java.util.OptionalLong; +import static com.facebook.presto.hive.metastore.MetastoreUtil.getHiveBasicStatistics; +import static com.facebook.presto.hive.metastore.MetastoreUtil.updateStatisticsParameters; import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiColumnStatistics; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.getHiveBasicStatistics; -import static com.facebook.presto.hive.metastore.thrift.ThriftMetastoreUtil.updateStatisticsParameters; import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.binaryStats; import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.booleanStats; import static org.apache.hadoop.hive.metastore.api.ColumnStatisticsData.dateStats;