diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java index 71acff6df9ec0..9d2322f35ac21 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/metastore/thrift/ThriftMetastoreUtil.java @@ -331,7 +331,7 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis LongColumnStatsData longStatsData = columnStatistics.getStatsData().getLongStats(); OptionalLong min = longStatsData.isSetLowValue() ? OptionalLong.of(longStatsData.getLowValue()) : OptionalLong.empty(); OptionalLong max = longStatsData.isSetHighValue() ? OptionalLong.of(longStatsData.getHighValue()) : OptionalLong.empty(); - OptionalLong nullsCount = longStatsData.isSetNumNulls() ? OptionalLong.of(longStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong nullsCount = longStatsData.isSetNumNulls() ? fromMetastoreNullsCount(longStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = longStatsData.isSetNumDVs() ? OptionalLong.of(longStatsData.getNumDVs()) : OptionalLong.empty(); return createIntegerColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); } @@ -339,7 +339,7 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis DoubleColumnStatsData doubleStatsData = columnStatistics.getStatsData().getDoubleStats(); OptionalDouble min = doubleStatsData.isSetLowValue() ? OptionalDouble.of(doubleStatsData.getLowValue()) : OptionalDouble.empty(); OptionalDouble max = doubleStatsData.isSetHighValue() ? OptionalDouble.of(doubleStatsData.getHighValue()) : OptionalDouble.empty(); - OptionalLong nullsCount = doubleStatsData.isSetNumNulls() ? OptionalLong.of(doubleStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong nullsCount = doubleStatsData.isSetNumNulls() ? fromMetastoreNullsCount(doubleStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = doubleStatsData.isSetNumDVs() ? OptionalLong.of(doubleStatsData.getNumDVs()) : OptionalLong.empty(); return createDoubleColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); } @@ -347,7 +347,7 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis DecimalColumnStatsData decimalStatsData = columnStatistics.getStatsData().getDecimalStats(); Optional min = decimalStatsData.isSetLowValue() ? fromMetastoreDecimal(decimalStatsData.getLowValue()) : Optional.empty(); Optional max = decimalStatsData.isSetHighValue() ? fromMetastoreDecimal(decimalStatsData.getHighValue()) : Optional.empty(); - OptionalLong nullsCount = decimalStatsData.isSetNumNulls() ? OptionalLong.of(decimalStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong nullsCount = decimalStatsData.isSetNumNulls() ? fromMetastoreNullsCount(decimalStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = decimalStatsData.isSetNumDVs() ? OptionalLong.of(decimalStatsData.getNumDVs()) : OptionalLong.empty(); return createDecimalColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); } @@ -355,7 +355,7 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis DateColumnStatsData dateStatsData = columnStatistics.getStatsData().getDateStats(); Optional min = dateStatsData.isSetLowValue() ? fromMetastoreDate(dateStatsData.getLowValue()) : Optional.empty(); Optional max = dateStatsData.isSetHighValue() ? fromMetastoreDate(dateStatsData.getHighValue()) : Optional.empty(); - OptionalLong nullsCount = dateStatsData.isSetNumNulls() ? OptionalLong.of(dateStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong nullsCount = dateStatsData.isSetNumNulls() ? fromMetastoreNullsCount(dateStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = dateStatsData.isSetNumDVs() ? OptionalLong.of(dateStatsData.getNumDVs()) : OptionalLong.empty(); return createDateColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); } @@ -364,13 +364,13 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis return createBooleanColumnStatistics( booleanStatsData.isSetNumTrues() ? OptionalLong.of(booleanStatsData.getNumTrues()) : OptionalLong.empty(), booleanStatsData.isSetNumFalses() ? OptionalLong.of(booleanStatsData.getNumFalses()) : OptionalLong.empty(), - booleanStatsData.isSetNumNulls() ? OptionalLong.of(booleanStatsData.getNumNulls()) : OptionalLong.empty()); + booleanStatsData.isSetNumNulls() ? fromMetastoreNullsCount(booleanStatsData.getNumNulls()) : OptionalLong.empty()); } if (columnStatistics.getStatsData().isSetStringStats()) { StringColumnStatsData stringStatsData = columnStatistics.getStatsData().getStringStats(); OptionalLong maxColumnLength = stringStatsData.isSetMaxColLen() ? OptionalLong.of(stringStatsData.getMaxColLen()) : OptionalLong.empty(); OptionalDouble averageColumnLength = stringStatsData.isSetAvgColLen() ? OptionalDouble.of(stringStatsData.getAvgColLen()) : OptionalDouble.empty(); - OptionalLong nullsCount = stringStatsData.isSetNumNulls() ? OptionalLong.of(stringStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong nullsCount = stringStatsData.isSetNumNulls() ? fromMetastoreNullsCount(stringStatsData.getNumNulls()) : OptionalLong.empty(); OptionalLong distinctValuesCount = stringStatsData.isSetNumDVs() ? OptionalLong.of(stringStatsData.getNumDVs()) : OptionalLong.empty(); return createStringColumnStatistics( maxColumnLength, @@ -382,7 +382,7 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis BinaryColumnStatsData binaryStatsData = columnStatistics.getStatsData().getBinaryStats(); OptionalLong maxColumnLength = binaryStatsData.isSetMaxColLen() ? OptionalLong.of(binaryStatsData.getMaxColLen()) : OptionalLong.empty(); OptionalDouble averageColumnLength = binaryStatsData.isSetAvgColLen() ? OptionalDouble.of(binaryStatsData.getAvgColLen()) : OptionalDouble.empty(); - OptionalLong nullsCount = binaryStatsData.isSetNumNulls() ? OptionalLong.of(binaryStatsData.getNumNulls()) : OptionalLong.empty(); + OptionalLong nullsCount = binaryStatsData.isSetNumNulls() ? fromMetastoreNullsCount(binaryStatsData.getNumNulls()) : OptionalLong.empty(); return createBinaryColumnStatistics( maxColumnLength, getTotalSizeInBytes(averageColumnLength, rowCount, nullsCount), @@ -401,6 +401,18 @@ public static Optional fromMetastoreDate(Date date) return Optional.of(LocalDate.ofEpochDay(date.getDaysSinceEpoch())); } + /** + * Impala `COMPUTE STATS` will write -1 as the null count. + * @see IMPALA-7497 + */ + public static OptionalLong fromMetastoreNullsCount(long nullsCount) + { + if (nullsCount == -1L) { + return OptionalLong.empty(); + } + return OptionalLong.of(nullsCount); + } + public static Optional fromMetastoreDecimal(@Nullable Decimal decimal) { if (decimal == null) {