-
Notifications
You must be signed in to change notification settings - Fork 5.5k
[Hotfix] nullsCount in columnStatistic should be marked as not present in… #11549
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -331,31 +331,31 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis | |
| LongColumnStatsData longStatsData = columnStatistics.getStatsData().getLongStats(); | ||
| OptionalLong min = longStatsData.isSetLowValue() ? OptionalLong.of(longStatsData.getLowValue()) : OptionalLong.empty(); | ||
| OptionalLong max = longStatsData.isSetHighValue() ? OptionalLong.of(longStatsData.getHighValue()) : OptionalLong.empty(); | ||
| OptionalLong nullsCount = longStatsData.isSetNumNulls() ? OptionalLong.of(longStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong nullsCount = longStatsData.isSetNumNulls() ? fromMetastoreNullsCount(longStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong distinctValuesCount = longStatsData.isSetNumDVs() ? OptionalLong.of(longStatsData.getNumDVs()) : OptionalLong.empty(); | ||
| return createIntegerColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); | ||
| } | ||
| if (columnStatistics.getStatsData().isSetDoubleStats()) { | ||
| DoubleColumnStatsData doubleStatsData = columnStatistics.getStatsData().getDoubleStats(); | ||
| OptionalDouble min = doubleStatsData.isSetLowValue() ? OptionalDouble.of(doubleStatsData.getLowValue()) : OptionalDouble.empty(); | ||
| OptionalDouble max = doubleStatsData.isSetHighValue() ? OptionalDouble.of(doubleStatsData.getHighValue()) : OptionalDouble.empty(); | ||
| OptionalLong nullsCount = doubleStatsData.isSetNumNulls() ? OptionalLong.of(doubleStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong nullsCount = doubleStatsData.isSetNumNulls() ? fromMetastoreNullsCount(doubleStatsData.getNumNulls()) : OptionalLong.empty(); | ||
|
||
| OptionalLong distinctValuesCount = doubleStatsData.isSetNumDVs() ? OptionalLong.of(doubleStatsData.getNumDVs()) : OptionalLong.empty(); | ||
| return createDoubleColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); | ||
| } | ||
| if (columnStatistics.getStatsData().isSetDecimalStats()) { | ||
| DecimalColumnStatsData decimalStatsData = columnStatistics.getStatsData().getDecimalStats(); | ||
| Optional<BigDecimal> min = decimalStatsData.isSetLowValue() ? fromMetastoreDecimal(decimalStatsData.getLowValue()) : Optional.empty(); | ||
| Optional<BigDecimal> max = decimalStatsData.isSetHighValue() ? fromMetastoreDecimal(decimalStatsData.getHighValue()) : Optional.empty(); | ||
| OptionalLong nullsCount = decimalStatsData.isSetNumNulls() ? OptionalLong.of(decimalStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong nullsCount = decimalStatsData.isSetNumNulls() ? fromMetastoreNullsCount(decimalStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong distinctValuesCount = decimalStatsData.isSetNumDVs() ? OptionalLong.of(decimalStatsData.getNumDVs()) : OptionalLong.empty(); | ||
| return createDecimalColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); | ||
| } | ||
| if (columnStatistics.getStatsData().isSetDateStats()) { | ||
| DateColumnStatsData dateStatsData = columnStatistics.getStatsData().getDateStats(); | ||
| Optional<LocalDate> min = dateStatsData.isSetLowValue() ? fromMetastoreDate(dateStatsData.getLowValue()) : Optional.empty(); | ||
| Optional<LocalDate> max = dateStatsData.isSetHighValue() ? fromMetastoreDate(dateStatsData.getHighValue()) : Optional.empty(); | ||
| OptionalLong nullsCount = dateStatsData.isSetNumNulls() ? OptionalLong.of(dateStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong nullsCount = dateStatsData.isSetNumNulls() ? fromMetastoreNullsCount(dateStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong distinctValuesCount = dateStatsData.isSetNumDVs() ? OptionalLong.of(dateStatsData.getNumDVs()) : OptionalLong.empty(); | ||
| return createDateColumnStatistics(min, max, nullsCount, fromMetastoreDistinctValuesCount(distinctValuesCount, nullsCount, rowCount)); | ||
| } | ||
|
|
@@ -364,13 +364,13 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis | |
| return createBooleanColumnStatistics( | ||
| booleanStatsData.isSetNumTrues() ? OptionalLong.of(booleanStatsData.getNumTrues()) : OptionalLong.empty(), | ||
| booleanStatsData.isSetNumFalses() ? OptionalLong.of(booleanStatsData.getNumFalses()) : OptionalLong.empty(), | ||
| booleanStatsData.isSetNumNulls() ? OptionalLong.of(booleanStatsData.getNumNulls()) : OptionalLong.empty()); | ||
| booleanStatsData.isSetNumNulls() ? fromMetastoreNullsCount(booleanStatsData.getNumNulls()) : OptionalLong.empty()); | ||
| } | ||
| if (columnStatistics.getStatsData().isSetStringStats()) { | ||
| StringColumnStatsData stringStatsData = columnStatistics.getStatsData().getStringStats(); | ||
| OptionalLong maxColumnLength = stringStatsData.isSetMaxColLen() ? OptionalLong.of(stringStatsData.getMaxColLen()) : OptionalLong.empty(); | ||
| OptionalDouble averageColumnLength = stringStatsData.isSetAvgColLen() ? OptionalDouble.of(stringStatsData.getAvgColLen()) : OptionalDouble.empty(); | ||
| OptionalLong nullsCount = stringStatsData.isSetNumNulls() ? OptionalLong.of(stringStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong nullsCount = stringStatsData.isSetNumNulls() ? fromMetastoreNullsCount(stringStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong distinctValuesCount = stringStatsData.isSetNumDVs() ? OptionalLong.of(stringStatsData.getNumDVs()) : OptionalLong.empty(); | ||
| return createStringColumnStatistics( | ||
| maxColumnLength, | ||
|
|
@@ -382,7 +382,7 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis | |
| BinaryColumnStatsData binaryStatsData = columnStatistics.getStatsData().getBinaryStats(); | ||
| OptionalLong maxColumnLength = binaryStatsData.isSetMaxColLen() ? OptionalLong.of(binaryStatsData.getMaxColLen()) : OptionalLong.empty(); | ||
| OptionalDouble averageColumnLength = binaryStatsData.isSetAvgColLen() ? OptionalDouble.of(binaryStatsData.getAvgColLen()) : OptionalDouble.empty(); | ||
| OptionalLong nullsCount = binaryStatsData.isSetNumNulls() ? OptionalLong.of(binaryStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| OptionalLong nullsCount = binaryStatsData.isSetNumNulls() ? fromMetastoreNullsCount(binaryStatsData.getNumNulls()) : OptionalLong.empty(); | ||
| return createBinaryColumnStatistics( | ||
| maxColumnLength, | ||
| getTotalSizeInBytes(averageColumnLength, rowCount, nullsCount), | ||
|
|
@@ -401,6 +401,18 @@ public static Optional<LocalDate> fromMetastoreDate(Date date) | |
| return Optional.of(LocalDate.ofEpochDay(date.getDaysSinceEpoch())); | ||
| } | ||
|
|
||
| /** | ||
| * Impala `COMPUTE STATS` will write -1 as the null count. | ||
| * @see <a href="https://issues.apache.org/jira/browse/IMPALA-7497">IMPALA-7497</a> | ||
| */ | ||
| public static OptionalLong fromMetastoreNullsCount(long nullsCount) | ||
| { | ||
| if (nullsCount == -1L) { | ||
findepi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| return OptionalLong.empty(); | ||
| } | ||
| return OptionalLong.of(nullsCount); | ||
| } | ||
|
|
||
| public static Optional<BigDecimal> fromMetastoreDecimal(@Nullable Decimal decimal) | ||
| { | ||
| if (decimal == null) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are other places below that need an update (search for "nullsCount")
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@findepi , thanks for your correction.
I have added the same conversion for the other nullsCount values in the method fromMetastoreApiColumnStatistics.
I think other methods, such as createDateStatistics, are used to write the statistics information back to Hive, so I should not add this conversion to them, right?
What is your opinion?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When writing, -1 should never be a value we write, so there is no need to change anything there.