diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java index 9633b61006..b28fddee42 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/ColumnIndexBuilder.java @@ -557,6 +557,10 @@ private ColumnIndexBase build(PrimitiveType type) { return null; } ColumnIndexBase columnIndex = createColumnIndex(type); + if (columnIndex == null) { + // Might happen if the specialized builder discovers invalid min/max values + return null; + } columnIndex.nullPages = nullPages.toBooleanArray(); // Null counts is optional so keep it null if the builder has no values if (!nullCounts.isEmpty()) { diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/DoubleColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/DoubleColumnIndexBuilder.java index 24be02c2ea..074d02573f 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/DoubleColumnIndexBuilder.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/DoubleColumnIndexBuilder.java @@ -84,6 +84,7 @@ int compareValueToMax(int arrayIndex) { private final DoubleList minValues = new DoubleArrayList(); private final DoubleList maxValues = new DoubleArrayList(); + private boolean invalid; private static double convert(ByteBuffer buffer) { return buffer.order(LITTLE_ENDIAN).getDouble(0); @@ -101,12 +102,30 @@ void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max) { @Override void addMinMax(Object min, Object max) { - minValues.add((double) min); - maxValues.add((double) max); + double dMin = (double) min; + double dMax = (double) max; + if (Double.isNaN(dMin) || Double.isNaN(dMax)) { + // Invalidate this column index in case of NaN as the sorting order of values is undefined for this case + invalid = true; + } + + // Sorting order is undefined for -0.0 so let min = -0.0 and max = +0.0 to ensure that no 0.0 values are skipped + if (Double.compare(dMin, +0.0) == 0) { + dMin = -0.0; + } + if (Double.compare(dMax, -0.0) == 0) { + dMax = +0.0; + } + + minValues.add(dMin); + maxValues.add(dMax); } @Override ColumnIndexBase createColumnIndex(PrimitiveType type) { + if (invalid) { + return null; + } DoubleColumnIndex columnIndex = new DoubleColumnIndex(type); columnIndex.minValues = minValues.toDoubleArray(); columnIndex.maxValues = maxValues.toDoubleArray(); diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/FloatColumnIndexBuilder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/FloatColumnIndexBuilder.java index 4452746c4f..cbcdf949d8 100644 --- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/FloatColumnIndexBuilder.java +++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/FloatColumnIndexBuilder.java @@ -84,6 +84,7 @@ int compareValueToMax(int arrayIndex) { private final FloatList minValues = new FloatArrayList(); private final FloatList maxValues = new FloatArrayList(); + private boolean invalid; private static float convert(ByteBuffer buffer) { return buffer.order(LITTLE_ENDIAN).getFloat(0); @@ -101,12 +102,30 @@ void addMinMaxFromBytes(ByteBuffer min, ByteBuffer max) { @Override void addMinMax(Object min, Object max) { - minValues.add((float) min); - maxValues.add((float) max); + float fMin = (float) min; + float fMax = (float) max; + if (Float.isNaN(fMin) || Float.isNaN(fMax)) { + // Invalidate this column index in case of NaN as the sorting order of values is undefined for this case + invalid = true; + } + + // Sorting order is undefined for -0.0 so let min = -0.0 and max = +0.0 to ensure that no 0.0 values are skipped + if (Float.compare(fMin, +0.0f) == 0) { + fMin = -0.0f; + } + if (Float.compare(fMax, -0.0f) == 0) { + fMax = +0.0f; + } + + minValues.add(fMin); + maxValues.add(fMax); } @Override ColumnIndexBase createColumnIndex(PrimitiveType type) { + if (invalid) { + return null; + } FloatColumnIndex columnIndex = new FloatColumnIndex(type); columnIndex.minValues = minValues.toFloatArray(); columnIndex.maxValues = maxValues.toFloatArray(); diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java index e4655b2657..5a3947c980 100644 --- a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java +++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestColumnIndexBuilder.java @@ -48,6 +48,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.math.BigDecimal; @@ -810,6 +811,25 @@ public void testBuildDouble() { assertCorrectFiltering(columnIndex, invert(userDefined(col, DoubleIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8); } + @Test + public void testBuildDoubleZeroNaN() { + PrimitiveType type = Types.required(DOUBLE).named("test_double"); + ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); + StatsBuilder sb = new StatsBuilder(); + builder.add(sb.stats(type, -1.0, -0.0)); + builder.add(sb.stats(type, 0.0, 1.0)); + builder.add(sb.stats(type, 1.0, 100.0)); + ColumnIndex columnIndex = builder.build(); + assertCorrectValues(columnIndex.getMinValues(), -1.0, -0.0, 1.0); + assertCorrectValues(columnIndex.getMaxValues(), 0.0, 1.0, 100.0); + + builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); + builder.add(sb.stats(type, -1.0, -0.0)); + builder.add(sb.stats(type, 0.0, Double.NaN)); + builder.add(sb.stats(type, 1.0, 100.0)); + assertNull(builder.build()); + } + @Test public void testStaticBuildDouble() { ColumnIndex columnIndex = ColumnIndexBuilder.build( @@ -921,6 +941,25 @@ public void testBuildFloat() { assertCorrectFiltering(columnIndex, invert(userDefined(col, FloatIsInteger.class)), 0, 1, 2, 3, 4, 5, 6, 7, 8); } + @Test + public void testBuildFloatZeroNaN() { + PrimitiveType type = Types.required(FLOAT).named("test_float"); + ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); + StatsBuilder sb = new StatsBuilder(); + builder.add(sb.stats(type, -1.0f, -0.0f)); + builder.add(sb.stats(type, 0.0f, 1.0f)); + builder.add(sb.stats(type, 1.0f, 100.0f)); + ColumnIndex columnIndex = builder.build(); + assertCorrectValues(columnIndex.getMinValues(), -1.0f, -0.0f, 1.0f); + assertCorrectValues(columnIndex.getMaxValues(), 0.0f, 1.0f, 100.0f); + + builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE); + builder.add(sb.stats(type, -1.0f, -0.0f)); + builder.add(sb.stats(type, 0.0f, Float.NaN)); + builder.add(sb.stats(type, 1.0f, 100.0f)); + assertNull(builder.build()); + } + @Test public void testStaticBuildFloat() { ColumnIndex columnIndex = ColumnIndexBuilder.build( @@ -1391,7 +1430,7 @@ private static void assertCorrectValues(List values, Double... expec assertFalse("The byte buffer should be empty for null pages", value.hasRemaining()); } else { assertEquals("The byte buffer should be 8 bytes long for double", 8, value.remaining()); - assertEquals("Invalid value for page " + i, expectedValue.doubleValue(), value.getDouble(0), 0.0); + assertTrue("Invalid value for page " + i, Double.compare(expectedValue.doubleValue(), value.getDouble(0)) == 0); } } } @@ -1405,7 +1444,7 @@ private static void assertCorrectValues(List values, Float... expect assertFalse("The byte buffer should be empty for null pages", value.hasRemaining()); } else { assertEquals("The byte buffer should be 4 bytes long for double", 4, value.remaining()); - assertEquals("Invalid value for page " + i, expectedValue.floatValue(), value.getFloat(0), 0.0f); + assertTrue("Invalid value for page " + i, Float.compare(expectedValue.floatValue(), value.getFloat(0)) == 0); } } }