# Patch summary (free text ahead of the first "diff --git" header).
#
# Target: lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestMetadataReader.java
#
# What the patch adds:
#   * Imports for DoubleStatistics / FloatStatistics and the DOUBLE / FLOAT
#     primitive type names.
#   * testReadStatsFloat and testReadStatsDouble: pass hex-encoded min/max
#     bytes plus a null count of 13 to MetadataReader.readStats and assert the
#     resulting FloatStatistics / DoubleStatistics report exactly the listed
#     min/max values (via both getMin/getMax and genericGetMin/genericGetMax).
#   * testReadStats{Int64,Float,Double}Without{Min,Max}: supply only one of the
#     two bounds. Each test asserts that the statistics are non-empty, that
#     the null count is still 13, and that BOTH getMin and getMax read 0. The
#     in-line comments in the INT64 variants record that the supplied bound
#     (42042 or -10) is deliberately not reflected in the result.
#
# NOTE(review): the line layout below was reconstructed from a
#   whitespace-collapsed copy of this patch. Two empty added lines ("+") were
#   restored after "assertFalse(columnStatistics.isEmpty());" in
#   testReadStatsFloat and testReadStatsInt64WithoutMin so that the hunks
#   contain the 42 and 120 added lines their headers declare.
# NOTE(review): indentation (4-space levels) and the leading blank context line
#   of the first hunk are inferred from the hunk line counts -- confirm
#   against the target file before applying.
# NOTE(review): "Optional fileCreatedBy" is shown as a raw type exactly as in
#   the collapsed copy; a generic argument may have been lost in extraction --
#   TODO confirm against the target file.
#
diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestMetadataReader.java b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestMetadataReader.java
index aeca0f9ee551..03e1ca802d86 100644
--- a/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestMetadataReader.java
+++ b/lib/trino-parquet/src/test/java/io/trino/parquet/reader/TestMetadataReader.java
@@ -15,6 +15,8 @@
 
 import com.google.common.io.BaseEncoding;
 import org.apache.parquet.column.statistics.BinaryStatistics;
+import org.apache.parquet.column.statistics.DoubleStatistics;
+import org.apache.parquet.column.statistics.FloatStatistics;
 import org.apache.parquet.column.statistics.IntStatistics;
 import org.apache.parquet.column.statistics.LongStatistics;
 import org.apache.parquet.format.Statistics;
@@ -28,6 +30,8 @@
 import static io.trino.testing.assertions.Assert.assertEquals;
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
 import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
@@ -94,6 +98,48 @@ public void testReadStatsInt64(Optional fileCreatedBy)
                 });
     }
 
+    @Test(dataProvider = "allCreatedBy")
+    public void testReadStatsFloat(Optional fileCreatedBy)
+    {
+        Statistics statistics = new Statistics();
+        statistics.setNull_count(13);
+        statistics.setMin(fromHex("1234ABCD"));
+        statistics.setMax(fromHex("12340000"));
+        assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, FLOAT, "Test column")))
+                .isInstanceOfSatisfying(FloatStatistics.class, columnStatistics -> {
+                    assertFalse(columnStatistics.isEmpty());
+
+                    assertTrue(columnStatistics.isNumNullsSet());
+                    assertEquals(columnStatistics.getNumNulls(), 13);
+
+                    assertEquals(columnStatistics.getMin(), -3.59039552E8f);
+                    assertEquals(columnStatistics.getMax(), 1.868E-41f);
+                    assertEquals(columnStatistics.genericGetMin(), -3.59039552E8f);
+                    assertEquals(columnStatistics.genericGetMax(), 1.868E-41f);
+                });
+    }
+
+    @Test(dataProvider = "allCreatedBy")
+    public void testReadStatsDouble(Optional fileCreatedBy)
+    {
+        Statistics statistics = new Statistics();
+        statistics.setNull_count(13);
+        statistics.setMin(fromHex("001234ABCD000000"));
+        statistics.setMax(fromHex("000000000000E043"));
+        assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, DOUBLE, "Test column")))
+                .isInstanceOfSatisfying(DoubleStatistics.class, columnStatistics -> {
+                    assertFalse(columnStatistics.isEmpty());
+
+                    assertTrue(columnStatistics.isNumNullsSet());
+                    assertEquals(columnStatistics.getNumNulls(), 13);
+
+                    assertEquals(columnStatistics.getMin(), 4.36428250013E-312);
+                    assertEquals(columnStatistics.getMax(), 9.223372036854776E18);
+                    assertEquals(columnStatistics.genericGetMin(), 4.36428250013E-312);
+                    assertEquals(columnStatistics.genericGetMax(), 9.223372036854776E18);
+                });
+    }
+
     @Test(dataProvider = "allCreatedBy")
     public void testReadStatsInt64WithoutNullCount(Optional fileCreatedBy)
     {
@@ -114,6 +160,126 @@ public void testReadStatsInt64WithoutNullCount(Optional fileCreatedBy)
                 });
     }
 
+    @Test(dataProvider = "allCreatedBy")
+    public void testReadStatsInt64WithoutMin(Optional fileCreatedBy)
+    {
+        Statistics statistics = new Statistics();
+        statistics.setNull_count(13);
+        statistics.setMax(fromHex("3AA4000000000000"));
+        assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, INT64, "Test column")))
+                .isInstanceOfSatisfying(LongStatistics.class, columnStatistics -> {
+                    assertFalse(columnStatistics.isEmpty());
+
+                    assertTrue(columnStatistics.isNumNullsSet());
+                    assertEquals(columnStatistics.getNumNulls(), 13);
+
+                    assertEquals(columnStatistics.getMin(), 0);
+                    assertEquals(columnStatistics.getMax(), 0); // file statistics indicate 42042
+                    assertEquals(columnStatistics.genericGetMin(), (Long) 0L);
+                    assertEquals(columnStatistics.genericGetMax(), (Long) 0L); // file statistics indicate 42042
+                });
+    }
+
+    @Test(dataProvider = "allCreatedBy")
+    public void testReadStatsInt64WithoutMax(Optional fileCreatedBy)
+    {
+        Statistics statistics = new Statistics();
+        statistics.setNull_count(13);
+        statistics.setMin(fromHex("F6FFFFFFFFFFFFFF"));
+        assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, INT64, "Test column")))
+                .isInstanceOfSatisfying(LongStatistics.class, columnStatistics -> {
+                    assertFalse(columnStatistics.isEmpty());
+
+                    assertTrue(columnStatistics.isNumNullsSet());
+                    assertEquals(columnStatistics.getNumNulls(), 13);
+
+                    assertEquals(columnStatistics.getMin(), 0); // file statistics indicate -10
+                    assertEquals(columnStatistics.getMax(), 0);
+                    assertEquals(columnStatistics.genericGetMin(), (Long) 0L); // file statistics indicate -10
+                    assertEquals(columnStatistics.genericGetMax(), (Long) 0L);
+                });
+    }
+
+    @Test(dataProvider = "allCreatedBy")
+    public void testReadStatsFloatWithoutMin(Optional fileCreatedBy)
+    {
+        Statistics statistics = new Statistics();
+        statistics.setNull_count(13);
+        statistics.setMax(fromHex("12340000"));
+        assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, FLOAT, "Test column")))
+                .isInstanceOfSatisfying(FloatStatistics.class, columnStatistics -> {
+                    assertFalse(columnStatistics.isEmpty());
+
+                    assertTrue(columnStatistics.isNumNullsSet());
+                    assertEquals(columnStatistics.getNumNulls(), 13);
+
+                    assertEquals(columnStatistics.getMin(), 0f);
+                    assertEquals(columnStatistics.getMax(), 0f);
+                    assertEquals(columnStatistics.genericGetMin(), 0f);
+                    assertEquals(columnStatistics.genericGetMax(), 0f);
+                });
+    }
+
+    @Test(dataProvider = "allCreatedBy")
+    public void testReadStatsFloatWithoutMax(Optional fileCreatedBy)
+    {
+        Statistics statistics = new Statistics();
+        statistics.setNull_count(13);
+        statistics.setMin(fromHex("1234ABCD"));
+        assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, FLOAT, "Test column")))
+                .isInstanceOfSatisfying(FloatStatistics.class, columnStatistics -> {
+                    assertFalse(columnStatistics.isEmpty());
+
+                    assertTrue(columnStatistics.isNumNullsSet());
+                    assertEquals(columnStatistics.getNumNulls(), 13);
+
+                    assertEquals(columnStatistics.getMin(), 0f);
+                    assertEquals(columnStatistics.getMax(), 0f);
+                    assertEquals(columnStatistics.genericGetMin(), 0f);
+                    assertEquals(columnStatistics.genericGetMax(), 0f);
+                });
+    }
+
+    @Test(dataProvider = "allCreatedBy")
+    public void testReadStatsDoubleWithoutMin(Optional fileCreatedBy)
+    {
+        Statistics statistics = new Statistics();
+        statistics.setNull_count(13);
+        statistics.setMax(fromHex("3AA4000000000000"));
+        assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, DOUBLE, "Test column")))
+                .isInstanceOfSatisfying(DoubleStatistics.class, columnStatistics -> {
+                    assertFalse(columnStatistics.isEmpty());
+
+                    assertTrue(columnStatistics.isNumNullsSet());
+                    assertEquals(columnStatistics.getNumNulls(), 13);
+
+                    assertEquals(columnStatistics.getMin(), 0d);
+                    assertEquals(columnStatistics.getMax(), 0d);
+                    assertEquals(columnStatistics.genericGetMin(), 0d);
+                    assertEquals(columnStatistics.genericGetMax(), 0d);
+                });
+    }
+
+    @Test(dataProvider = "allCreatedBy")
+    public void testReadStatsDoubleWithoutMax(Optional fileCreatedBy)
+    {
+        Statistics statistics = new Statistics();
+        statistics.setNull_count(13);
+        statistics.setMin(fromHex("F6FFFFFFFFFFFFFF"));
+        assertThat(MetadataReader.readStats(fileCreatedBy, Optional.of(statistics), new PrimitiveType(OPTIONAL, DOUBLE, "Test column")))
+                .isInstanceOfSatisfying(DoubleStatistics.class, columnStatistics -> {
+                    assertFalse(columnStatistics.isEmpty());
+
+                    assertTrue(columnStatistics.isNumNullsSet());
+                    assertEquals(columnStatistics.getNumNulls(), 13);
+
+                    assertEquals(columnStatistics.getMin(), 0d);
+                    assertEquals(columnStatistics.getMax(), 0d);
+                    assertEquals(columnStatistics.genericGetMin(), 0d);
+                    assertEquals(columnStatistics.genericGetMax(), 0d);
+                });
+    }
+
     @Test(dataProvider = "allCreatedBy")
     public void testReadStatsBinary(Optional fileCreatedBy)
     {