diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/ParquetTypeUtils.java b/presto-parquet/src/main/java/io/prestosql/parquet/ParquetTypeUtils.java index 608cdf5d9f84..59a4ba55a755 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/ParquetTypeUtils.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/ParquetTypeUtils.java @@ -13,19 +13,8 @@ */ package io.prestosql.parquet; -import io.prestosql.spi.PrestoException; -import io.prestosql.spi.predicate.Domain; -import io.prestosql.spi.predicate.TupleDomain; -import io.prestosql.spi.type.BigintType; -import io.prestosql.spi.type.BooleanType; -import io.prestosql.spi.type.DateType; import io.prestosql.spi.type.DecimalType; -import io.prestosql.spi.type.DoubleType; -import io.prestosql.spi.type.RealType; -import io.prestosql.spi.type.TimestampType; import io.prestosql.spi.type.Type; -import io.prestosql.spi.type.VarcharType; -import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.Encoding; import org.apache.parquet.io.ColumnIO; import org.apache.parquet.io.ColumnIOFactory; @@ -36,7 +25,6 @@ import org.apache.parquet.io.PrimitiveColumnIO; import org.apache.parquet.schema.DecimalMetadata; import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.OriginalType; import java.util.Arrays; import java.util.HashMap; @@ -46,8 +34,6 @@ import java.util.Optional; import static com.google.common.base.Preconditions.checkArgument; -import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED; -import static io.prestosql.spi.type.IntegerType.INTEGER; import static org.apache.parquet.schema.OriginalType.DECIMAL; import static org.apache.parquet.schema.Type.Repetition.REPEATED; @@ -159,46 +145,6 @@ private static int getPathIndex(List columns, List pa return index; } - public static Type getPrestoType(TupleDomain effectivePredicate, RichColumnDescriptor descriptor) - { - switch (descriptor.getType()) { - case BOOLEAN: - return BooleanType.BOOLEAN; - case BINARY: - return createDecimalType(descriptor).orElse(createVarcharType(effectivePredicate, descriptor)); - case FLOAT: - return RealType.REAL; - case DOUBLE: - return DoubleType.DOUBLE; - case INT32: - return getInt32Type(descriptor); - case INT64: - return createDecimalType(descriptor).orElse(BigintType.BIGINT); - case INT96: - return TimestampType.TIMESTAMP; - case FIXED_LEN_BYTE_ARRAY: - return createDecimalType(descriptor).orElseThrow(() -> new PrestoException(NOT_SUPPORTED, "Parquet type FIXED_LEN_BYTE_ARRAY supported as DECIMAL; got " + descriptor.getPrimitiveType().getOriginalType())); - default: - throw new PrestoException(NOT_SUPPORTED, "Unsupported parquet type: " + descriptor.getType()); - } - } - - private static Type createVarcharType(TupleDomain effectivePredicate, RichColumnDescriptor column) - { - // We look at the effectivePredicate domain here, because it matches the Hive column type - // more accurately than the type available in the RichColumnDescriptor. - // For example, a Hive column of type varchar(length) is encoded as a Parquet BINARY, but - // when that is converted to a Presto Type the length information wasn't retained. - Optional> predicateDomains = effectivePredicate.getDomains(); - if (predicateDomains.isPresent()) { - Domain domain = predicateDomains.get().get(column); - if (domain != null) { - return domain.getType(); - } - } - return VarcharType.VARCHAR; - } - public static int getFieldIndex(MessageType fileSchema, String name) { try { @@ -316,21 +262,4 @@ public static long getShortDecimalValue(byte[] bytes) return value; } - - private static Type getInt32Type(RichColumnDescriptor descriptor) - { - OriginalType originalType = descriptor.getPrimitiveType().getOriginalType(); - if (originalType == null) { - return INTEGER; - } - - switch (originalType) { - case DECIMAL: - return createDecimalType(descriptor.getPrimitiveType().getDecimalMetadata()); - case DATE: - return DateType.DATE; - default: - return INTEGER; - } - } } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/predicate/TupleDomainParquetPredicate.java b/presto-parquet/src/main/java/io/prestosql/parquet/predicate/TupleDomainParquetPredicate.java index ca7db78d077c..c9bcaf517e5d 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/predicate/TupleDomainParquetPredicate.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/predicate/TupleDomainParquetPredicate.java @@ -14,8 +14,8 @@ package io.prestosql.parquet.predicate; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.VerifyException; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import io.airlift.slice.Slice; import io.airlift.slice.Slices; import io.prestosql.parquet.DictionaryPage; @@ -44,7 +44,6 @@ import java.util.Optional; import java.util.function.Function; -import static io.prestosql.parquet.ParquetTypeUtils.getPrestoType; import static io.prestosql.parquet.predicate.PredicateUtils.isStatisticsOverflow; import static io.prestosql.spi.type.BigintType.BIGINT; import static io.prestosql.spi.type.BooleanType.BOOLEAN; @@ -78,42 +77,53 @@ public boolean matches(long numberOfRows, Map> s if (numberOfRows == 0) { return false; } - ImmutableMap.Builder domains = ImmutableMap.builder(); + if (effectivePredicate.isNone()) { + return false; + } + Map effectivePredicateDomains = effectivePredicate.getDomains() + .orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains")); for (RichColumnDescriptor column : columns) { - Statistics columnStatistics = statistics.get(column); + Domain effectivePredicateDomain = effectivePredicateDomains.get(column); + if (effectivePredicateDomain == null) { + continue; + } - Domain domain; - Type type = getPrestoType(effectivePredicate, column); + Statistics columnStatistics = statistics.get(column); if (columnStatistics == null || columnStatistics.isEmpty()) { // no stats for column - domain = Domain.all(type); } else { - domain = getDomain(type, numberOfRows, columnStatistics, id, column.toString(), failOnCorruptedParquetStatistics); + Domain domain = getDomain(effectivePredicateDomain.getType(), numberOfRows, columnStatistics, id, column.toString(), failOnCorruptedParquetStatistics); + if (effectivePredicateDomain.intersect(domain).isNone()) { + return false; + } } - domains.put(column, domain); } - TupleDomain stripeDomain = TupleDomain.withColumnDomains(domains.build()); - - return effectivePredicate.overlaps(stripeDomain); + return true; } @Override public boolean matches(Map dictionaries) { - ImmutableMap.Builder domains = ImmutableMap.builder(); + if (effectivePredicate.isNone()) { + return false; + } + Map effectivePredicateDomains = effectivePredicate.getDomains() + .orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains")); for (RichColumnDescriptor column : columns) { + Domain effectivePredicateDomain = effectivePredicateDomains.get(column); + if (effectivePredicateDomain == null) { + continue; + } DictionaryDescriptor dictionaryDescriptor = dictionaries.get(column); - Domain domain = getDomain(getPrestoType(effectivePredicate, column), dictionaryDescriptor); - if (domain != null) { - domains.put(column, domain); + Domain domain = getDomain(effectivePredicateDomain.getType(), dictionaryDescriptor); + if (effectivePredicateDomain.intersect(domain).isNone()) { + return false; } } - TupleDomain stripeDomain = TupleDomain.withColumnDomains(domains.build()); - - return effectivePredicate.overlaps(stripeDomain); + return true; } @VisibleForTesting @@ -148,8 +158,11 @@ public static Domain getDomain(Type type, long rowCount, Statistics statistic if (hasFalseValues) { return Domain.create(ValueSet.of(type, false), hasNullValue); } + // All nulls case is handled earlier + throw new VerifyException("Impossible boolean statistics"); } - else if ((type.equals(BIGINT) || type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER)) && (statistics instanceof LongStatistics || statistics instanceof IntStatistics)) { + + if ((type.equals(BIGINT) || type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER)) && (statistics instanceof LongStatistics || statistics instanceof IntStatistics)) { ParquetIntegerStatistics parquetIntegerStatistics; if (statistics instanceof LongStatistics) { LongStatistics longStatistics = (LongStatistics) statistics; @@ -172,7 +185,8 @@ else if ((type.equals(BIGINT) || type.equals(TINYINT) || type.equals(SMALLINT) | } return createDomain(type, hasNullValue, parquetIntegerStatistics); } - else if (type.equals(REAL) && statistics instanceof FloatStatistics) { + + if (type.equals(REAL) && statistics instanceof FloatStatistics) { FloatStatistics floatStatistics = (FloatStatistics) statistics; if (floatStatistics.genericGetMin() > floatStatistics.genericGetMax()) { failWithCorruptionException(failOnCorruptedParquetStatistics, column, id, floatStatistics); @@ -185,7 +199,8 @@ else if (type.equals(REAL) && statistics instanceof FloatStatistics) { return createDomain(type, hasNullValue, parquetStatistics); } - else if (type.equals(DOUBLE) && statistics instanceof DoubleStatistics) { + + if (type.equals(DOUBLE) && statistics instanceof DoubleStatistics) { DoubleStatistics doubleStatistics = (DoubleStatistics) statistics; if (doubleStatistics.genericGetMin() > doubleStatistics.genericGetMax()) { failWithCorruptionException(failOnCorruptedParquetStatistics, column, id, doubleStatistics); @@ -194,7 +209,8 @@ else if (type.equals(DOUBLE) && statistics instanceof DoubleStatistics) { ParquetDoubleStatistics parquetDoubleStatistics = new ParquetDoubleStatistics(doubleStatistics.genericGetMin(), doubleStatistics.genericGetMax()); return createDomain(type, hasNullValue, parquetDoubleStatistics); } - else if (isVarcharType(type) && statistics instanceof BinaryStatistics) { + + if (isVarcharType(type) && statistics instanceof BinaryStatistics) { BinaryStatistics binaryStatistics = (BinaryStatistics) statistics; Slice minSlice = Slices.wrappedBuffer(binaryStatistics.getMin().getBytes()); Slice maxSlice = Slices.wrappedBuffer(binaryStatistics.getMax().getBytes()); @@ -205,7 +221,8 @@ else if (isVarcharType(type) && statistics instanceof BinaryStatistics) { ParquetStringStatistics parquetStringStatistics = new ParquetStringStatistics(minSlice, maxSlice); return createDomain(type, hasNullValue, parquetStringStatistics); } - else if (type.equals(DATE) && statistics instanceof IntStatistics) { + + if (type.equals(DATE) && statistics instanceof IntStatistics) { IntStatistics intStatistics = (IntStatistics) statistics; if (intStatistics.genericGetMin() > intStatistics.genericGetMax()) { failWithCorruptionException(failOnCorruptedParquetStatistics, column, id, intStatistics); @@ -214,6 +231,7 @@ else if (type.equals(DATE) && statistics instanceof IntStatistics) { ParquetIntegerStatistics parquetIntegerStatistics = new ParquetIntegerStatistics((long) intStatistics.getMin(), (long) intStatistics.getMax()); return createDomain(type, hasNullValue, parquetIntegerStatistics); } + return Domain.create(ValueSet.all(type), hasNullValue); } @@ -221,13 +239,13 @@ else if (type.equals(DATE) && statistics instanceof IntStatistics) { public static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescriptor) { if (dictionaryDescriptor == null) { - return null; + return Domain.all(type); } ColumnDescriptor columnDescriptor = dictionaryDescriptor.getColumnDescriptor(); Optional dictionaryPage = dictionaryDescriptor.getDictionaryPage(); if (!dictionaryPage.isPresent()) { - return null; + return Domain.all(type); } Dictionary dictionary; @@ -237,7 +255,8 @@ public static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescrip catch (Exception e) { // In case of exception, just continue reading the data, not using dictionary page at all // OK to ignore exception when reading dictionaries - return null; + // TODO take failOnCorruptedParquetStatistics parameter and handle appropriately + return Domain.all(type); } int dictionarySize = dictionaryPage.get().getDictionarySize(); @@ -249,7 +268,8 @@ public static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescrip domains.add(Domain.onlyNull(type)); return Domain.union(domains); } - else if ((type.equals(BIGINT) || type.equals(DATE)) && columnDescriptor.getType() == PrimitiveTypeName.INT32) { + + if ((type.equals(BIGINT) || type.equals(DATE)) && columnDescriptor.getType() == PrimitiveTypeName.INT32) { List domains = new ArrayList<>(); for (int i = 0; i < dictionarySize; i++) { domains.add(Domain.singleValue(type, (long) dictionary.decodeToInt(i))); @@ -257,7 +277,8 @@ else if ((type.equals(BIGINT) || type.equals(DATE)) && columnDescriptor.getType( domains.add(Domain.onlyNull(type)); return Domain.union(domains); } - else if (type.equals(DOUBLE) && columnDescriptor.getType() == PrimitiveTypeName.DOUBLE) { + + if (type.equals(DOUBLE) && columnDescriptor.getType() == PrimitiveTypeName.DOUBLE) { List domains = new ArrayList<>(); for (int i = 0; i < dictionarySize; i++) { domains.add(Domain.singleValue(type, dictionary.decodeToDouble(i))); @@ -265,7 +286,8 @@ else if (type.equals(DOUBLE) && columnDescriptor.getType() == PrimitiveTypeName. domains.add(Domain.onlyNull(type)); return Domain.union(domains); } - else if (type.equals(DOUBLE) && columnDescriptor.getType() == PrimitiveTypeName.FLOAT) { + + if (type.equals(DOUBLE) && columnDescriptor.getType() == PrimitiveTypeName.FLOAT) { List domains = new ArrayList<>(); for (int i = 0; i < dictionarySize; i++) { domains.add(Domain.singleValue(type, (double) dictionary.decodeToFloat(i))); @@ -273,7 +295,8 @@ else if (type.equals(DOUBLE) && columnDescriptor.getType() == PrimitiveTypeName. domains.add(Domain.onlyNull(type)); return Domain.union(domains); } - else if (isVarcharType(type) && columnDescriptor.getType() == PrimitiveTypeName.BINARY) { + + if (isVarcharType(type) && columnDescriptor.getType() == PrimitiveTypeName.BINARY) { List domains = new ArrayList<>(); for (int i = 0; i < dictionarySize; i++) { domains.add(Domain.singleValue(type, Slices.wrappedBuffer(dictionary.decodeToBinary(i).getBytes()))); @@ -281,7 +304,8 @@ else if (isVarcharType(type) && columnDescriptor.getType() == PrimitiveTypeName. domains.add(Domain.onlyNull(type)); return Domain.union(domains); } - return null; + + return Domain.all(type); } private static void failWithCorruptionException(boolean failOnCorruptedParquetStatistics, String column, ParquetDataSourceId id, Statistics statistics) diff --git a/presto-parquet/src/test/java/io/prestosql/parquet/TestParquetTypeUtils.java b/presto-parquet/src/test/java/io/prestosql/parquet/TestParquetTypeUtils.java deleted file mode 100644 index bf1b1f09e41a..000000000000 --- a/presto-parquet/src/test/java/io/prestosql/parquet/TestParquetTypeUtils.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.prestosql.parquet; - -import io.prestosql.spi.predicate.TupleDomain; -import org.apache.parquet.column.ColumnDescriptor; -import org.apache.parquet.schema.OriginalType; -import org.apache.parquet.schema.PrimitiveType; -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; -import org.testng.annotations.Test; - -import static io.prestosql.parquet.ParquetTypeUtils.getPrestoType; -import static io.prestosql.spi.type.DateType.DATE; -import static io.prestosql.spi.type.IntegerType.INTEGER; -import static org.apache.parquet.schema.Type.Repetition.OPTIONAL; -import static org.testng.Assert.assertEquals; - -public class TestParquetTypeUtils -{ - @Test - public void testMapInt32ToPrestoInteger() - { - PrimitiveType intType = new PrimitiveType(OPTIONAL, PrimitiveTypeName.INT32, "int_col", OriginalType.INT_32); - ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"int_col"}, PrimitiveTypeName.INT32, 0, 1); - RichColumnDescriptor intColumn = new RichColumnDescriptor(columnDescriptor, intType); - assertEquals(getPrestoType(TupleDomain.all(), intColumn), INTEGER); - } - - @Test - public void testMapInt32WithoutOriginalTypeToPrestoInteger() - { - // int32 primitive should default to Presto integer if original type metadata isn't available - PrimitiveType intType = new PrimitiveType(OPTIONAL, PrimitiveTypeName.INT32, "int_col"); - ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"int_col"}, PrimitiveTypeName.INT32, 0, 1); - RichColumnDescriptor intColumn = new RichColumnDescriptor(columnDescriptor, intType); - assertEquals(getPrestoType(TupleDomain.all(), intColumn), INTEGER); - } - - @Test - public void testMapInt32ToPrestoDate() - { - // int32 primitive with original type of date should map to a Presto date - PrimitiveType dateType = new PrimitiveType(OPTIONAL, PrimitiveTypeName.INT32, "date_col", OriginalType.DATE); - ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"date_col"}, PrimitiveTypeName.INT32, 0, 1); - RichColumnDescriptor dateColumn = new RichColumnDescriptor(columnDescriptor, dateType); - assertEquals(getPrestoType(TupleDomain.all(), dateColumn), DATE); - } -} diff --git a/presto-parquet/src/test/java/io/prestosql/parquet/TestTupleDomainParquetPredicate.java b/presto-parquet/src/test/java/io/prestosql/parquet/TestTupleDomainParquetPredicate.java index 972f2f88d72c..d8a4ef946747 100644 --- a/presto-parquet/src/test/java/io/prestosql/parquet/TestTupleDomainParquetPredicate.java +++ b/presto-parquet/src/test/java/io/prestosql/parquet/TestTupleDomainParquetPredicate.java @@ -13,6 +13,7 @@ */ package io.prestosql.parquet; +import com.google.common.collect.ImmutableMap; import io.airlift.slice.Slice; import io.airlift.slice.Slices; import io.prestosql.parquet.predicate.DictionaryDescriptor; @@ -20,6 +21,7 @@ import io.prestosql.spi.predicate.Domain; import io.prestosql.spi.predicate.TupleDomain; import io.prestosql.spi.predicate.ValueSet; +import io.prestosql.spi.type.Type; import io.prestosql.spi.type.VarcharType; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.statistics.BinaryStatistics; @@ -31,6 +33,7 @@ import org.apache.parquet.column.statistics.Statistics; import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.PrimitiveType; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.Map; @@ -61,9 +64,12 @@ import static java.util.Collections.singletonMap; import static org.apache.parquet.column.statistics.Statistics.getStatsBasedOnType; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32; +import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; import static org.apache.parquet.schema.Type.Repetition.OPTIONAL; import static org.assertj.core.api.Assertions.assertThatExceptionOfType; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; public class TestTupleDomainParquetPredicate @@ -108,13 +114,6 @@ public void testBigint() .withMessage("Corrupted statistics for column \"BigintColumn\" in Parquet file \"testFile\": [min: 100, max: 10, num_nulls: 0]"); } - private static LongStatistics longColumnStats(long minimum, long maximum) - { - LongStatistics statistics = new LongStatistics(); - statistics.setMinMax(minimum, maximum); - return statistics; - } - @Test public void testInteger() throws ParquetCorruptionException @@ -272,7 +271,7 @@ public void testDate() } @Test - public void testMatchesWithStatistics() + public void testVarcharMatchesWithStatistics() throws ParquetCorruptionException { String value = "Test"; @@ -280,15 +279,58 @@ public void testMatchesWithStatistics() RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column")); TupleDomain effectivePredicate = getEffectivePredicate(column, createVarcharType(255), utf8Slice(value)); TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column)); - Statistics stats = getStatsBasedOnType(column.getType()); + Statistics stats = getStatsBasedOnType(column.getType()); stats.setNumNulls(1L); stats.setMinMaxFromBytes(value.getBytes(), value.getBytes()); - assertTrue(parquetPredicate.matches(2, singletonMap(column, stats), ID, true)); + assertTrue(parquetPredicate.matches(2, ImmutableMap.of(column, stats), ID, true)); + } + + @Test(dataProvider = "typeForParquetInt32") + public void testIntegerMatchesWithStatistics(Type typeForParquetInt32) + throws ParquetCorruptionException + { + RichColumnDescriptor column = new RichColumnDescriptor( + new ColumnDescriptor(new String[] {"path"}, INT32, 0, 0), + new PrimitiveType(OPTIONAL, INT32, "Test column")); + TupleDomain effectivePredicate = TupleDomain.withColumnDomains(ImmutableMap.of( + column, + Domain.create(ValueSet.of(typeForParquetInt32, 42L, 43L, 44L, 112L), false))); + TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column)); + + assertTrue(parquetPredicate.matches(2, ImmutableMap.of(column, intColumnStats(32, 42)), ID, true)); + assertFalse(parquetPredicate.matches(2, ImmutableMap.of(column, intColumnStats(30, 40)), ID, true)); + assertEquals(parquetPredicate.matches(2, ImmutableMap.of(column, intColumnStats(1024, 0x10000 + 42)), ID, true), (typeForParquetInt32 != INTEGER)); // stats invalid for smallint/tinyint + } + + @DataProvider + public Object[][] typeForParquetInt32() + { + return new Object[][] { + {INTEGER}, + {SMALLINT}, + {TINYINT}, + }; } @Test - public void testMatchesWithDescriptors() + public void testBigintMatchesWithStatistics() throws ParquetCorruptionException + { + RichColumnDescriptor column = new RichColumnDescriptor( + new ColumnDescriptor(new String[] {"path"}, INT64, 0, 0), + new PrimitiveType(OPTIONAL, INT64, "Test column")); + TupleDomain effectivePredicate = TupleDomain.withColumnDomains(ImmutableMap.of( + column, + Domain.create(ValueSet.of(BIGINT, 42L, 43L, 44L, 404L), false))); + TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column)); + + assertTrue(parquetPredicate.matches(2, ImmutableMap.of(column, longColumnStats(32, 42)), ID, true)); + assertFalse(parquetPredicate.matches(2, ImmutableMap.of(column, longColumnStats(30, 40)), ID, true)); + assertFalse(parquetPredicate.matches(2, ImmutableMap.of(column, longColumnStats(1024, 0x10000 + 42)), ID, true)); + } + + @Test + public void testVarcharMatchesWithDictionaryDescriptor() { ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0); RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column")); @@ -319,4 +361,11 @@ private static IntStatistics intColumnStats(int minimum, int maximum) statistics.setMinMax(minimum, maximum); return statistics; } + + private static LongStatistics longColumnStats(long minimum, long maximum) + { + LongStatistics statistics = new LongStatistics(); + statistics.setMinMax(minimum, maximum); + return statistics; + } }