diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/Predicate.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/Predicate.java index 5d9bcf337fde3..1295e163e9c37 100644 --- a/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/Predicate.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/Predicate.java @@ -32,7 +32,7 @@ public boolean matches(long numberOfRows, Map> s } @Override - public boolean matches(Map dictionaries) + public boolean matches(DictionaryDescriptor dictionary) { return true; } @@ -51,9 +51,11 @@ boolean matches(long numberOfRows, Map> statisti throws ParquetCorruptionException; /** - * Should the Parquet Reader process a file section with the specified dictionary. + * Should the Parquet Reader process a file section with the specified dictionary based on that + * single dictionary. This is safe to check repeatedly to avoid loading more parquet dictionaries + * if the section can already be eliminated. * - * @param dictionaries dictionaries per column + * @param dictionary The single column dictionary */ - boolean matches(Map dictionaries); + boolean matches(DictionaryDescriptor dictionary); } diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/PredicateUtils.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/PredicateUtils.java index 06866992d0789..ff5a82f1cd6c2 100644 --- a/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/PredicateUtils.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/PredicateUtils.java @@ -94,8 +94,7 @@ public static boolean predicateMatches(Predicate parquetPredicate, BlockMetaData return false; } - Map dictionaries = getDictionaries(block, dataSource, descriptorsByPath, parquetTupleDomain); - return parquetPredicate.matches(dictionaries); + return dictionaryPredicatesMatch(parquetPredicate, block, dataSource, descriptorsByPath, parquetTupleDomain); } private static Map> getStatistics(BlockMetaData blockMetadata, Map, RichColumnDescriptor> descriptorsByPath) @@ -113,23 +112,22 @@ private static Map> getStatistics(BlockMetaData return statistics.build(); } - private static Map getDictionaries(BlockMetaData blockMetadata, ParquetDataSource dataSource, Map, RichColumnDescriptor> descriptorsByPath, TupleDomain parquetTupleDomain) + private static boolean dictionaryPredicatesMatch(Predicate parquetPredicate, BlockMetaData blockMetadata, ParquetDataSource dataSource, Map, RichColumnDescriptor> descriptorsByPath, TupleDomain parquetTupleDomain) { - ImmutableMap.Builder dictionaries = ImmutableMap.builder(); for (ColumnChunkMetaData columnMetaData : blockMetadata.getColumns()) { RichColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(columnMetaData.getPath().toArray())); if (descriptor != null) { if (isOnlyDictionaryEncodingPages(columnMetaData) && isColumnPredicate(descriptor, parquetTupleDomain)) { - int totalSize = toIntExact(columnMetaData.getTotalSize()); - byte[] buffer = new byte[totalSize]; + byte[] buffer = new byte[toIntExact(columnMetaData.getTotalSize())]; dataSource.readFully(columnMetaData.getStartingPos(), buffer); - Optional dictionaryPage = readDictionaryPage(buffer, columnMetaData.getCodec()); - dictionaries.put(descriptor, new DictionaryDescriptor(descriptor, dictionaryPage)); - break; + // Early abort, predicate already filters block so no more dictionaries need be read + if (!parquetPredicate.matches(new DictionaryDescriptor(descriptor, readDictionaryPage(buffer, columnMetaData.getCodec())))) { + return false; + } } } } - return dictionaries.build(); + return true; } private static Optional readDictionaryPage(byte[] data, CompressionCodecName codecName) @@ -157,7 +155,7 @@ private static Optional readDictionaryPage(byte[] data, Compress private static boolean isColumnPredicate(ColumnDescriptor columnDescriptor, TupleDomain parquetTupleDomain) { verify(parquetTupleDomain.getDomains().isPresent(), "parquetTupleDomain is empty"); - return parquetTupleDomain.getDomains().get().keySet().contains(columnDescriptor); + return parquetTupleDomain.getDomains().get().containsKey(columnDescriptor); } @VisibleForTesting diff --git a/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/TupleDomainParquetPredicate.java b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/TupleDomainParquetPredicate.java index 7327eae2ae250..3abcedb011563 100644 --- a/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/TupleDomainParquetPredicate.java +++ b/presto-parquet/src/main/java/com/facebook/presto/parquet/predicate/TupleDomainParquetPredicate.java @@ -106,8 +106,9 @@ public boolean matches(long numberOfRows, Map> s } @Override - public boolean matches(Map dictionaries) + public boolean matches(DictionaryDescriptor dictionary) { + requireNonNull(dictionary, "dictionary is null"); if (effectivePredicate.isNone()) { return false; } @@ -115,18 +116,14 @@ public boolean matches(Map dictionaries) Map effectivePredicateDomains = effectivePredicate.getDomains() .orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains")); - for (RichColumnDescriptor column : columns) { - Domain effectivePredicateDomain = effectivePredicateDomains.get(column); - if (effectivePredicateDomain == null) { - continue; - } - DictionaryDescriptor dictionaryDescriptor = dictionaries.get(column); - Domain domain = getDomain(effectivePredicateDomain.getType(), dictionaryDescriptor); - if (effectivePredicateDomain.intersect(domain).isNone()) { - return false; - } - } - return true; + Domain effectivePredicateDomain = effectivePredicateDomains.get(dictionary.getColumnDescriptor()); + + return effectivePredicateDomain == null || effectivePredicateMatches(effectivePredicateDomain, dictionary); + } + + private static boolean effectivePredicateMatches(Domain effectivePredicateDomain, DictionaryDescriptor dictionary) + { + return !effectivePredicateDomain.intersect(getDomain(effectivePredicateDomain.getType(), dictionary)).isNone(); } @VisibleForTesting diff --git a/presto-parquet/src/test/java/com/facebook/presto/parquet/TestTupleDomainParquetPredicate.java b/presto-parquet/src/test/java/com/facebook/presto/parquet/TestTupleDomainParquetPredicate.java index 4c657e9db9ef8..d0347acdb943e 100644 --- a/presto-parquet/src/test/java/com/facebook/presto/parquet/TestTupleDomainParquetPredicate.java +++ b/presto-parquet/src/test/java/com/facebook/presto/parquet/TestTupleDomainParquetPredicate.java @@ -337,7 +337,7 @@ public void testVarcharMatchesWithDictionaryDescriptor() TupleDomain effectivePredicate = getEffectivePredicate(column, createVarcharType(255), EMPTY_SLICE); TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column)); DictionaryPage page = new DictionaryPage(Slices.wrappedBuffer(new byte[] {0, 0, 0, 0}), 1, PLAIN_DICTIONARY); - assertTrue(parquetPredicate.matches(singletonMap(column, new DictionaryDescriptor(column, Optional.of(page))))); + assertTrue(parquetPredicate.matches(new DictionaryDescriptor(column, Optional.of(page)))); } private TupleDomain getEffectivePredicate(RichColumnDescriptor column, VarcharType type, Slice value)