Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public boolean matches(long numberOfRows, Map<ColumnDescriptor, Statistics<?>> s
}

@Override
public boolean matches(Map<ColumnDescriptor, DictionaryDescriptor> dictionaries)
public boolean matches(DictionaryDescriptor dictionary)
{
return true;
}
Expand All @@ -51,9 +51,11 @@ boolean matches(long numberOfRows, Map<ColumnDescriptor, Statistics<?>> statisti
throws ParquetCorruptionException;

/**
* Should the Parquet Reader process a file section with the specified dictionary.
* Should the Parquet Reader process a file section, judged by this single column's
* dictionary alone. This check is safe to repeat per dictionary, so a section can be
* eliminated early without loading any further Parquet dictionaries.
*
* @param dictionaries dictionaries per column
* @param dictionary the single-column dictionary to test
*/
boolean matches(Map<ColumnDescriptor, DictionaryDescriptor> dictionaries);
boolean matches(DictionaryDescriptor dictionary);
}
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,7 @@ public static boolean predicateMatches(Predicate parquetPredicate, BlockMetaData
return false;
}

Map<ColumnDescriptor, DictionaryDescriptor> dictionaries = getDictionaries(block, dataSource, descriptorsByPath, parquetTupleDomain);
return parquetPredicate.matches(dictionaries);
return dictionaryPredicatesMatch(parquetPredicate, block, dataSource, descriptorsByPath, parquetTupleDomain);
}

private static Map<ColumnDescriptor, Statistics<?>> getStatistics(BlockMetaData blockMetadata, Map<List<String>, RichColumnDescriptor> descriptorsByPath)
Expand All @@ -113,23 +112,22 @@ private static Map<ColumnDescriptor, Statistics<?>> getStatistics(BlockMetaData
return statistics.build();
}

private static Map<ColumnDescriptor, DictionaryDescriptor> getDictionaries(BlockMetaData blockMetadata, ParquetDataSource dataSource, Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<ColumnDescriptor> parquetTupleDomain)
private static boolean dictionaryPredicatesMatch(Predicate parquetPredicate, BlockMetaData blockMetadata, ParquetDataSource dataSource, Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<ColumnDescriptor> parquetTupleDomain)
{
ImmutableMap.Builder<ColumnDescriptor, DictionaryDescriptor> dictionaries = ImmutableMap.builder();
for (ColumnChunkMetaData columnMetaData : blockMetadata.getColumns()) {
RichColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(columnMetaData.getPath().toArray()));
if (descriptor != null) {
if (isOnlyDictionaryEncodingPages(columnMetaData) && isColumnPredicate(descriptor, parquetTupleDomain)) {
int totalSize = toIntExact(columnMetaData.getTotalSize());
byte[] buffer = new byte[totalSize];
byte[] buffer = new byte[toIntExact(columnMetaData.getTotalSize())];
dataSource.readFully(columnMetaData.getStartingPos(), buffer);
Optional<DictionaryPage> dictionaryPage = readDictionaryPage(buffer, columnMetaData.getCodec());
dictionaries.put(descriptor, new DictionaryDescriptor(descriptor, dictionaryPage));
break;
// Early abort: the predicate already eliminates this block, so no further dictionaries need to be read
if (!parquetPredicate.matches(new DictionaryDescriptor(descriptor, readDictionaryPage(buffer, columnMetaData.getCodec())))) {
return false;
}
}
}
}
return dictionaries.build();
return true;
}

private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName)
Expand Down Expand Up @@ -157,7 +155,7 @@ private static Optional<DictionaryPage> readDictionaryPage(byte[] data, Compress
private static boolean isColumnPredicate(ColumnDescriptor columnDescriptor, TupleDomain<ColumnDescriptor> parquetTupleDomain)
{
verify(parquetTupleDomain.getDomains().isPresent(), "parquetTupleDomain is empty");
return parquetTupleDomain.getDomains().get().keySet().contains(columnDescriptor);
return parquetTupleDomain.getDomains().get().containsKey(columnDescriptor);
}

@VisibleForTesting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,27 +106,24 @@ public boolean matches(long numberOfRows, Map<ColumnDescriptor, Statistics<?>> s
}

@Override
public boolean matches(Map<ColumnDescriptor, DictionaryDescriptor> dictionaries)
public boolean matches(DictionaryDescriptor dictionary)
{
requireNonNull(dictionary, "dictionary is null");
if (effectivePredicate.isNone()) {
return false;
}

Map<ColumnDescriptor, Domain> effectivePredicateDomains = effectivePredicate.getDomains()
.orElseThrow(() -> new IllegalStateException("Effective predicate other than none should have domains"));

for (RichColumnDescriptor column : columns) {
Domain effectivePredicateDomain = effectivePredicateDomains.get(column);
if (effectivePredicateDomain == null) {
continue;
}
DictionaryDescriptor dictionaryDescriptor = dictionaries.get(column);
Domain domain = getDomain(effectivePredicateDomain.getType(), dictionaryDescriptor);
if (effectivePredicateDomain.intersect(domain).isNone()) {
return false;
}
}
return true;
Domain effectivePredicateDomain = effectivePredicateDomains.get(dictionary.getColumnDescriptor());

return effectivePredicateDomain == null || effectivePredicateMatches(effectivePredicateDomain, dictionary);
}

/**
 * Returns whether the dictionary's value domain overlaps the effective predicate's
 * domain for this column, i.e. whether the predicate could still match any row.
 */
private static boolean effectivePredicateMatches(Domain effectivePredicateDomain, DictionaryDescriptor dictionary)
{
    // Derive the domain covered by the dictionary values, then test for a non-empty intersection
    Domain dictionaryDomain = getDomain(effectivePredicateDomain.getType(), dictionary);
    Domain intersection = effectivePredicateDomain.intersect(dictionaryDomain);
    return !intersection.isNone();
}

@VisibleForTesting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ public void testVarcharMatchesWithDictionaryDescriptor()
TupleDomain<ColumnDescriptor> effectivePredicate = getEffectivePredicate(column, createVarcharType(255), EMPTY_SLICE);
TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column));
DictionaryPage page = new DictionaryPage(Slices.wrappedBuffer(new byte[] {0, 0, 0, 0}), 1, PLAIN_DICTIONARY);
assertTrue(parquetPredicate.matches(singletonMap(column, new DictionaryDescriptor(column, Optional.of(page)))));
assertTrue(parquetPredicate.matches(new DictionaryDescriptor(column, Optional.of(page))));
}

private TupleDomain<ColumnDescriptor> getEffectivePredicate(RichColumnDescriptor column, VarcharType type, Slice value)
Expand Down