Data: delete compaction optimization by bloom filter #5100
Changes from all commits
DeleteFilter.java

@@ -19,6 +19,8 @@
package org.apache.iceberg.data;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;

@@ -36,11 +38,15 @@
import org.apache.iceberg.data.parquet.GenericParquetReaders;
import org.apache.iceberg.deletes.Deletes;
import org.apache.iceberg.deletes.PositionDeleteIndex;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.orc.ORC;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.parquet.ParquetBloomRowGroupFilter;
import org.apache.iceberg.parquet.ParquetUtil;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;

@@ -49,10 +55,15 @@
import org.apache.iceberg.relocated.com.google.common.collect.Multimap;
import org.apache.iceberg.relocated.com.google.common.collect.Multimaps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.StructLikeSet;
import org.apache.iceberg.util.StructProjection;
import org.apache.parquet.hadoop.BloomFilterReader;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.schema.MessageType;

public abstract class DeleteFilter<T> {
  private static final long DEFAULT_SET_FILTER_THRESHOLD = 100_000L;

@@ -133,10 +144,19 @@ public CloseableIterable<T> filter(CloseableIterable<T> records) {

  private List<Predicate<T>> applyEqDeletes() {
    List<Predicate<T>> isInDeleteSets = Lists.newArrayList();
    Map<BlockMetaData, BloomFilterReader> parquetBloomFilterReader = Maps.newHashMap();
    ParquetFileReader parquetReader = null;
    Predicate<Record> isInBloomFilter = null;
    if (eqDeletes.isEmpty()) {
      return isInDeleteSets;
    }

    // load bloomfilter readers from data file
    if (filePath.endsWith(".parquet")) {
      parquetReader = ParquetUtil.openFile(getInputFile(filePath));
Collaborator: Can we use …

Author: It might be a big change. I want to keep this reader open during the iteration of the delete files, and considering the ORC format, the delete iteration would need to be encapsulated in a new function. Any advice for this change?
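One possible shape for that encapsulation, as a rough sketch rather than anything in this PR: a hypothetical helper `applyDeletesWithBloom(ParquetFileReader)` holds the existing loop body, and try-with-resources keeps the reader open for the whole delete iteration while still closing it deterministically; ORC and other formats simply skip the bloom path. It reuses the class's existing `filePath` and `getInputFile` members.

```java
// Hypothetical refactor sketch (not part of this PR): scope the Parquet reader to the
// delete-file iteration so try-with-resources closes it deterministically.
private List<Predicate<T>> applyEqDeletesScoped() {
  if (!filePath.endsWith(".parquet")) {
    // ORC and other formats have no Parquet bloom filters; run the plain path
    return applyDeletesWithBloom(null);
  }

  try (ParquetFileReader reader = ParquetUtil.openFile(getInputFile(filePath))) {
    // the reader stays open for the whole iteration over eqDeletes
    return applyDeletesWithBloom(reader);
  } catch (IOException e) {
    throw new UncheckedIOException("Failed to close Parquet reader: " + filePath, e);
  }
}

// applyDeletesWithBloom(ParquetFileReader) is assumed to contain the existing
// applyEqDeletes() body and to tolerate a null reader when no bloom filters exist.
```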
      parquetBloomFilterReader.putAll(ParquetUtil.getParquetBloomFilters(parquetReader));
    }

    Multimap<Set<Integer>, DeleteFile> filesByDeleteIds = Multimaps.newMultimap(Maps.newHashMap(), Lists::newArrayList);
    for (DeleteFile delete : eqDeletes) {
      filesByDeleteIds.put(Sets.newHashSet(delete.equalityFieldIds()), delete);

@@ -148,6 +168,12 @@ private List<Predicate<T>> applyEqDeletes() {

      Schema deleteSchema = TypeUtil.select(requiredSchema, ids);

      if (filePath.endsWith(".parquet") && parquetReader != null) {
Collaborator: I think we can change the ctor parameter from …

Author: Yes, you are right, but dataFile is not passed into DeleteFilter as a parameter; it was changed to filePath in #4381.

Collaborator: I see. Any concern if we change it to …

Author: The change to the constructor parameters was made only so Trino could support merge-on-read. Trino currently wraps a dummy fileScanTask for the data file; the author wants to remove the fileScanTask implementation in Trino and use the filePath parameter instead. If we change it to dataFile, compatibility becomes a problem.
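For illustration of the compatibility point only, a rough sketch (not something this PR does) of how both call patterns could coexist: keep the path-based constructor that engines like Trino already call, and add a DataFile-based overload that delegates to it. The class name, fields, and signatures below are hypothetical.

```java
import org.apache.iceberg.DataFile;

// Hypothetical sketch, not the PR's code: keep the path-based constructor for engines
// that only know the file path (e.g. Trino), and add a DataFile overload that delegates.
public abstract class ExampleDeleteFilter<T> {
  private final String filePath;
  private final DataFile dataFile; // null when the caller only knows the path

  protected ExampleDeleteFilter(String filePath) {
    this(filePath, null);
  }

  protected ExampleDeleteFilter(DataFile dataFile) {
    this(dataFile.path().toString(), dataFile);
  }

  private ExampleDeleteFilter(String filePath, DataFile dataFile) {
    this.filePath = filePath;
    this.dataFile = dataFile;
  }
}
```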
        MessageType fileSchema = parquetReader.getFileMetaData().getSchema();
        isInBloomFilter =
            record -> findInParquetBloomFilter(record, deleteSchema, fileSchema, parquetBloomFilterReader);
      }

      // a projection to select and reorder fields of the file schema to match the delete rows
      StructProjection projectRow = StructProjection.create(requiredSchema, deleteSchema);

@@ -158,6 +184,11 @@ private List<Predicate<T>> applyEqDeletes() {
      CloseableIterable<Record> records = CloseableIterable.transform(
          CloseableIterable.concat(deleteRecords), Record::copy);

      // apply bloomfilter on delete records
      if (isInBloomFilter != null) {
        records = CloseableIterable.filter(records, isInBloomFilter);
      }

      StructLikeSet deleteSet = Deletes.toEqualitySet(
          CloseableIterable.transform(
              records, record -> new InternalRecordWrapper(deleteSchema.asStruct()).wrap(record)),

@@ -166,10 +197,67 @@ private List<Predicate<T>> applyEqDeletes() {
      Predicate<T> isInDeleteSet = record -> deleteSet.contains(projectRow.wrap(asStructLike(record)));
      isInDeleteSets.add(isInDeleteSet);
    }

    try {
      if (parquetReader != null) {
        parquetReader.close();
      }
    } catch (IOException e) {
      throw new UncheckedIOException("failed to close parquet file reader!", e);
    }
    return isInDeleteSets;
  }

  private boolean findInParquetBloomFilter(
      Record record,
      Schema deleteSchema,
      MessageType fileSchema,
      Map<BlockMetaData, BloomFilterReader> parquetBloomFilterReader) {
    if (record.size() == 0 || parquetBloomFilterReader.isEmpty()) {
      return true;
    }
    // build filter by record values
    Expression filter = buildFilter(record, deleteSchema);
    ParquetBloomRowGroupFilter bloomFilter = new ParquetBloomRowGroupFilter(deleteSchema, filter, true);
    for (Map.Entry<BlockMetaData, BloomFilterReader> entry : parquetBloomFilterReader.entrySet()) {
      boolean shouldRead = bloomFilter.shouldRead(fileSchema, entry.getKey(), entry.getValue());
      if (shouldRead) {
        return true;
      }
    }

    return false;
  }

  private Expression buildFilter(Record record, Schema schema) {
    Expression filter = Expressions.alwaysTrue();
    for (Types.NestedField field : schema.columns()) {
      Object value = getRecordValue(record, field);
      if (value == null) {
        continue;
      }
      filter = Expressions.and(filter, Expressions.equal(field.name(), value));
    }
    return filter;
  }

  private Object getRecordValue(Record record, Types.NestedField field) {
    Type type = field.type();
    switch (type.toString()) {
      case "date":
        return Literal.of(record.getField(field.name()).toString()).to(Types.DateType.get()).value();
      case "time":
        return Literal.of(record.getField(field.name()).toString()).to(Types.TimeType.get()).value();
      case "timestamp":
        if (((Types.TimestampType) type).shouldAdjustToUTC()) {
          return Literal.of(record.getField(field.name()).toString()).to(Types.TimestampType.withZone()).value();
        } else {
          return Literal.of(record.getField(field.name()).toString()).to(Types.TimestampType.withoutZone()).value();
        }
      default:
        return record.getField(field.name());
    }
  }

  public CloseableIterable<T> findEqualityDeleteRows(CloseableIterable<T> records) {
    // Predicate to test whether a row has been deleted by equality deletions.
    Predicate<T> deletedRows = applyEqDeletes().stream()
Review comment: Do we want to check whether the bloom filter is turned on, to avoid reading the footer if it is not?

Author reply: You mean checking the bloom filter via table properties, right? But the bloom filter properties may have been updated since the file was written, so the bloom filters in the current file may not match the table properties.
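For reference, a minimal sketch of the property-based check being discussed, assuming the caller can supply the table's properties map and that Iceberg's per-column switch uses the `write.parquet.bloom-filter-enabled.column.` prefix. As the reply points out, this only reflects the table's current configuration, not necessarily how an older data file was actually written.

```java
import java.util.Map;

// Hedged sketch (not part of this PR): skip the bloom filter path entirely when the
// table properties do not enable Parquet bloom filters for any column. Caveat from the
// discussion: a file written before the property changed may not match this check.
public final class BloomFilterPropertyCheck {
  // Per-column property prefix, e.g. "write.parquet.bloom-filter-enabled.column.id" = "true"
  private static final String PREFIX = "write.parquet.bloom-filter-enabled.column.";

  private BloomFilterPropertyCheck() {
  }

  public static boolean anyColumnEnabled(Map<String, String> tableProperties) {
    return tableProperties.entrySet().stream()
        .anyMatch(e -> e.getKey().startsWith(PREFIX) && Boolean.parseBoolean(e.getValue()));
  }
}
```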