20 changes: 9 additions & 11 deletions data/src/main/java/org/apache/iceberg/data/DeleteFilter.java

@@ -25,10 +25,8 @@
 import java.util.Set;
 import java.util.function.Predicate;
 import org.apache.iceberg.Accessor;
-import org.apache.iceberg.DataFile;
 import org.apache.iceberg.DeleteFile;
 import org.apache.iceberg.FileContent;
-import org.apache.iceberg.FileScanTask;
 import org.apache.iceberg.MetadataColumns;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.StructLike;
@@ -64,7 +62,7 @@ public abstract class DeleteFilter<T> {
       MetadataColumns.DELETE_FILE_POS);

   private final long setFilterThreshold;
-  private final DataFile dataFile;
+  private final String filePath;
   private final List<DeleteFile> posDeletes;
   private final List<DeleteFile> eqDeletes;
   private final Schema requiredSchema;
@@ -73,13 +71,13 @@ public abstract class DeleteFilter<T> {
   private PositionDeleteIndex deleteRowPositions = null;
   private Predicate<T> eqDeleteRows = null;

-  protected DeleteFilter(FileScanTask task, Schema tableSchema, Schema requestedSchema) {
+  protected DeleteFilter(String filePath, List<DeleteFile> deletes, Schema tableSchema, Schema requestedSchema) {
     this.setFilterThreshold = DEFAULT_SET_FILTER_THRESHOLD;
-    this.dataFile = task.file();
+    this.filePath = filePath;

     ImmutableList.Builder<DeleteFile> posDeleteBuilder = ImmutableList.builder();
     ImmutableList.Builder<DeleteFile> eqDeleteBuilder = ImmutableList.builder();
-    for (DeleteFile delete : task.deletes()) {
+    for (DeleteFile delete : deletes) {
       switch (delete.content()) {
         case POSITION_DELETES:
           posDeleteBuilder.add(delete);
@@ -214,7 +212,7 @@ public PositionDeleteIndex deletedRowPositions() {

     if (deleteRowPositions == null) {
       List<CloseableIterable<Record>> deletes = Lists.transform(posDeletes, this::openPosDeletes);
-      deleteRowPositions = Deletes.toPositionIndex(dataFile.path(), deletes);
+      deleteRowPositions = Deletes.toPositionIndex(filePath, deletes);
     }
     return deleteRowPositions;
   }
@@ -228,10 +226,10 @@ private CloseableIterable<T> applyPosDeletes(CloseableIterable<T> records) {

     // if there are fewer deletes than a reasonable number to keep in memory, use a set
     if (posDeletes.stream().mapToLong(DeleteFile::recordCount).sum() < setFilterThreshold) {
-      return Deletes.filter(records, this::pos, Deletes.toPositionIndex(dataFile.path(), deletes));
+      return Deletes.filter(records, this::pos, Deletes.toPositionIndex(filePath, deletes));
     }

-    return Deletes.streamingFilter(records, this::pos, Deletes.deletePositions(dataFile.path(), deletes));
+    return Deletes.streamingFilter(records, this::pos, Deletes.deletePositions(filePath, deletes));
   }

   private CloseableIterable<Record> openPosDeletes(DeleteFile file) {
@@ -255,7 +253,7 @@ private CloseableIterable<Record> openDeletes(DeleteFile deleteFile, Schema dele
             .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(deleteSchema, fileSchema));

         if (deleteFile.content() == FileContent.POSITION_DELETES) {
-          builder.filter(Expressions.equal(MetadataColumns.DELETE_FILE_PATH.name(), dataFile.path()));
+          builder.filter(Expressions.equal(MetadataColumns.DELETE_FILE_PATH.name(), filePath));
         }

         return builder.build();
@@ -267,7 +265,7 @@ private CloseableIterable<Record> openDeletes(DeleteFile deleteFile, Schema dele
             .createReaderFunc(fileSchema -> GenericOrcReader.buildReader(deleteSchema, fileSchema));

         if (deleteFile.content() == FileContent.POSITION_DELETES) {
-          orcBuilder.filter(Expressions.equal(MetadataColumns.DELETE_FILE_PATH.name(), dataFile.path()));
+          orcBuilder.filter(Expressions.equal(MetadataColumns.DELETE_FILE_PATH.name(), filePath));
         }

         return orcBuilder.build();
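All the call-site updates below derive the two new constructor arguments the same way: DataFile.path() returns a CharSequence, so each subclass converts it to a String before delegating, and the delete files come straight from FileScanTask.deletes(). A minimal sketch of that derivation, with an invented helper name purely for illustration:

import java.util.List;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.FileScanTask;

// Hypothetical helper (not part of this change); it only spells out the arguments the
// engine subclasses now pass to DeleteFilter(String, List<DeleteFile>, Schema, Schema).
final class DeleteFilterArgs {
  private DeleteFilterArgs() {
  }

  // DataFile.path() is a CharSequence; DeleteFilter now stores a plain String path,
  // hence the explicit toString() at every updated call site.
  static String filePath(FileScanTask task) {
    return task.file().path().toString();
  }

  // Position and equality delete files that apply to the task's data file.
  static List<DeleteFile> deletes(FileScanTask task) {
    return task.deletes();
  }
}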

@@ -30,7 +30,7 @@ public class GenericDeleteFilter extends DeleteFilter<Record> {
   private final InternalRecordWrapper asStructLike;

   public GenericDeleteFilter(FileIO io, FileScanTask task, Schema tableSchema, Schema requestedSchema) {
-    super(task, tableSchema, requestedSchema);
+    super(task.file().path().toString(), task.deletes(), tableSchema, requestedSchema);
     this.io = io;
     this.asStructLike = new InternalRecordWrapper(requiredSchema().asStruct());
   }
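From a caller's point of view nothing changes: the engine-specific filters keep their task-based constructors and unwrap the task themselves. A minimal usage sketch for the generic path, assuming DeleteFilter's existing filter(...) entry point (untouched by this PR) and that the input records were read with the filter's requiredSchema() projection; the method and variable names are illustrative:

import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericDeleteFilter;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.FileIO;

class DeleteFilterUsage {
  // Applies the task's position and equality deletes to rows read from its data file.
  static CloseableIterable<Record> liveRows(
      FileIO io, FileScanTask task, Schema tableSchema, Schema requestedSchema,
      CloseableIterable<Record> records) {
    GenericDeleteFilter deleteFilter = new GenericDeleteFilter(io, task, tableSchema, requestedSchema);
    return deleteFilter.filter(records);
  }
}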

@@ -172,7 +172,7 @@ private static class FlinkDeleteFilter extends DeleteFilter<RowData> {

     FlinkDeleteFilter(FileScanTask task, Schema tableSchema, Schema requestedSchema,
                       InputFilesDecryptor inputFilesDecryptor) {
-      super(task, tableSchema, requestedSchema);
+      super(task.file().path().toString(), task.deletes(), tableSchema, requestedSchema);
       this.requiredRowType = FlinkSchemaUtil.convert(requiredSchema());
       this.asStructLike = new RowDataWrapper(requiredRowType, requiredSchema().asStruct());
       this.inputFilesDecryptor = inputFilesDecryptor;

@@ -141,7 +141,7 @@ private class SparkDeleteFilter extends DeleteFilter<InternalRow> {
     private final InternalRowWrapper asStructLike;

     SparkDeleteFilter(FileScanTask task, Schema tableSchema, Schema requestedSchema) {
-      super(task, tableSchema, requestedSchema);
+      super(task.file().path().toString(), task.deletes(), tableSchema, requestedSchema);
       this.asStructLike = new InternalRowWrapper(SparkSchemaUtil.convert(requiredSchema()));
     }

@@ -182,7 +182,7 @@ protected class SparkDeleteFilter extends DeleteFilter<InternalRow> {
     private final InternalRowWrapper asStructLike;

     SparkDeleteFilter(FileScanTask task, Schema tableSchema, Schema requestedSchema) {
-      super(task, tableSchema, requestedSchema);
+      super(task.file().path().toString(), task.deletes(), tableSchema, requestedSchema);
       this.asStructLike = new InternalRowWrapper(SparkSchemaUtil.convert(requiredSchema()));
     }
