21 changes: 20 additions & 1 deletion api/src/main/java/org/apache/iceberg/OverwriteFiles.java
@@ -107,7 +107,7 @@ public interface OverwriteFiles extends SnapshotUpdate<OverwriteFiles> {
OverwriteFiles caseSensitive(boolean caseSensitive);

/**
* Enables validation that files added concurrently do not conflict with this commit's operation.
* Enables validation that data files added concurrently do not conflict with this commit's operation.
* <p>
* This method should be called when the table is queried to determine which files to delete/append.
* If a concurrent operation commits a new file after the data was read and that file might
@@ -145,4 +145,23 @@ public interface OverwriteFiles extends SnapshotUpdate<OverwriteFiles> {
*/
@Deprecated
OverwriteFiles validateNoConflictingAppends(Long readSnapshotId, Expression conflictDetectionFilter);

/**
* Enables validation that delete files added concurrently do not conflict with this commit's operation.
* <p>
* Validating concurrently added delete files is required during DELETE, UPDATE and MERGE operations.
* If a concurrent operation adds a new delete file that applies to one of the data files being
* overwritten, the overwrite operation must be aborted as it may undelete rows that were removed
* concurrently.
* <p>
* Calling this method with a correct conflict detection filter is required to maintain
* serializable isolation for overwrite operations. Otherwise, the isolation level
* will be snapshot isolation.
* <p>
* Validation applies to operations that happened after the snapshot passed to {@link #validateFromSnapshot(long)}.
*
* @param conflictDetectionFilter an expression on rows in the table
* @return this for method chaining
*/
OverwriteFiles validateNoConflictingDeleteFiles(Expression conflictDetectionFilter);
}
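
A minimal usage sketch of the validation calls above, loosely modeled on a copy-on-write DELETE/UPDATE/MERGE commit path. The table handle and the scanSnapshotId, rewrittenFile, newFile, and rowFilter names are illustrative placeholders, not part of this change:

    OverwriteFiles overwrite = table.newOverwrite()
        .deleteFile(rewrittenFile)   // data file being rewritten
        .addFile(newFile)            // its replacement
        .caseSensitive(true);

    // start validation from the snapshot that was read, when one is known
    if (scanSnapshotId != null) {
      overwrite.validateFromSnapshot(scanSnapshotId);
    }

    // fail on data files appended concurrently that match the filter
    overwrite.validateNoConflictingAppends(rowFilter);
    // fail on delete files added concurrently that may apply to the overwritten rows
    overwrite.validateNoConflictingDeleteFiles(rowFilter);

    overwrite.commit();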
17 changes: 16 additions & 1 deletion api/src/main/java/org/apache/iceberg/RowDelta.java
@@ -94,7 +94,7 @@ public interface RowDelta extends SnapshotUpdate<RowDelta> {
RowDelta validateDeletedFiles();

/**
* Enables validation that files added concurrently do not conflict with this commit's operation.
* Enables validation that data files added concurrently do not conflict with this commit's operation.
* <p>
* This method should be called when the table is queried to determine which files to delete/append.
* If a concurrent operation commits a new file after the data was read and that file might
@@ -111,4 +111,19 @@ public interface RowDelta extends SnapshotUpdate<RowDelta> {
* @return this for method chaining
*/
RowDelta validateNoConflictingAppends(Expression conflictDetectionFilter);

/**
* Enables validation that delete files added concurrently do not conflict with this commit's operation.
* <p>
* This method must be called when the table is queried to produce a row delta for UPDATE and
* MERGE operations independently of the isolation level. Calling this method isn't required
* for DELETE operations as it is OK when a particular record we are trying to delete
* was deleted concurrently.

Contributor: Nit: use of "we" in javadoc is unnecessary. It is simpler to say "it is OK to delete a record that is also deleted concurrently".

* <p>
* Validation applies to operations that happened after the snapshot passed to {@link #validateFromSnapshot(long)}.
*
* @param conflictDetectionFilter an expression on rows in the table
* @return this for method chaining
*/
RowDelta validateNoConflictingDeleteFiles(Expression conflictDetectionFilter);

Member: Nit: should we make it a bit consistent with above? (i.e., omit "files" from the name)

Contributor Author: I did that in the first place, but then I started to worry it may be confusing. For example, we refer here to concurrently added delete files vs. delete operations that concurrently removed data files. I do prefer consistency too, but I am not sure whether it is confusing. What do you think, @szehon-ho?

Member: Yes, fine with me then, thanks for clarifying.

}
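
A similar hedged sketch for the merge-on-read path, assuming the engine supplies table, readSnapshotId, positionDeleteFile, and mergeFilter (all placeholder names):

    RowDelta rowDelta = table.newRowDelta()
        .addDeletes(positionDeleteFile)         // new position deletes for MERGE
        .validateFromSnapshot(readSnapshotId)
        // concurrent appends may add rows the MERGE should have processed
        .validateNoConflictingAppends(mergeFilter)
        // concurrent delete files may target the same rows the MERGE rewrites
        .validateNoConflictingDeleteFiles(mergeFilter);

    rowDelta.commit();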
33 changes: 29 additions & 4 deletions core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java
@@ -19,17 +19,22 @@

package org.apache.iceberg;

import java.util.Set;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Evaluator;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Projections;
import org.apache.iceberg.expressions.StrictMetricsEvaluator;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;

public class BaseOverwriteFiles extends MergingSnapshotProducer<OverwriteFiles> implements OverwriteFiles {
private final Set<DataFile> deletedDataFiles = Sets.newHashSet();
private boolean validateAddedFilesMatchOverwriteFilter = false;
private Long startingSnapshotId = null;
private Expression conflictDetectionFilter = null;
private Expression appendConflictDetectionFilter = null;
private Expression deleteConflictDetectionFilter = null;
private boolean caseSensitive = true;

protected BaseOverwriteFiles(String tableName, TableOperations ops) {
@@ -60,6 +65,7 @@ public OverwriteFiles addFile(DataFile file) {

@Override
public OverwriteFiles deleteFile(DataFile file) {
deletedDataFiles.add(file);
delete(file);
return this;
}
@@ -95,11 +101,18 @@ public OverwriteFiles caseSensitive(boolean isCaseSensitive) {
@Override
public OverwriteFiles validateNoConflictingAppends(Expression newConflictDetectionFilter) {
Preconditions.checkArgument(newConflictDetectionFilter != null, "Conflict detection filter cannot be null");

Contributor: Nit: should this be "Append conflict detection filter cannot be null" now that we have both appendConflictDetectionFilter and deleteConflictDetectionFilter?

Contributor Author: I'll update it if it fits on the same line.

this.conflictDetectionFilter = newConflictDetectionFilter;
this.appendConflictDetectionFilter = newConflictDetectionFilter;
failMissingDeletePaths();

Contributor: Question: does this call to failMissingDeletePaths() still belong here, or should it be moved to the validateNoConflictingDeleteFiles call?

Contributor Author: I'll double check.

return this;
}

@Override
public OverwriteFiles validateNoConflictingDeleteFiles(Expression newConflictDetectionFilter) {
Preconditions.checkArgument(newConflictDetectionFilter != null, "Conflict detection filter cannot be null");

Contributor: Nit: same comment about saying "Delete conflict detection filter cannot be null" instead of leaving it unqualified and ambiguous.

this.deleteConflictDetectionFilter = newConflictDetectionFilter;
return this;
}

@Override
protected void validate(TableMetadata base) {
if (validateAddedFilesMatchOverwriteFilter) {
@@ -127,8 +140,20 @@ protected void validate(TableMetadata base) {
}
}

if (conflictDetectionFilter != null && base.currentSnapshot() != null) {
validateAddedDataFiles(base, startingSnapshotId, conflictDetectionFilter, caseSensitive);
if (appendConflictDetectionFilter != null && base.currentSnapshot() != null) {
validateAddedDataFiles(base, startingSnapshotId, appendConflictDetectionFilter, caseSensitive);
}

boolean validateNewDeletes = deleteConflictDetectionFilter != null && base.currentSnapshot() != null;

Contributor: I think that the behavior here should be slightly different. There are two concerns: 1) whether to check delete files for snapshot isolation and 2) what conflict detection filter to use. Basing validateNewDeletes on whether the conflict detection filter was set doesn't seem correct to me.

I don't think there is a case where we don't want to validate delete files if we have called validateFromSnapshot to set the base snapshot. I think that we should add this as a boolean field that is set when validateFromSnapshot is called.

Then, if we are validating delete files, we should have two separate checks. First, if there are any files in deletedDataFiles, we perform the validation below. If the conflict detection filter wasn't set, we should use Expressions.alwaysTrue to find candidate delete files. Second, if an overwrite filter was set, we should run validateNoNewDeletes with either the delete filter or the delete conflict detection filter. The conflict detection filter should be an optimization, not a way to turn off delete validations.

I think that makes the API more understandable and consistent.

Contributor: Here's what I changed this to locally while thinking it through:

    // validateDeletes is set to true in validateFromSnapshot. Maybe we should default it if that method isn't called?
    if (validateDeletes) {
      if (deletedDataFiles.size() > 0) {
        validateNoNewDeletesForDataFiles(
            base, startingSnapshotId, deleteConflictDetectionFilter,
            deletedDataFiles, caseSensitive);
      }

      if (rowFilter() != Expressions.alwaysFalse()) {
        if (deleteConflictDetectionFilter != null) {
          validateNoNewDeletes(base, startingSnapshotId, deleteConflictDetectionFilter, caseSensitive);
        } else {
          validateNoNewDeletes(base, startingSnapshotId, rowFilter(), caseSensitive);
        }
      }
    }

Contributor Author: > Basing validateNewDeletes on whether the conflict detection filter was set doesn't seem correct to me.

If I understood you correctly, you are proposing that validateFromSnapshot will now indicate whether we should validate delete files. I think that is different from how RowDelta and OverwriteFiles work right now. I'd actually say calling validateFromSnapshot is an optimization that tells us from which snapshot to start looking. We never validate new appends if the append conflict detection filter is null. Moreover, it is not always possible to set the starting snapshot: if we start on an empty table, we must validate all snapshots. Here is our copy-on-write commit logic:

    Long scanSnapshotId = scan.snapshotId();
    if (scanSnapshotId != null) {
      overwriteFiles.validateFromSnapshot(scanSnapshotId);
    }

    Expression conflictDetectionFilter = conflictDetectionFilter();
    overwriteFiles.validateNoConflictingAppends(conflictDetectionFilter);

Also, in your snippet, why call validateNoNewDeletesForDataFiles if we already know the overwrite filter is set? I think validateNoNewDeletesForDataFiles is simply a more efficient version of validateNoNewDeletes that can open delete files that match the filter to check their content for conflicts. The problem is that we can use validateNoNewDeletesForDataFiles only if we overwrite specific files.

boolean overwriteByFilter = rowFilter() != Expressions.alwaysFalse();

if (validateNewDeletes && overwriteByFilter) {
  validateNoNewDeletes(base, startingSnapshotId, deleteConflictDetectionFilter, caseSensitive);
} else if (validateNewDeletes && deletedDataFiles.size() > 0) {
  // it is sufficient to ensure we don't have new delete files only for overwritten data files
  validateNoNewDeletesForDataFiles(
      base, startingSnapshotId, deleteConflictDetectionFilter,
      deletedDataFiles, caseSensitive);
}
}
}
23 changes: 17 additions & 6 deletions core/src/main/java/org/apache/iceberg/BaseRowDelta.java
@@ -27,7 +27,8 @@ class BaseRowDelta extends MergingSnapshotProducer<RowDelta> implements RowDelta
private Long startingSnapshotId = null; // check all versions by default
private final CharSequenceSet referencedDataFiles = CharSequenceSet.empty();
private boolean validateDeletes = false;
private Expression conflictDetectionFilter = null;
private Expression appendConflictDetectionFilter = null;
private Expression deleteConflictDetectionFilter = null;
private boolean caseSensitive = true;

BaseRowDelta(String tableName, TableOperations ops) {
@@ -83,7 +84,14 @@ public RowDelta validateDataFilesExist(Iterable<? extends CharSequence> referenc
@Override
public RowDelta validateNoConflictingAppends(Expression newConflictDetectionFilter) {
Preconditions.checkArgument(newConflictDetectionFilter != null, "Conflict detection filter cannot be null");
this.conflictDetectionFilter = newConflictDetectionFilter;
this.appendConflictDetectionFilter = newConflictDetectionFilter;
return this;
}

@Override
public RowDelta validateNoConflictingDeleteFiles(Expression newConflictDetectionFilter) {
Preconditions.checkArgument(newConflictDetectionFilter != null, "Conflict detection filter cannot be null");
this.deleteConflictDetectionFilter = newConflictDetectionFilter;
return this;
}

@@ -92,12 +100,15 @@ protected void validate(TableMetadata base) {
if (base.currentSnapshot() != null) {
if (!referencedDataFiles.isEmpty()) {
validateDataFilesExist(
base, startingSnapshotId, referencedDataFiles, !validateDeletes, conflictDetectionFilter);
base, startingSnapshotId, referencedDataFiles, !validateDeletes, appendConflictDetectionFilter);
}

if (appendConflictDetectionFilter != null) {
validateAddedDataFiles(base, startingSnapshotId, appendConflictDetectionFilter, caseSensitive);
}

// TODO: does this need to check new delete files?
if (conflictDetectionFilter != null) {
validateAddedDataFiles(base, startingSnapshotId, conflictDetectionFilter, caseSensitive);
if (deleteConflictDetectionFilter != null) {
validateNoNewDeletes(base, startingSnapshotId, deleteConflictDetectionFilter, caseSensitive);

Contributor Author: This check is quite trivial. For example, we won't be able to resolve conflicts within the same partition. I have outlined a way to optimize it here.

Member: Do you mean it cannot resolve conflicts within the same data file (I thought we are passing a data filter)? Or within the same partition?

Also, for my own learning: do you mean it will be over-aggressive and report false positives even if rows do not actually conflict, until we make the optimization?

Contributor Author: > you mean it will be over-aggressive and report false positives even if rows do not actually conflict, until we make the optimization.

Yeah, it may report false positives. The data filter is helpful, but I think it won't help much within the same partition. Position deletes are scoped to a partition, so the data filter should help us when there is a concurrent delete in another partition. Within the partition, though, most position deletes will match that row filter, as we don't persist the deleted row (by default).

Contributor (@jackye1995, Sep 24, 2021): A bit late to the whole discussion. Regarding the check, I read the outlined way to optimize it and just want to share some thoughts based on what I am doing for position deletes in my internal distribution today.

In my system, each position delete file contains exactly one file_path value, which avoids the spec's requirement to sort by file path and also greatly simplifies validation during concurrent commits, because each check can easily find all position deletes for each data file and inspect just the position min/max to see whether the position ranges could overlap. Of course, this cannot be applied to the general use case; it was implemented to see what can be achieved in a closed system where all delete writers write only that specific type of position delete file.

When I started to compact position delete files to contain multiple file_path values, it became very easy to get false positives, especially in object storage mode where the file_path min and max no longer mean much. So, at least for the object storage use case, a secondary index with much better file-skipping ability is a must-have to make the described strategy truly work efficiently.
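
A tiny sketch of the range check described above, under the stated assumption that every position delete file covers exactly one file_path: two sets of position deletes for the same data file can only conflict if their [minPos, maxPos] ranges intersect. The helper below is illustrative, not Iceberg API:

    // closed-interval overlap test on the position ranges of two delete files
    static boolean positionsMayOverlap(long minA, long maxA, long minB, long maxB) {
      return minA <= maxB && minB <= maxA;
    }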

}
}
}
14 changes: 14 additions & 0 deletions core/src/main/java/org/apache/iceberg/DeleteFileIndex.java
@@ -86,6 +86,20 @@ public boolean isEmpty() {
return (globalDeletes == null || globalDeletes.length == 0) && sortedDeletesByPartition.isEmpty();
}

public List<DeleteFile> referencedDeleteFiles() {
List<DeleteFile> deleteFiles = Lists.newArrayList();

Member: Optional comment: a small optimization can be done by knowing the initial length and checking isEmpty():

    if (isEmpty()) {
      return ImmutableList.of();
    } else {
      List<DeleteFile> deleteFiles = Lists.newArrayListWithExpectedSize(globalDeletes.length + sortedDeletesByPartition.size());
      ...
    }

Contributor Author: Will do!

Contributor Author: I was about to implement this, but then I realized sortedDeletesByPartition.size() is not an accurate estimate, as each map entry contains an array of delete files. To compute the right estimate, I'd need to iterate through the map.
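
A hedged sketch of that exact-size computation, assuming the internal shape visible below (values of sortedDeletesByPartition are pairs whose second() is a DeleteFile[]):

    // sum the per-partition arrays to size the result list exactly
    int size = globalDeletes == null ? 0 : globalDeletes.length;
    for (Pair<?, DeleteFile[]> partitionDeletes : sortedDeletesByPartition.values()) {
      size += partitionDeletes.second().length;
    }
    List<DeleteFile> deleteFiles = Lists.newArrayListWithExpectedSize(size);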


if (globalDeletes != null) {
deleteFiles.addAll(Arrays.asList(globalDeletes));
}

sortedDeletesByPartition.forEach((key, partitionDeletes) -> {
deleteFiles.addAll(Arrays.asList(partitionDeletes.second()));
});

return deleteFiles;
}

private StructLikeWrapper newWrapper(int specId) {
return StructLikeWrapper.forType(partitionTypeById.get(specId));
}
78 changes: 71 additions & 7 deletions core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
@@ -64,7 +64,7 @@ abstract class MergingSnapshotProducer<ThisT> extends SnapshotProducer<ThisT> {
private static final Set<String> VALIDATE_DATA_FILES_EXIST_SKIP_DELETE_OPERATIONS =
ImmutableSet.of(DataOperations.OVERWRITE, DataOperations.REPLACE);
// delete files can be added in "overwrite" or "delete" operations
private static final Set<String> VALIDATE_REPLACED_DATA_FILES_OPERATIONS =
private static final Set<String> VALIDATE_ADDED_DELETE_FILES_OPERATIONS =
ImmutableSet.of(DataOperations.OVERWRITE, DataOperations.DELETE);

Contributor Author: I renamed it to match VALIDATE_ADDED_FILES_OPERATIONS.

private final String tableName;
@@ -293,20 +293,33 @@ protected void validateAddedDataFiles(TableMetadata base, Long startingSnapshotI
*/
protected void validateNoNewDeletesForDataFiles(TableMetadata base, Long startingSnapshotId,
Iterable<DataFile> dataFiles) {
validateNoNewDeletesForDataFiles(base, startingSnapshotId, null, dataFiles, true);
}

/**
* Validates that no new delete files that must be applied to the given data files have been added to the table since
* a starting snapshot.
*
* @param base table metadata to validate
* @param startingSnapshotId id of the snapshot current at the start of the operation
* @param dataFilter a data filter
* @param dataFiles data files to validate have no new row deletes
* @param caseSensitive whether expression binding should be case-sensitive
*/
protected void validateNoNewDeletesForDataFiles(TableMetadata base, Long startingSnapshotId,
Expression dataFilter, Iterable<DataFile> dataFiles,
boolean caseSensitive) {
// if there is no current table state, no files have been added
if (base.currentSnapshot() == null) {
return;
}

Pair<List<ManifestFile>, Set<Long>> history =
validationHistory(base, startingSnapshotId, VALIDATE_REPLACED_DATA_FILES_OPERATIONS, ManifestContent.DELETES);
validationHistory(base, startingSnapshotId, VALIDATE_ADDED_DELETE_FILES_OPERATIONS, ManifestContent.DELETES);
List<ManifestFile> deleteManifests = history.first();

long startingSequenceNumber = startingSnapshotId == null ? 0 : base.snapshot(startingSnapshotId).sequenceNumber();
DeleteFileIndex deletes = DeleteFileIndex.builderFor(ops.io(), deleteManifests)
.afterSequenceNumber(startingSequenceNumber)
.specsById(ops.current().specsById())
.build();
long startingSequenceNumber = startingSequenceNumber(base, startingSnapshotId);
DeleteFileIndex deletes = buildDeleteFileIndex(deleteManifests, startingSequenceNumber, dataFilter, caseSensitive);

for (DataFile dataFile : dataFiles) {
// if any delete is found that applies to files written in or before the starting snapshot, fail
@@ -316,6 +329,57 @@ protected void validateNoNewDeletesForDataFiles(TableMetadata base, Long startin
}
}

/**
* Validates that no delete files matching a filter have been added to the table since a starting snapshot.
*
* @param base table metadata to validate
* @param startingSnapshotId id of the snapshot current at the start of the operation
* @param dataFilter an expression used to find new conflicting delete files
* @param caseSensitive whether expression evaluation should be case-sensitive
*/
Contributor: I think a slightly more accurate name would be validateNoNewDeleteFiles, since this checks that there aren't any new delete files, but data files could have been deleted.

protected void validateNoNewDeletes(TableMetadata base, Long startingSnapshotId,
                                    Expression dataFilter, boolean caseSensitive) {
// if there is no current table state, no files have been added
if (base.currentSnapshot() == null) {
return;
}

Pair<List<ManifestFile>, Set<Long>> history =
validationHistory(base, startingSnapshotId, VALIDATE_ADDED_DELETE_FILES_OPERATIONS, ManifestContent.DELETES);
List<ManifestFile> deleteManifests = history.first();

long startingSequenceNumber = startingSequenceNumber(base, startingSnapshotId);
DeleteFileIndex deletes = buildDeleteFileIndex(deleteManifests, startingSequenceNumber, dataFilter, caseSensitive);

ValidationException.check(deletes.isEmpty(),
    "Found new conflicting delete files that can apply to records matching %s: %s",
    dataFilter, Iterables.transform(deletes.referencedDeleteFiles(), ContentFile::path));

Member: Thanks for adding this!
}

// use 0 as a starting seq number if the starting snapshot is not set or expired
private long startingSequenceNumber(TableMetadata metadata, Long startingSnapshotId) {
if (startingSnapshotId != null && metadata.snapshot(startingSnapshotId) != null) {
Snapshot startingSnapshot = metadata.snapshot(startingSnapshotId);
return startingSnapshot.sequenceNumber();
} else {
return 0;

Contributor: nit: can use TableMetadata.INITIAL_SEQUENCE_NUMBER and remove the comment

}
}

private DeleteFileIndex buildDeleteFileIndex(List<ManifestFile> deleteManifests, long startingSequenceNumber,
Expression dataFilter, boolean caseSensitive) {
DeleteFileIndex.Builder builder = DeleteFileIndex.builderFor(ops.io(), deleteManifests)
.afterSequenceNumber(startingSequenceNumber)
.caseSensitive(caseSensitive)
.specsById(ops.current().specsById());

if (dataFilter != null) {
builder.filterData(dataFilter);
}

return builder.build();
}

@SuppressWarnings("CollectionUndefinedEquality")
protected void validateDataFilesExist(TableMetadata base, Long startingSnapshotId,
CharSequenceSet requiredDataFiles, boolean skipDeletes,
16 changes: 15 additions & 1 deletion core/src/test/java/org/apache/iceberg/TableTestBase.java
@@ -77,7 +77,7 @@ public class TableTestBase {
.build();
// Equality delete files.
static final DeleteFile FILE_A2_DELETES = FileMetadata.deleteFileBuilder(SPEC)
.ofEqualityDeletes(3)
.ofEqualityDeletes(1)
.withPath("/path/to/data-a2-deletes.parquet")
.withFileSizeInBytes(10)
.withPartitionPath("data_bucket=0")
@@ -364,6 +364,20 @@ void validateTableFiles(Table tbl, DataFile... expectedFiles) {
Assert.assertEquals("Files should match", expectedFilePaths, actualFilePaths);
}

void validateTableDeleteFiles(Table tbl, DeleteFile... expectedFiles) {
Set<CharSequence> expectedFilePaths = Sets.newHashSet();
for (DeleteFile file : expectedFiles) {
expectedFilePaths.add(file.path());
}
Set<CharSequence> actualFilePaths = Sets.newHashSet();
for (FileScanTask task : tbl.newScan().planFiles()) {
for (DeleteFile file : task.deletes()) {
actualFilePaths.add(file.path());
}
}
Assert.assertEquals("Delete files should match", expectedFilePaths, actualFilePaths);
}

List<String> paths(DataFile... dataFiles) {
List<String> paths = Lists.newArrayListWithExpectedSize(dataFiles.length);
for (DataFile file : dataFiles) {