Skip to content
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
9e43889
Append changes to snapshot branch
Jun 1, 2022
e6ea5f6
Adding snapshot to branch Impl
Jun 9, 2022
b43b80b
Adding check to see if it's a branch
Jun 13, 2022
9cb39c2
Creating branch if ref absent
Jun 16, 2022
ba341ec
Building after setting ref
Jun 16, 2022
d1ca432
Setting default branch as main
Jun 21, 2022
0d79ef0
Fetching current when no main ref is set.
Jun 22, 2022
495c0f3
checkStyle fixes
Jun 29, 2022
f7464eb
build fixes
Jun 29, 2022
39de1e4
Validation check for branch
Jul 2, 2022
7e75c68
Merge branch 'master' of https://github.com/apache/iceberg into branc…
Jul 6, 2022
080d76e
Restricting toBranch only for BaseRowDelta
Jul 9, 2022
1dc4f89
removing new line
Jul 10, 2022
41b2f62
Updating BaseRowDelta as per SnapshotProducer
namrathamk Aug 23, 2022
0c18302
Updating BaseRowDelta as per SnapshotProducer
namrathamk Aug 23, 2022
0acca3b
Merge branch 'master' of https://github.com/apache/iceberg into branc…
namrathamk Aug 23, 2022
49b6667
Updating BaseRowDelta as per SnapshotProducer
namrathamk Aug 23, 2022
4ab3414
Merge branch 'master' of https://github.com/apache/iceberg into branc…
namrathamk Aug 24, 2022
fb28c02
Merge branch 'master' of https://github.com/apache/iceberg into branc…
namrathamk Aug 26, 2022
cfc5e67
Updating BaseRowDelta as per SnapshotProducer
namrathamk Aug 28, 2022
70a3cf3
build fixes
namrathamyske Aug 28, 2022
a64b837
Merge branch 'master' of https://github.com/apache/iceberg into branc…
Jan 5, 2023
f9b3d67
fixing tests to take parameters, Snapshot summary fix
Jan 6, 2023
cd5569c
fetching data files and delete files by ref
Jan 6, 2023
3d5659b
spotless fix
namrathamyske Jan 8, 2023
0028bcd
Test case for overwrite and rewrite
namrathamyske Jan 11, 2023
7173dcb
Removing unnecessary testcases
namrathamyske Jan 14, 2023
caf9d59
Merge branch 'master' of https://github.com/apache/iceberg into branc…
namrathamyske Jan 15, 2023
4f64b09
Build fixes
namrathamyske Jan 15, 2023
90f23ab
Core: Add tests for replace partitions with branching, fix check for …
amogh-jahagirdar Jan 17, 2023
4b8ac6d
Merge pull request #20 from amogh-jahagirdar/branch-replace-partitions
namrathamyske Jan 18, 2023
c6df1e7
Pull from master
namrathamyske Jan 18, 2023
defff1d
Merge remote-tracking branch 'origin/branch-checks-final' into branch…
namrathamyske Jan 18, 2023
66e1850
overwrite validation tests
namrathamyske Jan 18, 2023
fc8780c
variable name change
namrathamyske Jan 18, 2023
6b6aefc
Uncomment test.
rdblue Jan 20, 2023
1a98e54
Core: Deprecate validation methods which don't accept parent snapshot…
amogh-jahagirdar Jan 21, 2023
74bab58
Merge pull request #21 from amogh-jahagirdar/row-delta-rev-api
namrathamyske Jan 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 239 additions & 0 deletions .palantir/revapi.yml

Large diffs are not rendered by default.

14 changes: 10 additions & 4 deletions core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ public OverwriteFiles validateNoConflictingDeletes() {
return this;
}

@Override
public BaseOverwriteFiles toBranch(String branch) {
targetBranch(branch);
return this;
}

@Override
protected void validate(TableMetadata base, Snapshot snapshot) {
if (validateAddedFilesMatchOverwriteFilter) {
Expand Down Expand Up @@ -133,19 +139,19 @@ protected void validate(TableMetadata base, Snapshot snapshot) {
}

if (validateNewDataFiles) {
validateAddedDataFiles(base, startingSnapshotId, dataConflictDetectionFilter());
validateAddedDataFiles(base, startingSnapshotId, dataConflictDetectionFilter(), snapshot);
}

if (validateNewDeletes) {
if (rowFilter() != Expressions.alwaysFalse()) {
Expression filter = conflictDetectionFilter != null ? conflictDetectionFilter : rowFilter();
validateNoNewDeleteFiles(base, startingSnapshotId, filter);
validateDeletedDataFiles(base, startingSnapshotId, filter);
validateNoNewDeleteFiles(base, startingSnapshotId, filter, snapshot);
validateDeletedDataFiles(base, startingSnapshotId, filter, snapshot);
}

if (deletedDataFiles.size() > 0) {
validateNoNewDeletesForDataFiles(
base, startingSnapshotId, conflictDetectionFilter, deletedDataFiles);
base, startingSnapshotId, conflictDetectionFilter, deletedDataFiles, snapshot);
}
}
}
Expand Down
21 changes: 15 additions & 6 deletions core/src/main/java/org/apache/iceberg/BaseReplacePartitions.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,23 +79,32 @@ public ReplacePartitions validateNoConflictingData() {
return this;
}

@Override
public BaseReplacePartitions toBranch(String branch) {
targetBranch(branch);
return this;
}

@Override
public void validate(TableMetadata currentMetadata, Snapshot snapshot) {
if (validateConflictingData) {
if (dataSpec().isUnpartitioned()) {
validateAddedDataFiles(currentMetadata, startingSnapshotId, Expressions.alwaysTrue());
validateAddedDataFiles(
currentMetadata, startingSnapshotId, Expressions.alwaysTrue(), snapshot);
} else {
validateAddedDataFiles(currentMetadata, startingSnapshotId, replacedPartitions);
validateAddedDataFiles(currentMetadata, startingSnapshotId, replacedPartitions, snapshot);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: I think it makes more sense to use parent rather than snapshot for the parent snapshot variable. Not a blocker, but it makes it easier to understand what's going on.

}
}

if (validateConflictingDeletes) {
if (dataSpec().isUnpartitioned()) {
validateDeletedDataFiles(currentMetadata, startingSnapshotId, Expressions.alwaysTrue());
validateNoNewDeleteFiles(currentMetadata, startingSnapshotId, Expressions.alwaysTrue());
validateDeletedDataFiles(
currentMetadata, startingSnapshotId, Expressions.alwaysTrue(), snapshot);
validateNoNewDeleteFiles(
currentMetadata, startingSnapshotId, Expressions.alwaysTrue(), snapshot);
} else {
validateDeletedDataFiles(currentMetadata, startingSnapshotId, replacedPartitions);
validateNoNewDeleteFiles(currentMetadata, startingSnapshotId, replacedPartitions);
validateDeletedDataFiles(currentMetadata, startingSnapshotId, replacedPartitions, snapshot);
validateNoNewDeleteFiles(currentMetadata, startingSnapshotId, replacedPartitions, snapshot);
}
}
}
Expand Down
8 changes: 7 additions & 1 deletion core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,18 @@ public RewriteFiles validateFromSnapshot(long snapshotId) {
return this;
}

@Override
public BaseRewriteFiles toBranch(String branch) {
targetBranch(branch);
return this;
}

@Override
protected void validate(TableMetadata base, Snapshot snapshot) {
if (replacedDataFiles.size() > 0) {
// if there are replaced data files, there cannot be any new row-level deletes for those data
// files
validateNoNewDeletesForDataFiles(base, startingSnapshotId, replacedDataFiles);
validateNoNewDeletesForDataFiles(base, startingSnapshotId, replacedDataFiles, snapshot);
}
}
}
25 changes: 20 additions & 5 deletions core/src/main/java/org/apache/iceberg/BaseRowDelta.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.util.CharSequenceSet;
import org.apache.iceberg.util.SnapshotUtil;

class BaseRowDelta extends MergingSnapshotProducer<RowDelta> implements RowDelta {
private Long startingSnapshotId = null; // check all versions by default
Expand Down Expand Up @@ -96,23 +97,37 @@ public RowDelta validateNoConflictingDeleteFiles() {
}

@Override
protected void validate(TableMetadata base, Snapshot snapshot) {
if (base.currentSnapshot() != null) {
public RowDelta toBranch(String branch) {
targetBranch(branch);
return this;
}

@Override
protected void validate(TableMetadata base, Snapshot parent) {
if (parent != null) {
if (startingSnapshotId != null) {
Preconditions.checkArgument(
SnapshotUtil.isAncestorOf(parent.snapshotId(), startingSnapshotId, base::snapshot),
"Snapshot %s is not an ancestor of %s",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think a bit more context would be helpful if you ever encounter this error. "Cannot validate changes: starting snapshot %s is not an ancestor of parent snapshot %s (branch %s)"

Copy link
Contributor Author

@namrathamyske namrathamyske Jan 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@amogh-jahagirdar Is this ancestor check even required anymore ? We are anyway doing ancestor check in

startingSnapshotId,
parent.snapshotId());
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: whitespace after this block.

if (!referencedDataFiles.isEmpty()) {
validateDataFilesExist(
base,
startingSnapshotId,
referencedDataFiles,
!validateDeletes,
conflictDetectionFilter);
conflictDetectionFilter,
parent);
}

if (validateNewDataFiles) {
validateAddedDataFiles(base, startingSnapshotId, conflictDetectionFilter);
validateAddedDataFiles(base, startingSnapshotId, conflictDetectionFilter, parent);
}

if (validateNewDeleteFiles) {
validateNoNewDeleteFiles(base, startingSnapshotId, conflictDetectionFilter);
validateNoNewDeleteFiles(base, startingSnapshotId, conflictDetectionFilter, parent);
}
}
}
Expand Down
89 changes: 55 additions & 34 deletions core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,9 @@ private ManifestFile copyManifest(ManifestFile manifest) {
* @param partitionSet a set of partitions to filter new conflicting data files
*/
protected void validateAddedDataFiles(
TableMetadata base, Long startingSnapshotId, PartitionSet partitionSet) {
TableMetadata base, Long startingSnapshotId, PartitionSet partitionSet, Snapshot parent) {
CloseableIterable<ManifestEntry<DataFile>> conflictEntries =
addedDataFiles(base, startingSnapshotId, null, partitionSet);
addedDataFiles(base, startingSnapshotId, null, partitionSet, parent);

try (CloseableIterator<ManifestEntry<DataFile>> conflicts = conflictEntries.iterator()) {
if (conflicts.hasNext()) {
Expand All @@ -305,9 +305,12 @@ protected void validateAddedDataFiles(
* @param conflictDetectionFilter an expression used to find new conflicting data files
*/
protected void validateAddedDataFiles(
TableMetadata base, Long startingSnapshotId, Expression conflictDetectionFilter) {
TableMetadata base,
Long startingSnapshotId,
Expression conflictDetectionFilter,
Snapshot parent) {
CloseableIterable<ManifestEntry<DataFile>> conflictEntries =
addedDataFiles(base, startingSnapshotId, conflictDetectionFilter, null);
addedDataFiles(base, startingSnapshotId, conflictDetectionFilter, null, parent);

try (CloseableIterator<ManifestEntry<DataFile>> conflicts = conflictEntries.iterator()) {
if (conflicts.hasNext()) {
Expand Down Expand Up @@ -337,15 +340,20 @@ private CloseableIterable<ManifestEntry<DataFile>> addedDataFiles(
TableMetadata base,
Long startingSnapshotId,
Expression dataFilter,
PartitionSet partitionSet) {
PartitionSet partitionSet,
Snapshot parent) {
// if there is no current table state, no files have been added
if (base.currentSnapshot() == null) {
if (parent == null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad, missed this when I raised the change to your branch. I think we should update this comment to say "If there is no parent, no files have been added"

return CloseableIterable.empty();
}

Pair<List<ManifestFile>, Set<Long>> history =
validationHistory(
base, startingSnapshotId, VALIDATE_ADDED_FILES_OPERATIONS, ManifestContent.DATA);
base,
startingSnapshotId,
VALIDATE_ADDED_FILES_OPERATIONS,
ManifestContent.DATA,
parent);
List<ManifestFile> manifests = history.first();
Set<Long> newSnapshots = history.second();

Expand Down Expand Up @@ -379,9 +387,9 @@ private CloseableIterable<ManifestEntry<DataFile>> addedDataFiles(
* @param dataFiles data files to validate have no new row deletes
*/
protected void validateNoNewDeletesForDataFiles(
TableMetadata base, Long startingSnapshotId, Iterable<DataFile> dataFiles) {
TableMetadata base, Long startingSnapshotId, Iterable<DataFile> dataFiles, Snapshot parent) {
validateNoNewDeletesForDataFiles(
base, startingSnapshotId, null, dataFiles, newFilesSequenceNumber != null);
base, startingSnapshotId, null, dataFiles, newFilesSequenceNumber != null, parent);
}

/**
Expand All @@ -397,8 +405,10 @@ protected void validateNoNewDeletesForDataFiles(
TableMetadata base,
Long startingSnapshotId,
Expression dataFilter,
Iterable<DataFile> dataFiles) {
validateNoNewDeletesForDataFiles(base, startingSnapshotId, dataFilter, dataFiles, false);
Iterable<DataFile> dataFiles,
Snapshot parent) {
validateNoNewDeletesForDataFiles(
base, startingSnapshotId, dataFilter, dataFiles, false, parent);
}

/**
Expand All @@ -423,13 +433,14 @@ private void validateNoNewDeletesForDataFiles(
Long startingSnapshotId,
Expression dataFilter,
Iterable<DataFile> dataFiles,
boolean ignoreEqualityDeletes) {
boolean ignoreEqualityDeletes,
Snapshot parent) {
// if there is no current table state, no files have been added
if (base.currentSnapshot() == null || base.formatVersion() < 2) {
if (parent == null || base.formatVersion() < 2) {
return;
}

DeleteFileIndex deletes = addedDeleteFiles(base, startingSnapshotId, dataFilter, null);
DeleteFileIndex deletes = addedDeleteFiles(base, startingSnapshotId, dataFilter, null, parent);

long startingSequenceNumber = startingSequenceNumber(base, startingSnapshotId);
for (DataFile dataFile : dataFiles) {
Expand Down Expand Up @@ -460,8 +471,8 @@ private void validateNoNewDeletesForDataFiles(
* @param dataFilter an expression used to find new conflicting delete files
*/
protected void validateNoNewDeleteFiles(
TableMetadata base, Long startingSnapshotId, Expression dataFilter) {
DeleteFileIndex deletes = addedDeleteFiles(base, startingSnapshotId, dataFilter, null);
TableMetadata base, Long startingSnapshotId, Expression dataFilter, Snapshot parent) {
DeleteFileIndex deletes = addedDeleteFiles(base, startingSnapshotId, dataFilter, null, parent);
ValidationException.check(
deletes.isEmpty(),
"Found new conflicting delete files that can apply to records matching %s: %s",
Expand All @@ -478,8 +489,9 @@ protected void validateNoNewDeleteFiles(
* @param partitionSet a partition set used to find new conflicting delete files
*/
protected void validateNoNewDeleteFiles(
TableMetadata base, Long startingSnapshotId, PartitionSet partitionSet) {
DeleteFileIndex deletes = addedDeleteFiles(base, startingSnapshotId, null, partitionSet);
TableMetadata base, Long startingSnapshotId, PartitionSet partitionSet, Snapshot parent) {
DeleteFileIndex deletes =
addedDeleteFiles(base, startingSnapshotId, null, partitionSet, parent);
ValidationException.check(
deletes.isEmpty(),
"Found new conflicting delete files that can apply to records matching %s: %s",
Expand All @@ -499,9 +511,10 @@ protected DeleteFileIndex addedDeleteFiles(
TableMetadata base,
Long startingSnapshotId,
Expression dataFilter,
PartitionSet partitionSet) {
PartitionSet partitionSet,
Snapshot parent) {
// if there is no current table state, return empty delete file index
if (base.currentSnapshot() == null || base.formatVersion() < 2) {
if (parent == null || base.formatVersion() < 2) {
return DeleteFileIndex.builderFor(ops.io(), ImmutableList.of())
.specsById(base.specsById())
.build();
Expand All @@ -512,7 +525,8 @@ protected DeleteFileIndex addedDeleteFiles(
base,
startingSnapshotId,
VALIDATE_ADDED_DELETE_FILES_OPERATIONS,
ManifestContent.DELETES);
ManifestContent.DELETES,
parent);
List<ManifestFile> deleteManifests = history.first();

long startingSequenceNumber = startingSequenceNumber(base, startingSnapshotId);
Expand All @@ -528,9 +542,9 @@ protected DeleteFileIndex addedDeleteFiles(
* @param dataFilter an expression used to find deleted data files
*/
protected void validateDeletedDataFiles(
TableMetadata base, Long startingSnapshotId, Expression dataFilter) {
TableMetadata base, Long startingSnapshotId, Expression dataFilter, Snapshot parent) {
CloseableIterable<ManifestEntry<DataFile>> conflictEntries =
deletedDataFiles(base, startingSnapshotId, dataFilter, null);
deletedDataFiles(base, startingSnapshotId, dataFilter, null, parent);

try (CloseableIterator<ManifestEntry<DataFile>> conflicts = conflictEntries.iterator()) {
if (conflicts.hasNext()) {
Expand All @@ -556,9 +570,9 @@ protected void validateDeletedDataFiles(
* @param partitionSet a partition set used to find deleted data files
*/
protected void validateDeletedDataFiles(
TableMetadata base, Long startingSnapshotId, PartitionSet partitionSet) {
TableMetadata base, Long startingSnapshotId, PartitionSet partitionSet, Snapshot parent) {
CloseableIterable<ManifestEntry<DataFile>> conflictEntries =
deletedDataFiles(base, startingSnapshotId, null, partitionSet);
deletedDataFiles(base, startingSnapshotId, null, partitionSet, parent);

try (CloseableIterator<ManifestEntry<DataFile>> conflicts = conflictEntries.iterator()) {
if (conflicts.hasNext()) {
Expand Down Expand Up @@ -588,15 +602,20 @@ private CloseableIterable<ManifestEntry<DataFile>> deletedDataFiles(
TableMetadata base,
Long startingSnapshotId,
Expression dataFilter,
PartitionSet partitionSet) {
PartitionSet partitionSet,
Snapshot parent) {
// if there is no current table state, no files have been deleted
if (base.currentSnapshot() == null) {
if (parent == null) {
return CloseableIterable.empty();
}

Pair<List<ManifestFile>, Set<Long>> history =
validationHistory(
base, startingSnapshotId, VALIDATE_DATA_FILES_EXIST_OPERATIONS, ManifestContent.DATA);
base,
startingSnapshotId,
VALIDATE_DATA_FILES_EXIST_OPERATIONS,
ManifestContent.DATA,
parent);
List<ManifestFile> manifests = history.first();
Set<Long> newSnapshots = history.second();

Expand Down Expand Up @@ -662,9 +681,10 @@ protected void validateDataFilesExist(
Long startingSnapshotId,
CharSequenceSet requiredDataFiles,
boolean skipDeletes,
Expression conflictDetectionFilter) {
Expression conflictDetectionFilter,
Snapshot parent) {
// if there is no current table state, no files have been removed
if (base.currentSnapshot() == null) {
if (parent == null) {
return;
}

Expand All @@ -674,7 +694,8 @@ protected void validateDataFilesExist(
: VALIDATE_DATA_FILES_EXIST_OPERATIONS;

Pair<List<ManifestFile>, Set<Long>> history =
validationHistory(base, startingSnapshotId, matchingOperations, ManifestContent.DATA);
validationHistory(
base, startingSnapshotId, matchingOperations, ManifestContent.DATA, parent);
List<ManifestFile> manifests = history.first();
Set<Long> newSnapshots = history.second();

Expand Down Expand Up @@ -710,14 +731,14 @@ private Pair<List<ManifestFile>, Set<Long>> validationHistory(
TableMetadata base,
Long startingSnapshotId,
Set<String> matchingOperations,
ManifestContent content) {
ManifestContent content,
Snapshot parent) {
List<ManifestFile> manifests = Lists.newArrayList();
Set<Long> newSnapshots = Sets.newHashSet();

Snapshot lastSnapshot = null;
Iterable<Snapshot> snapshots =
SnapshotUtil.ancestorsBetween(
base.currentSnapshot().snapshotId(), startingSnapshotId, base::snapshot);
SnapshotUtil.ancestorsBetween(parent.snapshotId(), startingSnapshotId, base::snapshot);
for (Snapshot currentSnapshot : snapshots) {
lastSnapshot = currentSnapshot;

Expand Down
7 changes: 4 additions & 3 deletions core/src/main/java/org/apache/iceberg/SnapshotProducer.java
Original file line number Diff line number Diff line change
Expand Up @@ -267,9 +267,10 @@ private Map<String, String> summary(TableMetadata previous) {
}

Map<String, String> previousSummary;
if (previous.currentSnapshot() != null) {
if (previous.currentSnapshot().summary() != null) {
previousSummary = previous.currentSnapshot().summary();
SnapshotRef previousBranchHead = previous.ref(targetBranch);
if (previousBranchHead != null) {
if (previous.snapshot(previousBranchHead.snapshotId()).summary() != null) {
previousSummary = previous.snapshot(previousBranchHead.snapshotId()).summary();
} else {
// previous snapshot had no summary, use an empty summary
previousSummary = ImmutableMap.of();
Expand Down
Loading