-
Notifications
You must be signed in to change notification settings - Fork 3k
Core: Replace setCurrentSnapshot with setBranchSnapshot in metadata builder #4089
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6dba027
90dc50d
eaeca5b
edd5d68
e997e7f
523f388
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -282,14 +282,18 @@ public void commit() { | |
| .run(taskOps -> { | ||
| Snapshot newSnapshot = apply(); | ||
| newSnapshotId.set(newSnapshot.snapshotId()); | ||
| TableMetadata updated; | ||
| if (stageOnly) { | ||
| updated = base.addStagedSnapshot(newSnapshot); | ||
| TableMetadata.Builder update = TableMetadata.buildFrom(base); | ||
| if (base.snapshot(newSnapshot.snapshotId()) != null) { | ||
| // this is a rollback operation | ||
| update.setBranchSnapshot(newSnapshot.snapshotId(), SnapshotRef.MAIN_BRANCH); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @wypoon and @sririshindra, I think this fixes the problem from #4088. The previous call to
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that makes sense. |
||
| } else if (stageOnly) { | ||
| update.addSnapshot(newSnapshot); | ||
| } else { | ||
| updated = base.replaceCurrentSnapshot(newSnapshot); | ||
| update.setBranchSnapshot(newSnapshot, SnapshotRef.MAIN_BRANCH); | ||
| } | ||
|
|
||
| if (updated == base) { | ||
| TableMetadata updated = update.build(); | ||
| if (updated.changes().isEmpty()) { | ||
| // do not commit if the metadata has not changed. for example, this may happen when setting the current | ||
| // snapshot to an ID that is already current. note that this check uses identity. | ||
| return; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -496,14 +496,6 @@ public TableMetadata replaceSortOrder(SortOrder newOrder) { | |
| return new Builder(this).setDefaultSortOrder(newOrder).build(); | ||
| } | ||
|
|
||
| public TableMetadata addStagedSnapshot(Snapshot snapshot) { | ||
| return new Builder(this).addSnapshot(snapshot).build(); | ||
| } | ||
|
|
||
| public TableMetadata replaceCurrentSnapshot(Snapshot snapshot) { | ||
| return new Builder(this).setCurrentSnapshot(snapshot).build(); | ||
| } | ||
|
|
||
| public TableMetadata removeSnapshotsIf(Predicate<Snapshot> removeIf) { | ||
| List<Snapshot> toRemove = snapshots.stream().filter(removeIf).collect(Collectors.toList()); | ||
| return new Builder(this).removeSnapshots(toRemove).build(); | ||
|
|
@@ -610,7 +602,7 @@ public TableMetadata buildReplacement(Schema updatedSchema, PartitionSpec update | |
|
|
||
| return new Builder(this) | ||
| .upgradeFormatVersion(newFormatVersion) | ||
| .setCurrentSnapshot(null) | ||
| .removeBranch(SnapshotRef.MAIN_BRANCH) | ||
| .setCurrentSchema(freshSchema, newLastColumnId.get()) | ||
| .setDefaultPartitionSpec(freshSpec) | ||
| .setDefaultSortOrder(freshSortOrder) | ||
|
|
@@ -936,23 +928,38 @@ public Builder addSnapshot(Snapshot snapshot) { | |
| return this; | ||
| } | ||
|
|
||
| public Builder setCurrentSnapshot(Snapshot snapshot) { | ||
| public Builder setBranchSnapshot(Snapshot snapshot, String branch) { | ||
| addSnapshot(snapshot); | ||
| setCurrentSnapshot(snapshot, null); | ||
| setBranchSnapshot(snapshot, branch, null); | ||
| return this; | ||
| } | ||
|
|
||
| public Builder setCurrentSnapshot(long snapshotId) { | ||
| if (currentSnapshotId == snapshotId) { | ||
| public Builder setBranchSnapshot(long snapshotId, String branch) { | ||
| SnapshotRef ref = refs.get(branch); | ||
| if (ref != null && ref.snapshotId() == snapshotId) { | ||
| // change is a noop | ||
| return this; | ||
| } | ||
|
|
||
| Snapshot snapshot = snapshotsById.get(snapshotId); | ||
| ValidationException.check(snapshot != null, | ||
| "Cannot set current snapshot to unknown: %s", snapshotId); | ||
| ValidationException.check(snapshot != null, "Cannot set %s to unknown snapshot: %s", branch, snapshotId); | ||
|
|
||
| setBranchSnapshot(snapshot, branch, System.currentTimeMillis()); | ||
|
|
||
| return this; | ||
| } | ||
|
|
||
| setCurrentSnapshot(snapshot, System.currentTimeMillis()); | ||
| public Builder removeBranch(String branch) { | ||
| if (SnapshotRef.MAIN_BRANCH.equals(branch)) { | ||
| this.currentSnapshotId = -1; | ||
| snapshotLog.clear(); | ||
| } | ||
|
|
||
| SnapshotRef ref = refs.remove(branch); | ||
| if (ref != null) { | ||
| ValidationException.check(ref.isBranch(), "Cannot remove branch: %s is a tag", branch); | ||
| changes.add(new MetadataUpdate.RemoveSnapshotRef(branch)); | ||
| } | ||
|
|
||
| return this; | ||
| } | ||
|
|
@@ -972,10 +979,17 @@ public Builder removeSnapshots(List<Snapshot> snapshotsToRemove) { | |
| } | ||
|
|
||
| this.snapshots = retainedSnapshots; | ||
| if (!snapshotsById.containsKey(currentSnapshotId)) { | ||
| setCurrentSnapshot(null, System.currentTimeMillis()); | ||
|
|
||
| // remove any refs that are no longer valid | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this fail if there are still refs?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on the current usage, we are only calling this when expiring snapshots. At that time the snapshots send in here should already be the ones that have to be removed after evaluating all retention policies. I think that's why we directly remove the refs instead of failing.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is in the scope for removeSnapshots(snapshots) to remove the references itself for any snapshots we are removing. Otherwise we would have to have the caller do a separate removeReferences and then do a removeSnapshots which seems more cumbersome. |
||
| Set<String> danglingRefs = Sets.newHashSet(); | ||
| for (Map.Entry<String, SnapshotRef> refEntry : refs.entrySet()) { | ||
| if (!snapshotsById.containsKey(refEntry.getValue().snapshotId())) { | ||
| danglingRefs.add(refEntry.getKey()); | ||
| } | ||
| } | ||
|
|
||
| danglingRefs.forEach(this::removeBranch); | ||
|
|
||
| return this; | ||
| } | ||
|
|
||
|
|
@@ -1180,26 +1194,36 @@ private int reuseOrCreateNewSortOrderId(SortOrder newOrder) { | |
| return newOrderId; | ||
| } | ||
|
|
||
| private void setCurrentSnapshot(Snapshot snapshot, Long currentTimestampMillis) { | ||
| if (snapshot == null) { | ||
| this.currentSnapshotId = -1; | ||
| snapshotLog.clear(); | ||
| changes.add(new MetadataUpdate.SetCurrentSnapshot(null)); | ||
| return; | ||
| } | ||
|
|
||
| if (currentSnapshotId == snapshot.snapshotId()) { | ||
| return; | ||
| private void setBranchSnapshot(Snapshot snapshot, String branch, Long currentTimestampMillis) { | ||
| long replacementSnapshotId = snapshot.snapshotId(); | ||
| SnapshotRef ref = refs.get(branch); | ||
| if (ref != null) { | ||
| ValidationException.check(ref.isBranch(), "Cannot update branch: %s is a tag", branch); | ||
| if (ref.snapshotId() == replacementSnapshotId) { | ||
| return; | ||
| } | ||
| } | ||
|
|
||
| ValidationException.check(formatVersion == 1 || snapshot.sequenceNumber() <= lastSequenceNumber, | ||
| "Last sequence number %s is less than existing snapshot sequence number %s", | ||
| lastSequenceNumber, snapshot.sequenceNumber()); | ||
|
|
||
| this.lastUpdatedMillis = currentTimestampMillis != null ? currentTimestampMillis : snapshot.timestampMillis(); | ||
| this.currentSnapshotId = snapshot.snapshotId(); | ||
| snapshotLog.add(new SnapshotLogEntry(lastUpdatedMillis, snapshot.snapshotId())); | ||
| changes.add(new MetadataUpdate.SetCurrentSnapshot(snapshot.snapshotId())); | ||
|
|
||
| if (SnapshotRef.MAIN_BRANCH.equals(branch)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks like the check for main branch is done in many places, should we also have a method for
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the problem is that there aren't many places where we actually have a Below, we have a |
||
| this.currentSnapshotId = replacementSnapshotId; | ||
| snapshotLog.add(new SnapshotLogEntry(lastUpdatedMillis, replacementSnapshotId)); | ||
| } | ||
|
|
||
| SnapshotRef newRef; | ||
| if (ref != null) { | ||
| newRef = SnapshotRef.builderFrom(ref, replacementSnapshotId).build(); | ||
| } else { | ||
| newRef = SnapshotRef.branchBuilder(replacementSnapshotId).build(); | ||
| } | ||
|
|
||
| refs.put(branch, newRef); | ||
| changes.add(new MetadataUpdate.SetSnapshotRef(branch, replacementSnapshotId)); | ||
| } | ||
|
|
||
| private static List<MetadataLogEntry> addPreviousFile( | ||
|
|
@@ -1237,9 +1261,11 @@ private static Set<Long> intermediateSnapshotIdSet(List<MetadataUpdate> changes, | |
| // adds must always come before set current snapshot | ||
| MetadataUpdate.AddSnapshot addSnapshot = (MetadataUpdate.AddSnapshot) update; | ||
| addedSnapshotIds.add(addSnapshot.snapshot().snapshotId()); | ||
| } else if (update instanceof MetadataUpdate.SetCurrentSnapshot) { | ||
| Long snapshotId = ((MetadataUpdate.SetCurrentSnapshot) update).snapshotId(); | ||
| if (snapshotId != null && addedSnapshotIds.contains(snapshotId) && snapshotId != currentSnapshotId) { | ||
| } else if (update instanceof MetadataUpdate.SetSnapshotRef) { | ||
| MetadataUpdate.SetSnapshotRef setRef = (MetadataUpdate.SetSnapshotRef) update; | ||
| long snapshotId = setRef.snapshotId(); | ||
| if (addedSnapshotIds.contains(snapshotId) && | ||
| SnapshotRef.MAIN_BRANCH.equals(setRef.name()) && snapshotId != currentSnapshotId) { | ||
| intermediateSnapshotIds.add(snapshotId); | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -737,6 +737,7 @@ public void testHistoryTable() { | |
| TestHelpers.assertEqualsSafe(historyTable.schema().asStruct(), expected.get(0), actual.get(0)); | ||
| TestHelpers.assertEqualsSafe(historyTable.schema().asStruct(), expected.get(1), actual.get(1)); | ||
| TestHelpers.assertEqualsSafe(historyTable.schema().asStruct(), expected.get(2), actual.get(2)); | ||
| TestHelpers.assertEqualsSafe(historyTable.schema().asStruct(), expected.get(3), actual.get(3)); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test was failing, I think due to the problem in #4088. While looking at it, I realized that the last record was not being checked as well.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was wrong about the cause of the history test failure. It turns out that I called the wrong method from |
||
| } | ||
|
|
||
| @Test | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RemoveSnapshotRefis used for dropping a branch instead ofSetCurrentSnapshot(null). Now the snapshot can be required instead of nullable.