Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 10 additions & 14 deletions core/src/main/java/org/apache/iceberg/BaseTransaction.java
Original file line number Diff line number Diff line change
Expand Up @@ -446,20 +446,16 @@ private void commitSimpleTransaction() {
}

Set<String> committedFiles = committedFiles(ops, newSnapshots);
if (committedFiles != null) {
// delete all of the files that were deleted in the most recent set of operation commits
Tasks.foreach(deletedFiles)
.suppressFailureWhenFinished()
.onFailure((file, exc) -> LOG.warn("Failed to delete uncommitted file: {}", file, exc))
.run(
path -> {
if (!committedFiles.contains(path)) {
ops.io().deleteFile(path);
}
});
} else {
LOG.warn("Failed to load metadata for a committed snapshot, skipping clean-up");
}
// delete all of the files that were deleted in the most recent set of operation commits
Tasks.foreach(deletedFiles)
.suppressFailureWhenFinished()
.onFailure((file, exc) -> LOG.warn("Failed to delete uncommitted file: {}", file, exc))
.run(
path -> {
if (committedFiles == null || !committedFiles.contains(path)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, good catch. If there are no committed files (which would be expected for a transaction with just ExpireSnapshots) but there are files to cleanup (which would be expected for ExpireSnapshots again) then we should proceed with the file removal.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized alternatively we could've just returned an empty set in committedFiles instead of null and then could've removed the (committedFiles == null) check (in addition to the current top level check). I'm not super opinionated on that though (it can be done in a follow on)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should actually change committedFiles to return ImmutableSet.of() if there are no new snapshot IDs. The logic is correct to warn if the other reason null is returned happens (a committed snapshot is missing). null signals that the output of the method is invalid, which we assumed was the case if there are no committed snapshots. But here we have a case where it's a valid case to have no committed snapshots and therefore no committed files.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a PR #9221 for addressing this.

ops.io().deleteFile(path);
}
});

} catch (RuntimeException e) {
LOG.warn("Failed to load committed metadata, skipping clean-up", e);
Expand Down
9 changes: 9 additions & 0 deletions core/src/test/java/org/apache/iceberg/TableTestBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,15 @@ List<File> listManifestFiles(File tableDirToList) {
&& Files.getFileExtension(name).equalsIgnoreCase("avro")));
}

List<File> listManifestLists(String tableDirToList) {
return Lists.newArrayList(
new File(tableDirToList, "metadata")
.listFiles(
(dir, name) ->
name.startsWith("snap")
&& Files.getFileExtension(name).equalsIgnoreCase("avro")));
}

public static long countAllMetadataFiles(File tableDir) {
return Arrays.stream(new File(tableDir, "metadata").listFiles())
.filter(f -> f.isFile())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,10 @@ public void testRetainNAvailableSnapshotsWithTransaction() {
t3 = System.currentTimeMillis();
}

// Retain last 2 snapshots
Assert.assertEquals(
"Should be 3 manifest lists", 3, listManifestLists(table.location()).size());

// Retain last 2 snapshots, which means 1 is deleted.
Transaction tx = table.newTransaction();
removeSnapshots(tx.table()).expireOlderThan(t3).retainLast(2).commit();
tx.commitTransaction();
Expand All @@ -449,6 +452,8 @@ public void testRetainNAvailableSnapshotsWithTransaction() {
"Should have two snapshots.", 2, Lists.newArrayList(table.snapshots()).size());
Assert.assertEquals(
"First snapshot should not present.", null, table.snapshot(firstSnapshotId));
Assert.assertEquals(
"Should be 2 manifest lists", 2, listManifestLists(table.location()).size());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,8 @@ public void testExpirationInTransaction() {
V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap1.sequenceNumber());
V2Assert.assertEquals(
"Last sequence number should be 1", 1, readMetadata().lastSequenceNumber());
V2Assert.assertEquals(
"Should be 1 manifest list", 1, listManifestLists(table.location()).size());

table.newAppend().appendFile(FILE_B).commit();
Snapshot snap2 = table.currentSnapshot();
Expand All @@ -319,12 +321,18 @@ public void testExpirationInTransaction() {
V2Assert.assertEquals("Snapshot sequence number should be 2", 2, snap2.sequenceNumber());
V2Assert.assertEquals(
"Last sequence number should be 2", 2, readMetadata().lastSequenceNumber());
V2Assert.assertEquals(
"Should be 2 manifest lists", 2, listManifestLists(table.location()).size());

Transaction txn = table.newTransaction();
txn.expireSnapshots().expireSnapshotId(commitId1).commit();
txn.commitTransaction();
V2Assert.assertEquals(
"Last sequence number should be 2", 2, readMetadata().lastSequenceNumber());
V2Assert.assertEquals(
"Should be 1 manifest list as 1 was deleted",
1,
listManifestLists(table.location()).size());
}

@Test
Expand Down