From 370a365920e74a1c022d0250e88e74e2931dfd40 Mon Sep 17 00:00:00 2001 From: huzheng Date: Thu, 4 Mar 2021 14:24:31 +0800 Subject: [PATCH 1/7] Core: Support rewriting delete files. --- .../java/org/apache/iceberg/RewriteFiles.java | 88 ++++++++++++++++- .../org/apache/iceberg/BaseRewriteFiles.java | 21 ++-- .../org/apache/iceberg/TestRewriteFiles.java | 98 ++++++++++++++++++- 3 files changed, 197 insertions(+), 10 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/RewriteFiles.java b/api/src/main/java/org/apache/iceberg/RewriteFiles.java index 49df7fc9ff1d..2a5c8f0215cb 100644 --- a/api/src/main/java/org/apache/iceberg/RewriteFiles.java +++ b/api/src/main/java/org/apache/iceberg/RewriteFiles.java @@ -19,8 +19,14 @@ package org.apache.iceberg; +import java.util.Collections; +import java.util.List; import java.util.Set; import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; /** * API for replacing files in a table. @@ -38,8 +44,86 @@ public interface RewriteFiles extends SnapshotUpdate { * Add a rewrite that replaces one set of files with another set that contains the same data. * * @param filesToDelete files that will be replaced (deleted), cannot be null or empty. - * @param filesToAdd files that will be added, cannot be null or empty. + * @param filesToAdd files that will be added, cannot be null or empty. * @return this for method chaining */ - RewriteFiles rewriteFiles(Set filesToDelete, Set filesToAdd); + default RewriteFiles rewriteFiles(Set filesToDelete, Set filesToAdd) { + return rewriteFiles( + FileSet.ofDataFiles(filesToDelete), + FileSet.ofDataFiles(filesToAdd) + ); + } + + /** + * Add a rewrite that replaces one set of files with another set that contains the same data. + * + * @param filesToDelete files that will be replaced (deleted), cannot be null empty. + * @param filesToAdd files that will be added, cannot be null or empty. + * @return this for method chaining. + */ + RewriteFiles rewriteFiles(FileSet filesToDelete, FileSet filesToAdd); + + class FileSet { + private final Set dataFiles = Sets.newHashSet(); + private final Set deleteFiles = Sets.newHashSet(); + + public static FileSet ofDataFiles(DataFile... dataFiles) { + return new FileSet(dataFiles, new DeleteFile[0]); + } + + public static FileSet ofDataFiles(Iterable dataFiles) { + return new FileSet(dataFiles, ImmutableSet.of()); + } + + public static FileSet ofDeleteFiles(Iterable deleteFiles) { + return new FileSet(ImmutableSet.of(), deleteFiles); + } + + public static FileSet ofDeleteFiles(DeleteFile... deleteFiles) { + return new FileSet(new DataFile[0], deleteFiles); + } + + public static FileSet of(Iterable dataFiles, Iterable deleteFiles) { + return new FileSet(dataFiles, deleteFiles); + } + + public static FileSet of(ContentFile... files) { + List dataFiles = Lists.newArrayList(); + List deleteFiles = Lists.newArrayList(); + + for (ContentFile file : files) { + if (file instanceof DataFile) { + dataFiles.add((DataFile) file); + } else if (file instanceof DeleteFile) { + deleteFiles.add((DeleteFile) file); + } else { + throw new IllegalArgumentException("Unknown content file: " + file); + } + } + + return new FileSet(dataFiles, deleteFiles); + } + + private FileSet(DataFile[] dataFiles, DeleteFile[] deleteFiles) { + Collections.addAll(this.dataFiles, dataFiles); + Collections.addAll(this.deleteFiles, deleteFiles); + } + + private FileSet(Iterable dataFiles, Iterable deleteFiles) { + Iterables.addAll(this.dataFiles, dataFiles); + Iterables.addAll(this.deleteFiles, deleteFiles); + } + + public Set dataFiles() { + return dataFiles; + } + + public Set deleteFiles() { + return deleteFiles; + } + + public boolean isEmpty() { + return dataFiles.isEmpty() && deleteFiles.isEmpty(); + } + } } diff --git a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java index d2daac5f9b39..0baba0051e05 100644 --- a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java +++ b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java @@ -19,7 +19,6 @@ package org.apache.iceberg; -import java.util.Set; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; class BaseRewriteFiles extends MergingSnapshotProducer implements RewriteFiles { @@ -41,18 +40,26 @@ protected String operation() { } @Override - public RewriteFiles rewriteFiles(Set filesToDelete, Set filesToAdd) { - Preconditions.checkArgument(filesToDelete != null && !filesToDelete.isEmpty(), + public RewriteFiles rewriteFiles(FileSet filesToRemove, FileSet filesToAdd) { + Preconditions.checkArgument(filesToRemove != null && !filesToRemove.isEmpty(), "Files to delete cannot be null or empty"); Preconditions.checkArgument(filesToAdd != null && !filesToAdd.isEmpty(), "Files to add can not be null or empty"); - for (DataFile toDelete : filesToDelete) { - delete(toDelete); + for (DataFile dataFileToRemove : filesToRemove.dataFiles()) { + delete(dataFileToRemove); } - for (DataFile toAdd : filesToAdd) { - add(toAdd); + for (DeleteFile deleteFileToRemove : filesToRemove.deleteFiles()) { + delete(deleteFileToRemove); + } + + for (DataFile dataFileToAdd : filesToAdd.dataFiles()) { + add(dataFileToAdd); + } + + for (DeleteFile deleteFileToAdd : filesToAdd.deleteFiles()) { + add(deleteFileToAdd); } return this; diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java index fa3240f76a5e..eabce9a090f4 100644 --- a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java @@ -21,9 +21,11 @@ import java.io.File; import java.util.Collections; +import java.util.List; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.ValidationException; import org.junit.Assert; +import org.junit.Assume; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -37,7 +39,7 @@ public class TestRewriteFiles extends TableTestBase { @Parameterized.Parameters(name = "formatVersion = {0}") public static Object[] parameters() { - return new Object[] { 1, 2 }; + return new Object[] {1, 2}; } public TestRewriteFiles(int formatVersion) { @@ -57,6 +59,13 @@ public void testEmptyTable() { () -> table.newRewrite() .rewriteFiles(Sets.newSet(FILE_A), Sets.newSet(FILE_B)) .commit()); + + AssertHelpers.assertThrows("Expected an exception", + ValidationException.class, + "Missing required files to delete: /path/to/data-a-deletes.parquet", + () -> table.newRewrite() + .rewriteFiles(RewriteFiles.FileSet.of(FILE_A_DELETES), RewriteFiles.FileSet.of(FILE_A, FILE_B_DELETES)) + .commit()); } @Test @@ -69,6 +78,20 @@ public void testAddOnly() { () -> table.newRewrite() .rewriteFiles(Sets.newSet(FILE_A), Collections.emptySet()) .apply()); + + AssertHelpers.assertThrows("Expected an exception", + IllegalArgumentException.class, + "Files to add can not be null or empty", + () -> table.newRewrite() + .rewriteFiles(RewriteFiles.FileSet.of(FILE_A_DELETES), RewriteFiles.FileSet.of()) + .apply()); + + AssertHelpers.assertThrows("Expected an exception", + IllegalArgumentException.class, + "Files to add can not be null or empty", + () -> table.newRewrite() + .rewriteFiles(RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES), RewriteFiles.FileSet.of()) + .apply()); } @Test @@ -81,6 +104,20 @@ public void testDeleteOnly() { () -> table.newRewrite() .rewriteFiles(Collections.emptySet(), Sets.newSet(FILE_A)) .apply()); + + AssertHelpers.assertThrows("Expected an exception", + IllegalArgumentException.class, + "Files to delete cannot be null or empty", + () -> table.newRewrite() + .rewriteFiles(RewriteFiles.FileSet.of(), RewriteFiles.FileSet.of(FILE_A_DELETES)) + .apply()); + + AssertHelpers.assertThrows("Expected an exception", + IllegalArgumentException.class, + "Files to delete cannot be null or empty", + () -> table.newRewrite() + .rewriteFiles(RewriteFiles.FileSet.of(), RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES)) + .apply()); } @Test @@ -164,6 +201,65 @@ public void testAddAndDelete() { Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); } + @Test + public void testRewriteDataAndDeleteFiles() { + Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion == 2); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); + + table.newRowDelta() + .addRows(FILE_A) + .addRows(FILE_B) + .addRows(FILE_C) + .addDeletes(FILE_A_DELETES) + .addDeletes(FILE_B_DELETES) + .commit(); + + TableMetadata base = readMetadata(); + Snapshot baseSnap = base.currentSnapshot(); + long baseSnapshotId = baseSnap.snapshotId(); + Assert.assertEquals("Should create 2 manifests for initial write", 2, baseSnap.allManifests().size()); + List initialManifests = baseSnap.allManifests(); + + validateManifestEntries(initialManifests.get(0), + ids(baseSnapshotId, baseSnapshotId, baseSnapshotId), + files(FILE_A, FILE_B, FILE_C), + statuses(ADDED, ADDED, ADDED)); + validateDeleteManifest(initialManifests.get(1), + seqs(1, 1), + ids(baseSnapshotId, baseSnapshotId), + files(FILE_A_DELETES, FILE_B_DELETES), + statuses(ADDED, ADDED)); + + // Rewrite the files. + Snapshot pending = table.newRewrite() + .rewriteFiles(RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES), RewriteFiles.FileSet.of(FILE_D)) + .apply(); + + Assert.assertEquals("Should contain 3 manifest", 3, pending.allManifests().size()); + Assert.assertFalse("Should not contain manifest from initial write", + pending.allManifests().containsAll(initialManifests)); + + long pendingId = pending.snapshotId(); + validateManifestEntries(pending.allManifests().get(0), + ids(pendingId), + files(FILE_D), + statuses(ADDED)); + + validateManifestEntries(pending.allManifests().get(1), + ids(pendingId, baseSnapshotId, baseSnapshotId), + files(FILE_A, FILE_B, FILE_C), + statuses(DELETED, EXISTING, EXISTING)); + + validateDeleteManifest(pending.allManifests().get(2), + seqs(2, 1), + ids(pendingId, baseSnapshotId), + files(FILE_A_DELETES, FILE_B_DELETES), + statuses(DELETED, EXISTING)); + + // We should only get the 3 manifests that this test is expected to add. + Assert.assertEquals("Only 5 manifests should exist", 5, listManifestFiles().size()); + } + @Test public void testFailure() { table.newAppend() From 06492ad42402da8477a3935073e899939b3a8477 Mon Sep 17 00:00:00 2001 From: huzheng Date: Thu, 4 Mar 2021 16:36:00 +0800 Subject: [PATCH 2/7] Add unit tests. --- .../org/apache/iceberg/TestRewriteFiles.java | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java index eabce9a090f4..20601fe26bb3 100644 --- a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java @@ -24,6 +24,7 @@ import java.util.List; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.junit.Assert; import org.junit.Assume; import org.junit.Test; @@ -291,6 +292,57 @@ public void testFailure() { Assert.assertEquals("Only 1 manifest should exist", 1, listManifestFiles().size()); } + @Test + public void testFailureWhenRewriteBothDataAndDeleteFiles() { + Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion == 2); + + table.newRowDelta() + .addRows(FILE_A) + .addRows(FILE_B) + .addRows(FILE_C) + .addDeletes(FILE_A_DELETES) + .addDeletes(FILE_B_DELETES) + .commit(); + + long baseSnapshotId = readMetadata().currentSnapshot().snapshotId(); + table.ops().failCommits(5); + + RewriteFiles rewrite = table.newRewrite() + .rewriteFiles(RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES, FILE_B_DELETES), RewriteFiles.FileSet.of(FILE_D)); + Snapshot pending = rewrite.apply(); + + Assert.assertEquals("Should produce 3 manifests", 3, pending.allManifests().size()); + ManifestFile manifest1 = pending.allManifests().get(0); + ManifestFile manifest2 = pending.allManifests().get(1); + ManifestFile manifest3 = pending.allManifests().get(2); + + validateManifestEntries(pending.allManifests().get(0), + ids(pending.snapshotId()), + files(FILE_D), + statuses(ADDED)); + + validateManifestEntries(pending.allManifests().get(1), + ids(pending.snapshotId(), baseSnapshotId, baseSnapshotId), + files(FILE_A, FILE_B, FILE_C), + statuses(DELETED, EXISTING, EXISTING)); + + validateDeleteManifest(pending.allManifests().get(2), + seqs(2, 2), + ids(pending.snapshotId(), pending.snapshotId()), + files(FILE_A_DELETES, FILE_B_DELETES), + statuses(DELETED, DELETED)); + + AssertHelpers.assertThrows("Should retry 4 times and throw last failure", + CommitFailedException.class, "Injected failure", rewrite::commit); + + Assert.assertFalse("Should clean up new manifest", new File(manifest1.path()).exists()); + Assert.assertFalse("Should clean up new manifest", new File(manifest2.path()).exists()); + Assert.assertFalse("Should clean up new manifest", new File(manifest3.path()).exists()); + + // As commit failed all the manifests added with rewrite should be cleaned up + Assert.assertEquals("Only 2 manifest should exist", 2, listManifestFiles().size()); + } + @Test public void testRecovery() { table.newAppend() @@ -324,6 +376,61 @@ public void testRecovery() { Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); } + @Test + public void testRecoverWhenRewriteBothDataAndDeleteFiles() { + Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion == 2); + + table.newRowDelta() + .addRows(FILE_A) + .addRows(FILE_B) + .addRows(FILE_C) + .addDeletes(FILE_A_DELETES) + .addDeletes(FILE_B_DELETES) + .commit(); + + long baseSnapshotId = readMetadata().currentSnapshot().snapshotId(); + table.ops().failCommits(3); + + RewriteFiles rewrite = table.newRewrite() + .rewriteFiles(RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES, FILE_B_DELETES), RewriteFiles.FileSet.of(FILE_D)); + Snapshot pending = rewrite.apply(); + + Assert.assertEquals("Should produce 3 manifests", 3, pending.allManifests().size()); + ManifestFile manifest1 = pending.allManifests().get(0); + ManifestFile manifest2 = pending.allManifests().get(1); + ManifestFile manifest3 = pending.allManifests().get(2); + + validateManifestEntries(pending.allManifests().get(0), + ids(pending.snapshotId()), + files(FILE_D), + statuses(ADDED)); + + validateManifestEntries(pending.allManifests().get(1), + ids(pending.snapshotId(), baseSnapshotId, baseSnapshotId), + files(FILE_A, FILE_B, FILE_C), + statuses(DELETED, EXISTING, EXISTING)); + + validateDeleteManifest(pending.allManifests().get(2), + seqs(2, 2), + ids(pending.snapshotId(), pending.snapshotId()), + files(FILE_A_DELETES, FILE_B_DELETES), + statuses(DELETED, DELETED)); + + rewrite.commit(); + + Assert.assertTrue("Should reuse new manifest", new File(manifest1.path()).exists()); + Assert.assertTrue("Should reuse new manifest", new File(manifest2.path()).exists()); + Assert.assertTrue("Should reuse new manifest", new File(manifest3.path()).exists()); + + TableMetadata metadata = readMetadata(); + List committedManifests = Lists.newArrayList(manifest1, manifest2, manifest3); + Assert.assertTrue("Should committed the manifests", + metadata.currentSnapshot().allManifests().containsAll(committedManifests)); + + // As commit success all the manifests added with rewrite should be available. + Assert.assertEquals("Only 5 manifest should exist", 5, listManifestFiles().size()); + } + @Test public void testDeleteNonExistentFile() { Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); From 0f4edf3515b2e8b1acd18ea27bc5779167fd9df5 Mon Sep 17 00:00:00 2001 From: huzheng Date: Mon, 8 Mar 2021 15:27:42 +0800 Subject: [PATCH 3/7] Refactor the RewriteFiles API --- .../java/org/apache/iceberg/RewriteFiles.java | 84 +++---------------- .../org/apache/iceberg/BaseRewriteFiles.java | 64 ++++++++++---- .../org/apache/iceberg/TestRewriteFiles.java | 23 +++-- 3 files changed, 75 insertions(+), 96 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/RewriteFiles.java b/api/src/main/java/org/apache/iceberg/RewriteFiles.java index 2a5c8f0215cb..b82539d7f25a 100644 --- a/api/src/main/java/org/apache/iceberg/RewriteFiles.java +++ b/api/src/main/java/org/apache/iceberg/RewriteFiles.java @@ -19,14 +19,9 @@ package org.apache.iceberg; -import java.util.Collections; -import java.util.List; import java.util.Set; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; -import org.apache.iceberg.relocated.com.google.common.collect.Iterables; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.relocated.com.google.common.collect.Sets; /** * API for replacing files in a table. @@ -49,81 +44,22 @@ public interface RewriteFiles extends SnapshotUpdate { */ default RewriteFiles rewriteFiles(Set filesToDelete, Set filesToAdd) { return rewriteFiles( - FileSet.ofDataFiles(filesToDelete), - FileSet.ofDataFiles(filesToAdd) + filesToDelete, + ImmutableSet.of(), + filesToAdd, + ImmutableSet.of() ); } /** * Add a rewrite that replaces one set of files with another set that contains the same data. * - * @param filesToDelete files that will be replaced (deleted), cannot be null empty. - * @param filesToAdd files that will be added, cannot be null or empty. + * @param dataFilesToDelete data files that will be replaced (deleted). + * @param deleteFilesToDelete delete files that will be replaced (deleted). + * @param dataFilesToAdd data files that will be added. + * @param deleteFilesToAdd delete files that will be added. * @return this for method chaining. */ - RewriteFiles rewriteFiles(FileSet filesToDelete, FileSet filesToAdd); - - class FileSet { - private final Set dataFiles = Sets.newHashSet(); - private final Set deleteFiles = Sets.newHashSet(); - - public static FileSet ofDataFiles(DataFile... dataFiles) { - return new FileSet(dataFiles, new DeleteFile[0]); - } - - public static FileSet ofDataFiles(Iterable dataFiles) { - return new FileSet(dataFiles, ImmutableSet.of()); - } - - public static FileSet ofDeleteFiles(Iterable deleteFiles) { - return new FileSet(ImmutableSet.of(), deleteFiles); - } - - public static FileSet ofDeleteFiles(DeleteFile... deleteFiles) { - return new FileSet(new DataFile[0], deleteFiles); - } - - public static FileSet of(Iterable dataFiles, Iterable deleteFiles) { - return new FileSet(dataFiles, deleteFiles); - } - - public static FileSet of(ContentFile... files) { - List dataFiles = Lists.newArrayList(); - List deleteFiles = Lists.newArrayList(); - - for (ContentFile file : files) { - if (file instanceof DataFile) { - dataFiles.add((DataFile) file); - } else if (file instanceof DeleteFile) { - deleteFiles.add((DeleteFile) file); - } else { - throw new IllegalArgumentException("Unknown content file: " + file); - } - } - - return new FileSet(dataFiles, deleteFiles); - } - - private FileSet(DataFile[] dataFiles, DeleteFile[] deleteFiles) { - Collections.addAll(this.dataFiles, dataFiles); - Collections.addAll(this.deleteFiles, deleteFiles); - } - - private FileSet(Iterable dataFiles, Iterable deleteFiles) { - Iterables.addAll(this.dataFiles, dataFiles); - Iterables.addAll(this.deleteFiles, deleteFiles); - } - - public Set dataFiles() { - return dataFiles; - } - - public Set deleteFiles() { - return deleteFiles; - } - - public boolean isEmpty() { - return dataFiles.isEmpty() && deleteFiles.isEmpty(); - } - } + RewriteFiles rewriteFiles(Set dataFilesToDelete, Set deleteFilesToDelete, + Set dataFilesToAdd, Set deleteFilesToAdd); } diff --git a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java index 0baba0051e05..24b9784613ea 100644 --- a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java +++ b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java @@ -19,6 +19,7 @@ package org.apache.iceberg; +import java.util.Set; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; class BaseRewriteFiles extends MergingSnapshotProducer implements RewriteFiles { @@ -39,27 +40,62 @@ protected String operation() { return DataOperations.REPLACE; } + private void checkFilesToDelete(Set dataFilesToDelete, + Set deleteFilesToDelete) { + int filesToDelete = 0; + if (dataFilesToDelete != null) { + filesToDelete += dataFilesToDelete.size(); + } + + if (deleteFilesToDelete != null) { + filesToDelete += deleteFilesToDelete.size(); + } + + Preconditions.checkArgument(filesToDelete > 0, "Files to delete cannot be null or empty"); + } + + private void checkFilesToAdd(Set dataFilesToAdd, + Set deleteFilesToAdd) { + int filesToAdd = 0; + if (dataFilesToAdd != null) { + filesToAdd += dataFilesToAdd.size(); + } + + if (deleteFilesToAdd != null) { + filesToAdd += deleteFilesToAdd.size(); + } + + Preconditions.checkArgument(filesToAdd > 0, "Files to add can not be null or empty"); + } + @Override - public RewriteFiles rewriteFiles(FileSet filesToRemove, FileSet filesToAdd) { - Preconditions.checkArgument(filesToRemove != null && !filesToRemove.isEmpty(), - "Files to delete cannot be null or empty"); - Preconditions.checkArgument(filesToAdd != null && !filesToAdd.isEmpty(), - "Files to add can not be null or empty"); - - for (DataFile dataFileToRemove : filesToRemove.dataFiles()) { - delete(dataFileToRemove); + public RewriteFiles rewriteFiles(Set dataFilesToDelete, Set deleteFilesToDelete, + Set dataFilesToAdd, Set deleteFilesToAdd) { + checkFilesToDelete(dataFilesToDelete, deleteFilesToDelete); + checkFilesToAdd(dataFilesToAdd, deleteFilesToAdd); + + if (dataFilesToDelete != null) { + for (DataFile dataFile : dataFilesToDelete) { + delete(dataFile); + } } - for (DeleteFile deleteFileToRemove : filesToRemove.deleteFiles()) { - delete(deleteFileToRemove); + if (deleteFilesToDelete != null) { + for (DeleteFile deleteFile : deleteFilesToDelete) { + delete(deleteFile); + } } - for (DataFile dataFileToAdd : filesToAdd.dataFiles()) { - add(dataFileToAdd); + if (dataFilesToAdd != null) { + for (DataFile dataFile : dataFilesToAdd) { + add(dataFile); + } } - for (DeleteFile deleteFileToAdd : filesToAdd.deleteFiles()) { - add(deleteFileToAdd); + if (deleteFilesToAdd != null) { + for (DeleteFile deleteFile : deleteFilesToAdd) { + add(deleteFile); + } } return this; diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java index 20601fe26bb3..aa96af6e1499 100644 --- a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java @@ -24,6 +24,7 @@ import java.util.List; import org.apache.iceberg.exceptions.CommitFailedException; import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.junit.Assert; import org.junit.Assume; @@ -65,7 +66,8 @@ public void testEmptyTable() { ValidationException.class, "Missing required files to delete: /path/to/data-a-deletes.parquet", () -> table.newRewrite() - .rewriteFiles(RewriteFiles.FileSet.of(FILE_A_DELETES), RewriteFiles.FileSet.of(FILE_A, FILE_B_DELETES)) + .rewriteFiles(ImmutableSet.of(), ImmutableSet.of(FILE_A_DELETES), + ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_B_DELETES)) .commit()); } @@ -84,14 +86,15 @@ public void testAddOnly() { IllegalArgumentException.class, "Files to add can not be null or empty", () -> table.newRewrite() - .rewriteFiles(RewriteFiles.FileSet.of(FILE_A_DELETES), RewriteFiles.FileSet.of()) + .rewriteFiles(ImmutableSet.of(), ImmutableSet.of(FILE_A_DELETES), ImmutableSet.of(), ImmutableSet.of()) .apply()); AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, "Files to add can not be null or empty", () -> table.newRewrite() - .rewriteFiles(RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES), RewriteFiles.FileSet.of()) + .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_A_DELETES), + ImmutableSet.of(), ImmutableSet.of()) .apply()); } @@ -110,14 +113,15 @@ public void testDeleteOnly() { IllegalArgumentException.class, "Files to delete cannot be null or empty", () -> table.newRewrite() - .rewriteFiles(RewriteFiles.FileSet.of(), RewriteFiles.FileSet.of(FILE_A_DELETES)) + .rewriteFiles(ImmutableSet.of(), ImmutableSet.of(), ImmutableSet.of(), ImmutableSet.of(FILE_A_DELETES)) .apply()); AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, "Files to delete cannot be null or empty", () -> table.newRewrite() - .rewriteFiles(RewriteFiles.FileSet.of(), RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES)) + .rewriteFiles(ImmutableSet.of(), ImmutableSet.of(), + ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_A_DELETES)) .apply()); } @@ -233,7 +237,8 @@ public void testRewriteDataAndDeleteFiles() { // Rewrite the files. Snapshot pending = table.newRewrite() - .rewriteFiles(RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES), RewriteFiles.FileSet.of(FILE_D)) + .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_A_DELETES), + ImmutableSet.of(FILE_D), ImmutableSet.of()) .apply(); Assert.assertEquals("Should contain 3 manifest", 3, pending.allManifests().size()); @@ -308,7 +313,8 @@ public void testFailureWhenRewriteBothDataAndDeleteFiles() { table.ops().failCommits(5); RewriteFiles rewrite = table.newRewrite() - .rewriteFiles(RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES, FILE_B_DELETES), RewriteFiles.FileSet.of(FILE_D)); + .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_A_DELETES, FILE_B_DELETES), + ImmutableSet.of(FILE_D), ImmutableSet.of()); Snapshot pending = rewrite.apply(); Assert.assertEquals("Should produce 3 manifests", 3, pending.allManifests().size()); @@ -392,7 +398,8 @@ public void testRecoverWhenRewriteBothDataAndDeleteFiles() { table.ops().failCommits(3); RewriteFiles rewrite = table.newRewrite() - .rewriteFiles(RewriteFiles.FileSet.of(FILE_A, FILE_A_DELETES, FILE_B_DELETES), RewriteFiles.FileSet.of(FILE_D)); + .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_A_DELETES, FILE_B_DELETES), + ImmutableSet.of(FILE_D), ImmutableSet.of()); Snapshot pending = rewrite.apply(); Assert.assertEquals("Should produce 3 manifests", 3, pending.allManifests().size()); From 3475382a9d7d4563a20a30f726957a94d0af69cb Mon Sep 17 00:00:00 2001 From: huzheng Date: Tue, 9 Mar 2021 15:22:59 +0800 Subject: [PATCH 4/7] Minior fixes. --- .../java/org/apache/iceberg/RewriteFiles.java | 4 +- .../org/apache/iceberg/BaseRewriteFiles.java | 25 ++-- .../org/apache/iceberg/TableTestBase.java | 8 ++ .../org/apache/iceberg/TestRewriteFiles.java | 126 ++++++++++++++++-- 4 files changed, 133 insertions(+), 30 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/RewriteFiles.java b/api/src/main/java/org/apache/iceberg/RewriteFiles.java index b82539d7f25a..3c109de1fb1d 100644 --- a/api/src/main/java/org/apache/iceberg/RewriteFiles.java +++ b/api/src/main/java/org/apache/iceberg/RewriteFiles.java @@ -36,7 +36,7 @@ */ public interface RewriteFiles extends SnapshotUpdate { /** - * Add a rewrite that replaces one set of files with another set that contains the same data. + * Add a rewrite that replaces one set of data files with another set that contains the same data. * * @param filesToDelete files that will be replaced (deleted), cannot be null or empty. * @param filesToAdd files that will be added, cannot be null or empty. @@ -52,7 +52,7 @@ default RewriteFiles rewriteFiles(Set filesToDelete, Set fil } /** - * Add a rewrite that replaces one set of files with another set that contains the same data. + * Add a rewrite that replaces one set of files with another set that contains the same data (format v2). * * @param dataFilesToDelete data files that will be replaced (deleted). * @param deleteFilesToDelete delete files that will be replaced (deleted). diff --git a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java index 24b9784613ea..08ba662a6df6 100644 --- a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java +++ b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java @@ -40,8 +40,8 @@ protected String operation() { return DataOperations.REPLACE; } - private void checkFilesToDelete(Set dataFilesToDelete, - Set deleteFilesToDelete) { + private void verifyInputAndOutputFiles(Set dataFilesToDelete, Set deleteFilesToDelete, + Set dataFilesToAdd, Set deleteFilesToAdd) { int filesToDelete = 0; if (dataFilesToDelete != null) { filesToDelete += dataFilesToDelete.size(); @@ -52,27 +52,20 @@ private void checkFilesToDelete(Set dataFilesToDelete, } Preconditions.checkArgument(filesToDelete > 0, "Files to delete cannot be null or empty"); - } - - private void checkFilesToAdd(Set dataFilesToAdd, - Set deleteFilesToAdd) { - int filesToAdd = 0; - if (dataFilesToAdd != null) { - filesToAdd += dataFilesToAdd.size(); - } - if (deleteFilesToAdd != null) { - filesToAdd += deleteFilesToAdd.size(); + if (deleteFilesToDelete == null || deleteFilesToDelete.isEmpty()) { + // When there is no delete files in the rewrite action, data files to add cannot be null or empty. + Preconditions.checkArgument(dataFilesToAdd != null && dataFilesToAdd.size() > 0, + "Data files to add can not be null or empty because there's no delete file to rewrite"); + Preconditions.checkArgument(deleteFilesToAdd == null || deleteFilesToAdd.isEmpty(), + "Delete files to add must be null or empty because there's no delete file to rewrite"); } - - Preconditions.checkArgument(filesToAdd > 0, "Files to add can not be null or empty"); } @Override public RewriteFiles rewriteFiles(Set dataFilesToDelete, Set deleteFilesToDelete, Set dataFilesToAdd, Set deleteFilesToAdd) { - checkFilesToDelete(dataFilesToDelete, deleteFilesToDelete); - checkFilesToAdd(dataFilesToAdd, deleteFilesToAdd); + verifyInputAndOutputFiles(dataFilesToDelete, deleteFilesToDelete, dataFilesToAdd, deleteFilesToAdd); if (dataFilesToDelete != null) { for (DataFile dataFile : dataFilesToDelete) { diff --git a/core/src/test/java/org/apache/iceberg/TableTestBase.java b/core/src/test/java/org/apache/iceberg/TableTestBase.java index 7907ce7ea0cb..4bb6f66c6a6a 100644 --- a/core/src/test/java/org/apache/iceberg/TableTestBase.java +++ b/core/src/test/java/org/apache/iceberg/TableTestBase.java @@ -73,6 +73,14 @@ public class TableTestBase { .withPartitionPath("data_bucket=0") // easy way to set partition data for now .withRecordCount(1) .build(); + // Equality delete files. + static final DeleteFile FILE_A2_DELETES = FileMetadata.deleteFileBuilder(SPEC) + .ofEqualityDeletes(3) + .withPath("/path/to/data-a2-deletes.parquet") + .withFileSizeInBytes(10) + .withPartitionPath("data_bucket=0") + .withRecordCount(1) + .build(); static final DataFile FILE_B = DataFiles.builder(SPEC) .withPath("/path/to/data-b.parquet") .withFileSizeInBytes(10) diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java index aa96af6e1499..a3014415ce16 100644 --- a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java @@ -77,24 +77,25 @@ public void testAddOnly() { AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, - "Files to add can not be null or empty", + "Data files to add can not be null or empty because there's no delete file to rewrite", () -> table.newRewrite() .rewriteFiles(Sets.newSet(FILE_A), Collections.emptySet()) .apply()); AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, - "Files to add can not be null or empty", + "Data files to add can not be null or empty because there's no delete file to rewrite", () -> table.newRewrite() - .rewriteFiles(ImmutableSet.of(), ImmutableSet.of(FILE_A_DELETES), ImmutableSet.of(), ImmutableSet.of()) + .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(), + ImmutableSet.of(), ImmutableSet.of(FILE_A_DELETES)) .apply()); AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, - "Files to add can not be null or empty", + "Delete files to add must be null or empty because there's no delete file to rewrite", () -> table.newRewrite() - .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_A_DELETES), - ImmutableSet.of(), ImmutableSet.of()) + .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(), + ImmutableSet.of(FILE_B), ImmutableSet.of(FILE_B_DELETES)) .apply()); } @@ -208,7 +209,7 @@ public void testAddAndDelete() { @Test public void testRewriteDataAndDeleteFiles() { - Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion == 2); + Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion > 1); Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); table.newRowDelta() @@ -299,7 +300,7 @@ public void testFailure() { @Test public void testFailureWhenRewriteBothDataAndDeleteFiles() { - Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion == 2); + Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion > 1); table.newRowDelta() .addRows(FILE_A) @@ -384,7 +385,7 @@ public void testRecovery() { @Test public void testRecoverWhenRewriteBothDataAndDeleteFiles() { - Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion == 2); + Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion > 1); table.newRowDelta() .addRows(FILE_A) @@ -407,17 +408,17 @@ public void testRecoverWhenRewriteBothDataAndDeleteFiles() { ManifestFile manifest2 = pending.allManifests().get(1); ManifestFile manifest3 = pending.allManifests().get(2); - validateManifestEntries(pending.allManifests().get(0), + validateManifestEntries(manifest1, ids(pending.snapshotId()), files(FILE_D), statuses(ADDED)); - validateManifestEntries(pending.allManifests().get(1), + validateManifestEntries(manifest2, ids(pending.snapshotId(), baseSnapshotId, baseSnapshotId), files(FILE_A, FILE_B, FILE_C), statuses(DELETED, EXISTING, EXISTING)); - validateDeleteManifest(pending.allManifests().get(2), + validateDeleteManifest(manifest3, seqs(2, 2), ids(pending.snapshotId(), pending.snapshotId()), files(FILE_A_DELETES, FILE_B_DELETES), @@ -438,6 +439,107 @@ public void testRecoverWhenRewriteBothDataAndDeleteFiles() { Assert.assertEquals("Only 5 manifest should exist", 5, listManifestFiles().size()); } + @Test + public void testReplaceEqualityDeletesWithPositionDeletes() { + Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion > 1); + + table.newRowDelta() + .addRows(FILE_A2) + .addDeletes(FILE_A2_DELETES) + .commit(); + + TableMetadata metadata = readMetadata(); + long baseSnapshotId = metadata.currentSnapshot().snapshotId(); + + // Apply and commit the rewrite transaction. + RewriteFiles rewrite = table.newRewrite().rewriteFiles( + ImmutableSet.of(), ImmutableSet.of(FILE_A2_DELETES), + ImmutableSet.of(), ImmutableSet.of(FILE_B_DELETES) + ); + Snapshot pending = rewrite.apply(); + + Assert.assertEquals("Should produce 3 manifests", 3, pending.allManifests().size()); + ManifestFile manifest1 = pending.allManifests().get(0); + ManifestFile manifest2 = pending.allManifests().get(1); + ManifestFile manifest3 = pending.allManifests().get(2); + + validateManifestEntries(manifest1, + ids(baseSnapshotId), + files(FILE_A2), + statuses(ADDED)); + + validateDeleteManifest(manifest2, + seqs(2), + ids(pending.snapshotId()), + files(FILE_B_DELETES), + statuses(ADDED)); + + validateDeleteManifest(manifest3, + seqs(2), + ids(pending.snapshotId()), + files(FILE_A2_DELETES), + statuses(DELETED)); + + rewrite.commit(); + + Assert.assertTrue("Should reuse new manifest", new File(manifest1.path()).exists()); + Assert.assertTrue("Should reuse new manifest", new File(manifest2.path()).exists()); + Assert.assertTrue("Should reuse new manifest", new File(manifest3.path()).exists()); + + metadata = readMetadata(); + List committedManifests = Lists.newArrayList(manifest1, manifest2, manifest3); + Assert.assertTrue("Should committed the manifests", + metadata.currentSnapshot().allManifests().containsAll(committedManifests)); + + // As commit success all the manifests added with rewrite should be available. + Assert.assertEquals("4 manifests should exist", 4, listManifestFiles().size()); + } + + @Test + public void testRemoveAllDeletes() { + Assume.assumeTrue("Rewriting delete files is only supported in iceberg format v2. ", formatVersion > 1); + + table.newRowDelta() + .addRows(FILE_A) + .addDeletes(FILE_A_DELETES) + .commit(); + + // Apply and commit the rewrite transaction. + RewriteFiles rewrite = table.newRewrite().rewriteFiles( + ImmutableSet.of(FILE_A), ImmutableSet.of(FILE_A_DELETES), + ImmutableSet.of(), ImmutableSet.of() + ); + Snapshot pending = rewrite.apply(); + + Assert.assertEquals("Should produce 2 manifests", 2, pending.allManifests().size()); + ManifestFile manifest1 = pending.allManifests().get(0); + ManifestFile manifest2 = pending.allManifests().get(1); + + validateManifestEntries(manifest1, + ids(pending.snapshotId()), + files(FILE_A), + statuses(DELETED)); + + validateDeleteManifest(manifest2, + seqs(2), + ids(pending.snapshotId()), + files(FILE_A_DELETES), + statuses(DELETED)); + + rewrite.commit(); + + Assert.assertTrue("Should reuse the new manifest", new File(manifest1.path()).exists()); + Assert.assertTrue("Should reuse the new manifest", new File(manifest2.path()).exists()); + + TableMetadata metadata = readMetadata(); + List committedManifests = Lists.newArrayList(manifest1, manifest2); + Assert.assertTrue("Should committed the manifests", + metadata.currentSnapshot().allManifests().containsAll(committedManifests)); + + // As commit success all the manifests added with rewrite should be available. + Assert.assertEquals("4 manifests should exist", 4, listManifestFiles().size()); + } + @Test public void testDeleteNonExistentFile() { Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); From 6f7ede47efcdec0793a4a7938004bc9aacedcfb1 Mon Sep 17 00:00:00 2001 From: huzheng Date: Thu, 18 Mar 2021 15:54:39 +0800 Subject: [PATCH 5/7] Addressing comments --- api/src/main/java/org/apache/iceberg/RewriteFiles.java | 2 +- .../main/java/org/apache/iceberg/BaseRewriteFiles.java | 4 ++-- .../test/java/org/apache/iceberg/TestRewriteFiles.java | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/RewriteFiles.java b/api/src/main/java/org/apache/iceberg/RewriteFiles.java index 3c109de1fb1d..f09008a15d8f 100644 --- a/api/src/main/java/org/apache/iceberg/RewriteFiles.java +++ b/api/src/main/java/org/apache/iceberg/RewriteFiles.java @@ -52,7 +52,7 @@ default RewriteFiles rewriteFiles(Set filesToDelete, Set fil } /** - * Add a rewrite that replaces one set of files with another set that contains the same data (format v2). + * Add a rewrite that replaces one set of files with another set that contains the same data. * * @param dataFilesToDelete data files that will be replaced (deleted). * @param deleteFilesToDelete delete files that will be replaced (deleted). diff --git a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java index 08ba662a6df6..1de7c8be61a3 100644 --- a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java +++ b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java @@ -56,9 +56,9 @@ private void verifyInputAndOutputFiles(Set dataFilesToDelete, Set 0, - "Data files to add can not be null or empty because there's no delete file to rewrite"); + "Data files to add can not be null or empty because there's no delete file to be rewritten"); Preconditions.checkArgument(deleteFilesToAdd == null || deleteFilesToAdd.isEmpty(), - "Delete files to add must be null or empty because there's no delete file to rewrite"); + "Delete files to add must be null or empty because there's no delete file to be rewritten"); } } diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java index a3014415ce16..4cda57b99a25 100644 --- a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java @@ -244,7 +244,7 @@ public void testRewriteDataAndDeleteFiles() { Assert.assertEquals("Should contain 3 manifest", 3, pending.allManifests().size()); Assert.assertFalse("Should not contain manifest from initial write", - pending.allManifests().containsAll(initialManifests)); + pending.allManifests().stream().anyMatch(initialManifests::contains)); long pendingId = pending.snapshotId(); validateManifestEntries(pending.allManifests().get(0), @@ -432,8 +432,8 @@ public void testRecoverWhenRewriteBothDataAndDeleteFiles() { TableMetadata metadata = readMetadata(); List committedManifests = Lists.newArrayList(manifest1, manifest2, manifest3); - Assert.assertTrue("Should committed the manifests", - metadata.currentSnapshot().allManifests().containsAll(committedManifests)); + Assert.assertEquals("Should committed the manifests", + metadata.currentSnapshot().allManifests(), committedManifests); // As commit success all the manifests added with rewrite should be available. Assert.assertEquals("Only 5 manifest should exist", 5, listManifestFiles().size()); @@ -488,8 +488,8 @@ public void testReplaceEqualityDeletesWithPositionDeletes() { metadata = readMetadata(); List committedManifests = Lists.newArrayList(manifest1, manifest2, manifest3); - Assert.assertTrue("Should committed the manifests", - metadata.currentSnapshot().allManifests().containsAll(committedManifests)); + Assert.assertEquals("Should committed the manifests", + metadata.currentSnapshot().allManifests(), committedManifests); // As commit success all the manifests added with rewrite should be available. Assert.assertEquals("4 manifests should exist", 4, listManifestFiles().size()); From fe98e01a36d016b37165680ae3de24c15f393d19 Mon Sep 17 00:00:00 2001 From: huzheng Date: Wed, 24 Mar 2021 16:41:27 +0800 Subject: [PATCH 6/7] Fix broken unit tests --- core/src/test/java/org/apache/iceberg/TestRewriteFiles.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java index 4cda57b99a25..a75dd4841d9f 100644 --- a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java @@ -77,14 +77,14 @@ public void testAddOnly() { AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, - "Data files to add can not be null or empty because there's no delete file to rewrite", + "Data files to add can not be null or empty because there's no delete file to be rewritten", () -> table.newRewrite() .rewriteFiles(Sets.newSet(FILE_A), Collections.emptySet()) .apply()); AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, - "Data files to add can not be null or empty because there's no delete file to rewrite", + "Data files to add can not be null or empty because there's no delete file to be rewritten", () -> table.newRewrite() .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(), ImmutableSet.of(), ImmutableSet.of(FILE_A_DELETES)) @@ -92,7 +92,7 @@ public void testAddOnly() { AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, - "Delete files to add must be null or empty because there's no delete file to rewrite", + "Delete files to add must be null or empty because there's no delete file to be rewritten", () -> table.newRewrite() .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(), ImmutableSet.of(FILE_B), ImmutableSet.of(FILE_B_DELETES)) From e84022ee63fc45cc4cf6779d64686a53738ae9cb Mon Sep 17 00:00:00 2001 From: huzheng Date: Fri, 26 Mar 2021 11:27:39 +0800 Subject: [PATCH 7/7] Address the nullable comments. --- .../org/apache/iceberg/BaseRewriteFiles.java | 48 ++++++++----------- .../org/apache/iceberg/TestRewriteFiles.java | 6 +-- 2 files changed, 23 insertions(+), 31 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java index 1de7c8be61a3..c7732ef8a5a9 100644 --- a/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java +++ b/core/src/main/java/org/apache/iceberg/BaseRewriteFiles.java @@ -42,23 +42,23 @@ protected String operation() { private void verifyInputAndOutputFiles(Set dataFilesToDelete, Set deleteFilesToDelete, Set dataFilesToAdd, Set deleteFilesToAdd) { - int filesToDelete = 0; - if (dataFilesToDelete != null) { - filesToDelete += dataFilesToDelete.size(); - } + Preconditions.checkNotNull(dataFilesToDelete, "Data files to delete can not be null"); + Preconditions.checkNotNull(deleteFilesToDelete, "Delete files to delete can not be null"); + Preconditions.checkNotNull(dataFilesToAdd, "Data files to add can not be null"); + Preconditions.checkNotNull(deleteFilesToAdd, "Delete files to add can not be null"); - if (deleteFilesToDelete != null) { - filesToDelete += deleteFilesToDelete.size(); - } + int filesToDelete = 0; + filesToDelete += dataFilesToDelete.size(); + filesToDelete += deleteFilesToDelete.size(); Preconditions.checkArgument(filesToDelete > 0, "Files to delete cannot be null or empty"); - if (deleteFilesToDelete == null || deleteFilesToDelete.isEmpty()) { + if (deleteFilesToDelete.isEmpty()) { // When there is no delete files in the rewrite action, data files to add cannot be null or empty. - Preconditions.checkArgument(dataFilesToAdd != null && dataFilesToAdd.size() > 0, - "Data files to add can not be null or empty because there's no delete file to be rewritten"); - Preconditions.checkArgument(deleteFilesToAdd == null || deleteFilesToAdd.isEmpty(), - "Delete files to add must be null or empty because there's no delete file to be rewritten"); + Preconditions.checkArgument(dataFilesToAdd.size() > 0, + "Data files to add can not be empty because there's no delete file to be rewritten"); + Preconditions.checkArgument(deleteFilesToAdd.isEmpty(), + "Delete files to add must be empty because there's no delete file to be rewritten"); } } @@ -67,28 +67,20 @@ public RewriteFiles rewriteFiles(Set dataFilesToDelete, Set dataFilesToAdd, Set deleteFilesToAdd) { verifyInputAndOutputFiles(dataFilesToDelete, deleteFilesToDelete, dataFilesToAdd, deleteFilesToAdd); - if (dataFilesToDelete != null) { - for (DataFile dataFile : dataFilesToDelete) { - delete(dataFile); - } + for (DataFile dataFile : dataFilesToDelete) { + delete(dataFile); } - if (deleteFilesToDelete != null) { - for (DeleteFile deleteFile : deleteFilesToDelete) { - delete(deleteFile); - } + for (DeleteFile deleteFile : deleteFilesToDelete) { + delete(deleteFile); } - if (dataFilesToAdd != null) { - for (DataFile dataFile : dataFilesToAdd) { - add(dataFile); - } + for (DataFile dataFile : dataFilesToAdd) { + add(dataFile); } - if (deleteFilesToAdd != null) { - for (DeleteFile deleteFile : deleteFilesToAdd) { - add(deleteFile); - } + for (DeleteFile deleteFile : deleteFilesToAdd) { + add(deleteFile); } return this; diff --git a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java index a75dd4841d9f..2bdc9f2c4940 100644 --- a/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestRewriteFiles.java @@ -77,14 +77,14 @@ public void testAddOnly() { AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, - "Data files to add can not be null or empty because there's no delete file to be rewritten", + "Data files to add can not be empty because there's no delete file to be rewritten", () -> table.newRewrite() .rewriteFiles(Sets.newSet(FILE_A), Collections.emptySet()) .apply()); AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, - "Data files to add can not be null or empty because there's no delete file to be rewritten", + "Data files to add can not be empty because there's no delete file to be rewritten", () -> table.newRewrite() .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(), ImmutableSet.of(), ImmutableSet.of(FILE_A_DELETES)) @@ -92,7 +92,7 @@ public void testAddOnly() { AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, - "Delete files to add must be null or empty because there's no delete file to be rewritten", + "Delete files to add must be empty because there's no delete file to be rewritten", () -> table.newRewrite() .rewriteFiles(ImmutableSet.of(FILE_A), ImmutableSet.of(), ImmutableSet.of(FILE_B), ImmutableSet.of(FILE_B_DELETES))