diff --git a/api/src/main/java/com/netflix/iceberg/Files.java b/api/src/main/java/com/netflix/iceberg/Files.java index e85825afb2d7..197dcc13284f 100644 --- a/api/src/main/java/com/netflix/iceberg/Files.java +++ b/api/src/main/java/com/netflix/iceberg/Files.java @@ -99,6 +99,9 @@ public static InputFile localInput(File file) { } public static InputFile localInput(String file) { + if (file.startsWith("file:")) { + return localInput(new File(file.replaceFirst("file:", ""))); + } return localInput(new File(file)); } diff --git a/core/src/main/java/com/netflix/iceberg/BaseSnapshot.java b/core/src/main/java/com/netflix/iceberg/BaseSnapshot.java index 36a873a2b06c..554f24f2e02c 100644 --- a/core/src/main/java/com/netflix/iceberg/BaseSnapshot.java +++ b/core/src/main/java/com/netflix/iceberg/BaseSnapshot.java @@ -138,7 +138,7 @@ private void cacheChanges() { // accumulate adds and deletes from all manifests. // because manifests can be reused in newer snapshots, filter the changes by snapshot id. - for (String manifest : Iterables.transform(manifests, ManifestFile::path)) { + for (String manifest : Iterables.transform(manifests(), ManifestFile::path)) { try (ManifestReader reader = ManifestReader.read(ops.io().newInputFile(manifest))) { for (ManifestEntry add : reader.addedFiles()) { if (add.snapshotId() == snapshotId) { @@ -164,7 +164,7 @@ public String toString() { return Objects.toStringHelper(this) .add("id", snapshotId) .add("timestamp_ms", timestampMillis) - .add("manifests", manifests) + .add("manifests", manifests()) .toString(); } } diff --git a/core/src/main/java/com/netflix/iceberg/TableProperties.java b/core/src/main/java/com/netflix/iceberg/TableProperties.java index 0d99c7ea4982..69bfcf29d2fc 100644 --- a/core/src/main/java/com/netflix/iceberg/TableProperties.java +++ b/core/src/main/java/com/netflix/iceberg/TableProperties.java @@ -73,5 +73,5 @@ public class TableProperties { public static final String WRITE_NEW_DATA_LOCATION = "write.folder-storage.path"; public static final String MANIFEST_LISTS_ENABLED = "write.manifest-lists.enabled"; - public static final boolean MANIFEST_LISTS_ENABLED_DEFAULT = false; + public static final boolean MANIFEST_LISTS_ENABLED_DEFAULT = true; } diff --git a/core/src/test/java/com/netflix/iceberg/TableTestBase.java b/core/src/test/java/com/netflix/iceberg/TableTestBase.java index c723daa14568..010896c8ad5f 100644 --- a/core/src/test/java/com/netflix/iceberg/TableTestBase.java +++ b/core/src/test/java/com/netflix/iceberg/TableTestBase.java @@ -94,13 +94,13 @@ public void cleanupTables() { TestTables.clearTables(); } - List listMetadataFiles(String ext) { - return listMetadataFiles(tableDir, ext); + List listManifestFiles() { + return listManifestFiles(tableDir); } - List listMetadataFiles(File tableDir, String ext) { - return Lists.newArrayList(new File(tableDir, "metadata").listFiles( - (dir, name) -> Files.getFileExtension(name).equalsIgnoreCase(ext))); + List listManifestFiles(File tableDir) { + return Lists.newArrayList(new File(tableDir, "metadata").listFiles((dir, name) -> + !name.startsWith("snap") && Files.getFileExtension(name).equalsIgnoreCase("avro"))); } private TestTables.TestTable create(Schema schema, PartitionSpec spec) { diff --git a/core/src/test/java/com/netflix/iceberg/TestCreateTransaction.java b/core/src/test/java/com/netflix/iceberg/TestCreateTransaction.java index 257f6e3d142a..ffdec59f5b38 100644 --- a/core/src/test/java/com/netflix/iceberg/TestCreateTransaction.java +++ b/core/src/test/java/com/netflix/iceberg/TestCreateTransaction.java @@ -49,7 +49,7 @@ public void testCreateTransaction() throws IOException { Assert.assertEquals("Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_create")); Assert.assertEquals("Should have 0 manifest files", - 0, listMetadataFiles(tableDir, "avro").size()); + 0, listManifestFiles(tableDir).size()); Assert.assertEquals("Table schema should match with reassigned IDs", assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct()); @@ -86,7 +86,7 @@ public void testCreateAndAppendWithTransaction() throws IOException { Assert.assertEquals("Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_append")); Assert.assertEquals("Should have 1 manifest file", - 1, listMetadataFiles(tableDir, "avro").size()); + 1, listManifestFiles(tableDir).size()); Assert.assertEquals("Table schema should match with reassigned IDs", assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct()); @@ -128,7 +128,7 @@ public void testCreateAndAppendWithTable() throws IOException { Assert.assertEquals("Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_append")); Assert.assertEquals("Should have 1 manifest file", - 1, listMetadataFiles(tableDir, "avro").size()); + 1, listManifestFiles(tableDir).size()); Assert.assertEquals("Table schema should match with reassigned IDs", assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct()); @@ -166,7 +166,7 @@ public void testCreateAndUpdatePropertiesWithTransaction() throws IOException { Assert.assertEquals("Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_properties")); Assert.assertEquals("Should have 0 manifest files", - 0, listMetadataFiles(tableDir, "avro").size()); + 0, listManifestFiles(tableDir).size()); Assert.assertEquals("Table schema should match with reassigned IDs", assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct()); @@ -208,7 +208,7 @@ public void testCreateAndUpdatePropertiesWithTable() throws IOException { Assert.assertEquals("Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_properties")); Assert.assertEquals("Should have 0 manifest files", - 0, listMetadataFiles(tableDir, "avro").size()); + 0, listManifestFiles(tableDir).size()); Assert.assertEquals("Table schema should match with reassigned IDs", assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct()); diff --git a/core/src/test/java/com/netflix/iceberg/TestFastAppend.java b/core/src/test/java/com/netflix/iceberg/TestFastAppend.java index 4d9e174df521..02c60ade2e26 100644 --- a/core/src/test/java/com/netflix/iceberg/TestFastAppend.java +++ b/core/src/test/java/com/netflix/iceberg/TestFastAppend.java @@ -32,7 +32,7 @@ public class TestFastAppend extends TableTestBase { @Test public void testEmptyTableAppend() { - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); TableMetadata base = readMetadata(); Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); diff --git a/core/src/test/java/com/netflix/iceberg/TestMergeAppend.java b/core/src/test/java/com/netflix/iceberg/TestMergeAppend.java index 6b78c631a5db..3446c9750674 100644 --- a/core/src/test/java/com/netflix/iceberg/TestMergeAppend.java +++ b/core/src/test/java/com/netflix/iceberg/TestMergeAppend.java @@ -33,7 +33,7 @@ public class TestMergeAppend extends TableTestBase { @Test public void testEmptyTableAppend() { - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); TableMetadata base = readMetadata(); Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); @@ -56,7 +56,7 @@ public void testMergeWithExistingManifest() { // merge all manifests for this test table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit(); - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); table.newAppend() .appendFile(FILE_A) @@ -92,7 +92,7 @@ public void testMergeWithExistingManifestAfterDelete() { // merge all manifests for this test table.updateProperties().set("commit.manifest.min-count-to-merge", "1").commit(); - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); table.newAppend() .appendFile(FILE_A) @@ -145,7 +145,7 @@ public void testMinMergeCount() { // only merge when there are at least 4 manifests table.updateProperties().set("commit.manifest.min-count-to-merge", "4").commit(); - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); table.newFastAppend() .appendFile(FILE_A) @@ -193,7 +193,7 @@ public void testMergeSizeTargetWithExistingManifest() { .set(TableProperties.MANIFEST_TARGET_SIZE_BYTES, "10") .commit(); - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); table.newAppend() .appendFile(FILE_A) diff --git a/core/src/test/java/com/netflix/iceberg/TestReplaceFiles.java b/core/src/test/java/com/netflix/iceberg/TestReplaceFiles.java index 032b6809c3b1..3cc1d50400a6 100644 --- a/core/src/test/java/com/netflix/iceberg/TestReplaceFiles.java +++ b/core/src/test/java/com/netflix/iceberg/TestReplaceFiles.java @@ -36,7 +36,7 @@ public class TestReplaceFiles extends TableTestBase { @Test public void testEmptyTable() { - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); TableMetadata base = readMetadata(); Assert.assertNull("Should not have a current snapshot", base.currentSnapshot()); @@ -51,7 +51,7 @@ public void testEmptyTable() { @Test public void testAddOnly() { - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, @@ -63,7 +63,7 @@ public void testAddOnly() { @Test public void testDeleteOnly() { - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); AssertHelpers.assertThrows("Expected an exception", IllegalArgumentException.class, @@ -75,7 +75,7 @@ public void testDeleteOnly() { @Test public void testDeleteWithDuplicateEntriesInManifest() { - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); table.newAppend() .appendFile(FILE_A) @@ -111,12 +111,12 @@ public void testDeleteWithDuplicateEntriesInManifest() { statuses(DELETED, DELETED, EXISTING)); // We should only get the 3 manifests that this test is expected to add. - Assert.assertEquals("Only 3 manifests should exist", 3, listMetadataFiles("avro").size()); + Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); } @Test public void testAddAndDelete() { - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); table.newAppend() .appendFile(FILE_A) @@ -151,7 +151,7 @@ public void testAddAndDelete() { statuses(DELETED, EXISTING)); // We should only get the 3 manifests that this test is expected to add. - Assert.assertEquals("Only 3 manifests should exist", 3, listMetadataFiles("avro").size()); + Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); } @Test @@ -182,7 +182,7 @@ public void testFailure() { Assert.assertFalse("Should clean up new manifest", new File(manifest2.path()).exists()); // As commit failed all the manifests added with rewrite should be cleaned up - Assert.assertEquals("Only 1 manifest should exist", 1, listMetadataFiles("avro").size()); + Assert.assertEquals("Only 1 manifest should exist", 1, listManifestFiles().size()); } @Test @@ -215,12 +215,12 @@ public void testRecovery() { metadata.currentSnapshot().manifests().contains(manifest2)); // 2 manifests added by rewrite and 1 original manifest should be found. - Assert.assertEquals("Only 3 manifests should exist", 3, listMetadataFiles("avro").size()); + Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); } @Test public void testDeleteNonExistentFile() { - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); table.newAppend() .appendFile(FILE_A) @@ -238,12 +238,12 @@ public void testDeleteNonExistentFile() { .rewriteFiles(Sets.newSet(FILE_C), Sets.newSet(FILE_D)) .commit()); - Assert.assertEquals("Only 1 manifests should exist", 1, listMetadataFiles("avro").size()); + Assert.assertEquals("Only 1 manifests should exist", 1, listManifestFiles().size()); } @Test public void testAlreadyDeletedFile() { - Assert.assertEquals("Table should start empty", 0, listMetadataFiles("avro").size()); + Assert.assertEquals("Table should start empty", 0, listManifestFiles().size()); table.newAppend() .appendFile(FILE_A) @@ -282,6 +282,6 @@ public void testAlreadyDeletedFile() { .rewriteFiles(Sets.newSet(FILE_A), Sets.newSet(FILE_D)) .commit()); - Assert.assertEquals("Only 3 manifests should exist", 3, listMetadataFiles("avro").size()); + Assert.assertEquals("Only 3 manifests should exist", 3, listManifestFiles().size()); } } diff --git a/core/src/test/java/com/netflix/iceberg/TestReplaceTransaction.java b/core/src/test/java/com/netflix/iceberg/TestReplaceTransaction.java index 4528a4f6070a..2c4e7d92e2ac 100644 --- a/core/src/test/java/com/netflix/iceberg/TestReplaceTransaction.java +++ b/core/src/test/java/com/netflix/iceberg/TestReplaceTransaction.java @@ -289,7 +289,7 @@ public void testReplaceToCreateAndAppend() throws IOException { Assert.assertEquals("Should have metadata version 0", 0, (int) TestTables.metadataVersion("test_append")); Assert.assertEquals("Should have 1 manifest file", - 1, listMetadataFiles(tableDir, "avro").size()); + 1, listManifestFiles(tableDir).size()); Assert.assertEquals("Table schema should match with reassigned IDs", assignFreshIds(SCHEMA).asStruct(), meta.schema().asStruct()); diff --git a/core/src/test/java/com/netflix/iceberg/TestTables.java b/core/src/test/java/com/netflix/iceberg/TestTables.java index e6aea02fd008..f1dbe4aae3ac 100644 --- a/core/src/test/java/com/netflix/iceberg/TestTables.java +++ b/core/src/test/java/com/netflix/iceberg/TestTables.java @@ -27,12 +27,11 @@ import com.netflix.iceberg.io.InputFile; import com.netflix.iceberg.io.OutputFile; import java.io.File; -import java.io.IOException; import java.util.Map; import static com.netflix.iceberg.TableMetadata.newTableMetadata; -class TestTables { +public class TestTables { static TestTable create(File temp, String name, Schema schema, PartitionSpec spec) { TestTableOperations ops = new TestTableOperations(name, temp); if (ops.current() != null) { @@ -112,7 +111,7 @@ static Integer metadataVersion(String tableName) { } } - static class TestTableOperations implements TableOperations { + public static class TestTableOperations implements TableOperations { private final String tableName; private final File metadata; @@ -120,7 +119,7 @@ static class TestTableOperations implements TableOperations { private long lastSnapshotId = 0; private int failCommits = 0; - TestTableOperations(String tableName, File location) { + public TestTableOperations(String tableName, File location) { this.tableName = tableName; this.metadata = new File(location, "metadata"); metadata.mkdirs(); diff --git a/core/src/test/java/com/netflix/iceberg/hadoop/HadoopTableTestBase.java b/core/src/test/java/com/netflix/iceberg/hadoop/HadoopTableTestBase.java index 31fd28d8327e..683a0b2af510 100644 --- a/core/src/test/java/com/netflix/iceberg/hadoop/HadoopTableTestBase.java +++ b/core/src/test/java/com/netflix/iceberg/hadoop/HadoopTableTestBase.java @@ -29,6 +29,7 @@ import com.netflix.iceberg.Table; import com.netflix.iceberg.TableMetadata; import com.netflix.iceberg.TableMetadataParser; +import com.netflix.iceberg.TestTables; import com.netflix.iceberg.types.Types; import org.apache.hadoop.conf.Configuration; import org.junit.Before; @@ -114,9 +115,9 @@ public void setupTable() throws Exception { this.table = TABLES.create(SCHEMA, SPEC, tableLocation); } - List listMetadataFiles(String ext) { - return Lists.newArrayList(metadataDir.listFiles( - (dir, name) -> Files.getFileExtension(name).equalsIgnoreCase(ext))); + List listManifestFiles() { + return Lists.newArrayList(metadataDir.listFiles((dir, name) -> + !name.startsWith("snap") && Files.getFileExtension(name).equalsIgnoreCase("avro"))); } File version(int i) { @@ -124,7 +125,8 @@ File version(int i) { } TableMetadata readMetadataVersion(int version) { - return TableMetadataParser.read(null, localInput(version(version))); + return TableMetadataParser.read(new TestTables.TestTableOperations("table", tableDir), + localInput(version(version))); } int readVersionHint() throws IOException { diff --git a/core/src/test/java/com/netflix/iceberg/hadoop/TestHadoopCommits.java b/core/src/test/java/com/netflix/iceberg/hadoop/TestHadoopCommits.java index 657abf3219f2..0ee021026d71 100644 --- a/core/src/test/java/com/netflix/iceberg/hadoop/TestHadoopCommits.java +++ b/core/src/test/java/com/netflix/iceberg/hadoop/TestHadoopCommits.java @@ -62,7 +62,7 @@ public void testCreateTable() throws Exception { Assert.assertEquals("Should write the current version to the hint file", 1, readVersionHint()); - List manifests = listMetadataFiles("avro"); + List manifests = listManifestFiles(); Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size()); } @@ -88,7 +88,7 @@ public void testSchemaUpdate() throws Exception { List tasks = Lists.newArrayList(table.newScan().planFiles()); Assert.assertEquals("Should not create any scan tasks", 0, tasks.size()); - List manifests = listMetadataFiles("avro"); + List manifests = listManifestFiles(); Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size()); } @@ -108,7 +108,7 @@ public void testFailedCommit() throws Exception { AssertHelpers.assertThrows("Should fail to commit change based on v1 when v2 exists", CommitFailedException.class, "Version 2 already exists", update::commit); - List manifests = listMetadataFiles("avro"); + List manifests = listManifestFiles(); Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size()); } @@ -144,7 +144,7 @@ public void testStaleMetadata() throws Exception { AssertHelpers.assertThrows("Should fail with stale base metadata", CommitFailedException.class, "based on stale table metadata", updateCopy::commit); - List manifests = listMetadataFiles("avro"); + List manifests = listManifestFiles(); Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size()); } @@ -196,7 +196,7 @@ public void testFastAppend() throws Exception { List tasks = Lists.newArrayList(table.newScan().planFiles()); Assert.assertEquals("Should scan 1 file", 1, tasks.size()); - List manifests = listMetadataFiles("avro"); + List manifests = listManifestFiles(); Assert.assertEquals("Should contain only one Avro manifest file", 1, manifests.size()); // second append @@ -213,7 +213,7 @@ public void testFastAppend() throws Exception { Assert.assertEquals("Should scan 2 files", 2, tasks.size()); Assert.assertEquals("Should contain 2 Avro manifest files", - 2, listMetadataFiles("avro").size()); + 2, listManifestFiles().size()); TableMetadata metadata = readMetadataVersion(3); Assert.assertEquals("Current snapshot should contain 2 manifests", @@ -236,7 +236,7 @@ public void testMergeAppend() throws Exception { Assert.assertEquals("Should scan 3 files", 3, tasks.size()); Assert.assertEquals("Should contain 3 Avro manifest files", - 3, listMetadataFiles("avro").size()); + 3, listManifestFiles().size()); TableMetadata metadata = readMetadataVersion(5); Assert.assertEquals("Current snapshot should contain 1 merged manifest",