diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 4ba3bdf8d737..e2153f83188e 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -56,6 +56,14 @@ public class TableMetadata implements Serializable { static final int INITIAL_SPEC_ID = 0; static final int INITIAL_SORT_ORDER_ID = 1; static final int INITIAL_SCHEMA_ID = 0; + static final int MIN_FORMAT_VERSION_BRANCHING = 2; + static final int MIN_FORMAT_VERSION_UUID_REQUIRED = 2; + static final int MIN_FORMAT_VERSION_SEQUENCE_NUMBER = 2; + static final int MIN_FORMAT_VERSION_PARTITION_SPECS_REQUIRED = 2; + static final int MIN_FORMAT_VERSION_LAST_PARTITION_ID_REQUIRED = 2; + static final int MIN_FORMAT_VERSION_SORT_ORDER_REQUIRED = 2; + static final int MIN_FORMAT_VERSION_SCHEMAS_REQUIRED = 2; + static final int MIN_FORMAT_VERSION_CURRENT_SCHEMA_ID_REQUIRED = 2; private static final long ONE_MINUTE = TimeUnit.MINUTES.toMillis(1); diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java index d7f2b29be75a..4d6cf2286f5c 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadataParser.java @@ -110,6 +110,8 @@ private TableMetadataParser() {} static final String METADATA_LOG = "metadata-log"; static final String STATISTICS = "statistics"; static final String PARTITION_STATISTICS = "partition-statistics"; + static final String ROW_LINEAGE = "row-lineage"; + static final String NEXT_ROW_ID = "next-row-id"; public static void overwrite(TableMetadata metadata, OutputFile outputFile) { internalWrite(metadata, outputFile, true); diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 45aa211e5187..073f8f24fa6a 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -32,6 +32,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.assertj.core.api.Assertions.entry; +import static org.assertj.core.api.Assumptions.assumeThat; import static org.junit.jupiter.params.provider.Arguments.arguments; import com.fasterxml.jackson.core.JsonGenerator; @@ -68,6 +69,7 @@ import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.FieldSource; import org.junit.jupiter.params.provider.MethodSource; public class TestTableMetadata { @@ -96,10 +98,13 @@ public class TestTableMetadata { public TableOperations ops = new LocalTableOperations(temp); - @Test + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") @SuppressWarnings("MethodLength") - public void testJsonConversion() throws Exception { + public void testJsonConversion(int formatVersion) throws Exception { long previousSnapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600); + long lastSequenceNumber = + formatVersion >= TableMetadata.MIN_FORMAT_VERSION_SEQUENCE_NUMBER ? SEQ_NO : 0; String manifestList = createManifestListWithManifestFile(previousSnapshotId, null, "file:/tmp/manifest1.avro"); @@ -162,10 +167,10 @@ public void testJsonConversion() throws Exception { TableMetadata expected = new TableMetadata( null, - 2, + formatVersion, UUID.randomUUID().toString(), TEST_LOCATION, - SEQ_NO, + lastSequenceNumber, System.currentTimeMillis(), 3, 7, @@ -314,8 +319,11 @@ public void testBackwardCompat() throws Exception { assertThat(metadata.snapshot(previousSnapshotId).schemaId()).isNull(); } - @Test - public void testInvalidMainBranch() throws IOException { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testInvalidMainBranch(int formatVersion) throws IOException { + assumeThat(formatVersion).isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_BRANCHING); + long previousSnapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600); String manifestList = @@ -359,7 +367,7 @@ public void testInvalidMainBranch() throws IOException { () -> new TableMetadata( null, - 2, + formatVersion, UUID.randomUUID().toString(), TEST_LOCATION, SEQ_NO, @@ -386,8 +394,11 @@ public void testInvalidMainBranch() throws IOException { .hasMessageStartingWith("Current snapshot ID does not match main branch"); } - @Test - public void testMainWithoutCurrent() throws IOException { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testMainWithoutCurrent(int formatVersion) throws IOException { + assumeThat(formatVersion).isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_BRANCHING); + long snapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600); String manifestList = @@ -404,7 +415,7 @@ public void testMainWithoutCurrent() throws IOException { () -> new TableMetadata( null, - 2, + formatVersion, UUID.randomUUID().toString(), TEST_LOCATION, SEQ_NO, @@ -431,8 +442,11 @@ public void testMainWithoutCurrent() throws IOException { .hasMessageStartingWith("Current snapshot is not set, but main branch exists"); } - @Test - public void testBranchSnapshotMissing() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testBranchSnapshotMissing(int formatVersion) { + assumeThat(formatVersion).isGreaterThanOrEqualTo(2); + long snapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600); Schema schema = new Schema(6, Types.NestedField.required(10, "x", Types.StringType.get())); @@ -444,7 +458,7 @@ public void testBranchSnapshotMissing() { () -> new TableMetadata( null, - 2, + formatVersion, UUID.randomUUID().toString(), TEST_LOCATION, SEQ_NO, @@ -513,8 +527,9 @@ private static String toJsonWithoutSpecAndSchemaList(TableMetadata metadata) { return writer.toString(); } - @Test - public void testJsonWithPreviousMetadataLog() throws Exception { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testJsonWithPreviousMetadataLog(int formatVersion) throws Exception { long previousSnapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600); String manifestList = @@ -548,7 +563,7 @@ public void testJsonWithPreviousMetadataLog() throws Exception { TableMetadata base = new TableMetadata( null, - 1, + formatVersion, UUID.randomUUID().toString(), TEST_LOCATION, 0, @@ -578,8 +593,9 @@ public void testJsonWithPreviousMetadataLog() throws Exception { assertThat(metadataFromJson.previousFiles()).isEqualTo(previousMetadataLog); } - @Test - public void testAddPreviousMetadataRemoveNone() throws IOException { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testAddPreviousMetadataRemoveNone(int formatVersion) throws IOException { long previousSnapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600); String manifestList = @@ -624,7 +640,7 @@ public void testAddPreviousMetadataRemoveNone() throws IOException { TableMetadata base = new TableMetadata( latestPreviousMetadata.file(), - 1, + formatVersion, UUID.randomUUID().toString(), TEST_LOCATION, 0, @@ -660,8 +676,9 @@ public void testAddPreviousMetadataRemoveNone() throws IOException { assertThat(removedPreviousMetadata).isEmpty(); } - @Test - public void testAddPreviousMetadataRemoveOne() throws IOException { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testAddPreviousMetadataRemoveOne(int formatVersion) throws IOException { long previousSnapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600); String manifestList = @@ -715,7 +732,7 @@ public void testAddPreviousMetadataRemoveOne() throws IOException { TableMetadata base = new TableMetadata( latestPreviousMetadata.file(), - 1, + formatVersion, UUID.randomUUID().toString(), TEST_LOCATION, 0, @@ -755,8 +772,9 @@ public void testAddPreviousMetadataRemoveOne() throws IOException { .isEqualTo(previousMetadataLog.subList(0, 1)); } - @Test - public void testAddPreviousMetadataRemoveMultiple() throws IOException { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testAddPreviousMetadataRemoveMultiple(int formatVersion) throws IOException { long previousSnapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600); String manifestList = @@ -810,7 +828,7 @@ public void testAddPreviousMetadataRemoveMultiple() throws IOException { TableMetadata base = new TableMetadata( latestPreviousMetadata.file(), - 1, + formatVersion, UUID.randomUUID().toString(), TEST_LOCATION, 0, @@ -850,13 +868,17 @@ public void testAddPreviousMetadataRemoveMultiple() throws IOException { .isEqualTo(previousMetadataLog.subList(0, 4)); } - @Test - public void testV2UUIDValidation() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testUUIDValidation(int formatVersion) { + assumeThat(formatVersion) + .isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_UUID_REQUIRED); + assertThatThrownBy( () -> new TableMetadata( null, - 2, + formatVersion, null, TEST_LOCATION, SEQ_NO, @@ -880,7 +902,7 @@ public void testV2UUIDValidation() { ImmutableList.of(), ImmutableList.of())) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("UUID is required in format v2"); + .hasMessage(String.format("UUID is required in format v%s", formatVersion)); } @Test @@ -973,62 +995,91 @@ public void testVersionValidation() { .isNotNull(); } - @Test - public void testParserVersionValidation() throws Exception { - String supportedVersion1 = readTableMetadataInputFile("TableMetadataV1Valid.json"); - TableMetadata parsed1 = TableMetadataParser.fromJson(supportedVersion1); - assertThat(parsed1).as("Should successfully read supported metadata version").isNotNull(); - - String supportedVersion2 = readTableMetadataInputFile("TableMetadataV2Valid.json"); - TableMetadata parsed2 = TableMetadataParser.fromJson(supportedVersion2); - assertThat(parsed2).as("Should successfully read supported metadata version").isNotNull(); + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParserVersionValidation(int formatVersion) throws Exception { + String supportedVersion = + readTableMetadataInputFile(String.format("TableMetadataV%sValid.json", formatVersion)); + TableMetadata parsed = TableMetadataParser.fromJson(supportedVersion); + assertThat(parsed).as("Should successfully read supported metadata version").isNotNull(); + } + @Test + public void testParserUnsupportedVersion() throws Exception { String unsupportedVersion = readTableMetadataInputFile("TableMetadataUnsupportedVersion.json"); assertThatThrownBy(() -> TableMetadataParser.fromJson(unsupportedVersion)) .isInstanceOf(IllegalArgumentException.class) .hasMessageStartingWith("Cannot read unsupported version"); } - @Test - public void testParserV2PartitionSpecsValidation() throws Exception { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParserPartitionSpecsValidation(int formatVersion) throws Exception { + assumeThat(formatVersion) + .isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_PARTITION_SPECS_REQUIRED); + String unsupportedVersion = - readTableMetadataInputFile("TableMetadataV2MissingPartitionSpecs.json"); + readTableMetadataInputFile( + String.format("TableMetadataV%sMissingPartitionSpecs.json", formatVersion)); assertThatThrownBy(() -> TableMetadataParser.fromJson(unsupportedVersion)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("partition-specs must exist in format v2"); + .hasMessage(String.format("partition-specs must exist in format v%s", formatVersion)); } - @Test - public void testParserV2LastAssignedFieldIdValidation() throws Exception { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParserLastAssignedFieldIdValidation(int formatVersion) throws Exception { + assumeThat(formatVersion) + .isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_LAST_PARTITION_ID_REQUIRED); + String unsupportedVersion = - readTableMetadataInputFile("TableMetadataV2MissingLastPartitionId.json"); + readTableMetadataInputFile( + String.format("TableMetadataV%sMissingLastPartitionId.json", formatVersion)); assertThatThrownBy(() -> TableMetadataParser.fromJson(unsupportedVersion)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("last-partition-id must exist in format v2"); + .hasMessage(String.format("last-partition-id must exist in format v%s", formatVersion)); } - @Test - public void testParserV2SortOrderValidation() throws Exception { - String unsupportedVersion = readTableMetadataInputFile("TableMetadataV2MissingSortOrder.json"); + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParserSortOrderValidation(int formatVersion) throws Exception { + assumeThat(formatVersion) + .isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_SORT_ORDER_REQUIRED); + + String unsupportedVersion = + readTableMetadataInputFile( + String.format("TableMetadataV%sMissingSortOrder.json", formatVersion)); assertThatThrownBy(() -> TableMetadataParser.fromJson(unsupportedVersion)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("sort-orders must exist in format v2"); + .hasMessage(String.format("sort-orders must exist in format v%s", formatVersion)); } - @Test - public void testParserV2CurrentSchemaIdValidation() throws Exception { - String unsupported = readTableMetadataInputFile("TableMetadataV2CurrentSchemaNotFound.json"); + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParserCurrentSchemaIdValidation(int formatVersion) throws Exception { + assumeThat(formatVersion) + .isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_CURRENT_SCHEMA_ID_REQUIRED); + + String unsupported = + readTableMetadataInputFile( + String.format("TableMetadataV%sCurrentSchemaNotFound.json", formatVersion)); assertThatThrownBy(() -> TableMetadataParser.fromJson(unsupported)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot find schema with current-schema-id=2 from schemas"); } - @Test - public void testParserV2SchemasValidation() throws Exception { - String unsupported = readTableMetadataInputFile("TableMetadataV2MissingSchemas.json"); + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParserSchemasValidation(int formatVersion) throws Exception { + assumeThat(formatVersion) + .isGreaterThanOrEqualTo(TableMetadata.MIN_FORMAT_VERSION_SCHEMAS_REQUIRED); + + String unsupported = + readTableMetadataInputFile( + String.format("TableMetadataV%sMissingSchemas.json", formatVersion)); assertThatThrownBy(() -> TableMetadataParser.fromJson(unsupported)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("schemas must exist in format v2"); + .hasMessage(String.format("schemas must exist in format v%s", formatVersion)); } private String readTableMetadataInputFile(String fileName) throws Exception { @@ -1036,8 +1087,9 @@ private String readTableMetadataInputFile(String fileName) throws Exception { return String.join("", java.nio.file.Files.readAllLines(path)); } - @Test - public void testNewTableMetadataReassignmentAllIds() throws Exception { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testNewTableMetadataReassignmentAllIds(int formatVersion) throws Exception { Schema schema = new Schema( Types.NestedField.required(3, "x", Types.LongType.get()), @@ -1052,7 +1104,8 @@ public void testNewTableMetadataReassignmentAllIds() throws Exception { .build(); String location = "file://tmp/db/table"; TableMetadata metadata = - TableMetadata.newTableMetadata(schema, spec, location, ImmutableMap.of()); + TableMetadata.newTableMetadata( + schema, spec, SortOrder.unsorted(), location, ImmutableMap.of(), formatVersion); // newTableMetadata should reassign column ids and partition field ids. PartitionSpec expected = @@ -1126,8 +1179,11 @@ public void testBuildReplacementForV1Table() { .isEqualTo(expected); } - @Test - public void testBuildReplacementForV2Table() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testBuildReplacementForV2AndV3Table(int formatVersion) { + assumeThat(formatVersion).isGreaterThanOrEqualTo(2); + Schema schema = new Schema( Types.NestedField.required(1, "x", Types.LongType.get()), @@ -1137,7 +1193,7 @@ public void testBuildReplacementForV2Table() { String location = "file://tmp/db/table"; TableMetadata metadata = TableMetadata.newTableMetadata( - schema, spec, SortOrder.unsorted(), location, ImmutableMap.of(), 2); + schema, spec, SortOrder.unsorted(), location, ImmutableMap.of(), formatVersion); assertThat(metadata.spec()).isEqualTo(spec); Schema updatedSchema = @@ -1161,28 +1217,39 @@ public void testBuildReplacementForV2Table() { .isEqualTo(expected); } - @Test - public void testSortOrder() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testSortOrder(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); TableMetadata meta = TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), null, ImmutableMap.of()); + schema, + PartitionSpec.unpartitioned(), + null, + ImmutableMap.of("format-version", String.valueOf(formatVersion))); + assertThat(meta.formatVersion()).isEqualTo(formatVersion); assertThat(meta.sortOrder().isUnsorted()).isTrue(); assertThat(meta.replaceSortOrder(SortOrder.unsorted())) .as("Should detect identical unsorted order") .isSameAs(meta); } - @Test - public void testUpdateSortOrder() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testUpdateSortOrder(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); SortOrder order = SortOrder.builderFor(schema).asc("x").build(); TableMetadata sortedByX = TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), order, null, ImmutableMap.of()); + schema, + PartitionSpec.unpartitioned(), + order, + null, + ImmutableMap.of("format-version", String.valueOf(formatVersion))); + assertThat(sortedByX.formatVersion()).isEqualTo(formatVersion); assertThat(sortedByX.sortOrders()).hasSize(1); assertThat(sortedByX.sortOrder().orderId()).isEqualTo(1); assertThat(sortedByX.sortOrder().fields()).hasSize(1); @@ -1213,23 +1280,35 @@ public void testUpdateSortOrder() { assertThat(sortedByX.sortOrder().fields().get(0).nullOrder()).isEqualTo(NullOrder.NULLS_FIRST); } - @Test - public void testStatistics() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testStatistics(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); TableMetadata meta = TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), null, ImmutableMap.of()); + schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + null, + ImmutableMap.of(), + formatVersion); assertThat(meta.statisticsFiles()).as("Should default to no statistics files").isEmpty(); } - @Test - public void testSetStatistics() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testSetStatistics(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); TableMetadata meta = TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), null, ImmutableMap.of()); + schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + null, + ImmutableMap.of(), + formatVersion); TableMetadata withStatistics = TableMetadata.buildFrom(meta) @@ -1262,14 +1341,20 @@ public void testSetStatistics() { assertThat(statisticsFile.path()).isEqualTo("/some/path/to/stats/file2"); } - @Test - public void testRemoveStatistics() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testRemoveStatistics(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); TableMetadata meta = TableMetadata.buildFrom( TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), null, ImmutableMap.of())) + schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + null, + ImmutableMap.of(), + formatVersion)) .setStatistics( 43, new GenericStatisticsFile( @@ -1294,25 +1379,37 @@ public void testRemoveStatistics() { assertThat(statisticsFile.path()).isEqualTo("/some/path/to/stats/file2"); } - @Test - public void testPartitionStatistics() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testPartitionStatistics(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); TableMetadata meta = TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), null, ImmutableMap.of()); + schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + null, + ImmutableMap.of(), + formatVersion); assertThat(meta.partitionStatisticsFiles()) .as("Should default to no partition statistics files") .isEmpty(); } - @Test - public void testSetPartitionStatistics() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testSetPartitionStatistics(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); TableMetadata meta = TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), null, ImmutableMap.of()); + schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + null, + ImmutableMap.of(), + formatVersion); TableMetadata withPartitionStatistics = TableMetadata.buildFrom(meta) @@ -1355,14 +1452,20 @@ public void testSetPartitionStatistics() { assertThat(partitionStatisticsFile.fileSizeInBytes()).isEqualTo(48L); } - @Test - public void testRemovePartitionStatistics() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testRemovePartitionStatistics(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); TableMetadata meta = TableMetadata.buildFrom( TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), null, ImmutableMap.of())) + schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + null, + ImmutableMap.of(), + formatVersion)) .setPartitionStatistics( ImmutableGenericPartitionStatisticsFile.builder() .snapshotId(43) @@ -1395,17 +1498,26 @@ public void testRemovePartitionStatistics() { assertThat(partitionStatisticsFile.fileSizeInBytes()).isEqualTo(49L); } - @Test - public void testParseSchemaIdentifierFields() throws Exception { - String data = readTableMetadataInputFile("TableMetadataV2Valid.json"); + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParseSchemaIdentifierFields(int formatVersion) throws Exception { + assumeThat(formatVersion).isGreaterThanOrEqualTo(2); + + String data = + readTableMetadataInputFile(String.format("TableMetadataV%sValid.json", formatVersion)); TableMetadata parsed = TableMetadataParser.fromJson(data); assertThat(parsed.schemasById().get(0).identifierFieldIds()).isEmpty(); assertThat(parsed.schemasById().get(1).identifierFieldIds()).containsExactly(1, 2); } - @Test - public void testParseMinimal() throws Exception { - String data = readTableMetadataInputFile("TableMetadataV2ValidMinimal.json"); + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParseMinimal(int formatVersion) throws Exception { + assumeThat(formatVersion).isGreaterThanOrEqualTo(2); + + String data = + readTableMetadataInputFile( + String.format("TableMetadataV%sValidMinimal.json", formatVersion)); TableMetadata parsed = TableMetadataParser.fromJson(data); assertThat(parsed.snapshots()).isEmpty(); assertThat(parsed.snapshotLog()).isEmpty(); @@ -1413,13 +1525,19 @@ public void testParseMinimal() throws Exception { assertThat(parsed.previousFiles()).isEmpty(); } - @Test - public void testUpdateSchemaIdentifierFields() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testUpdateSchemaIdentifierFields(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); TableMetadata meta = TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), null, ImmutableMap.of()); + schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + null, + ImmutableMap.of(), + formatVersion); Schema newSchema = new Schema( @@ -1430,13 +1548,19 @@ public void testUpdateSchemaIdentifierFields() { assertThat(newMeta.schema().identifierFieldIds()).containsExactly(1); } - @Test - public void testUpdateSchema() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testUpdateSchema(int formatVersion) { Schema schema = new Schema(0, Types.NestedField.required(1, "y", Types.LongType.get(), "comment")); TableMetadata freshTable = TableMetadata.newTableMetadata( - schema, PartitionSpec.unpartitioned(), null, ImmutableMap.of()); + schema, + PartitionSpec.unpartitioned(), + SortOrder.unsorted(), + null, + ImmutableMap.of(), + formatVersion); assertThat(freshTable.currentSchemaId()).isEqualTo(TableMetadata.INITIAL_SCHEMA_ID); assertSameSchemaList(ImmutableList.of(schema), freshTable.schemas()); assertThat(freshTable.schema().asStruct()).isEqualTo(schema.asStruct()); @@ -1496,8 +1620,9 @@ schema, new Schema(1, schema2.columns()), new Schema(2, schema3.columns())), assertThat(threeSchemaTable.lastColumnId()).isEqualTo(6); } - @Test - public void testCreateV2MetadataThroughTableProperty() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testCreateMetadataThroughTableProperty(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); TableMetadata meta = @@ -1505,9 +1630,10 @@ public void testCreateV2MetadataThroughTableProperty() { schema, PartitionSpec.unpartitioned(), null, - ImmutableMap.of(TableProperties.FORMAT_VERSION, "2", "key", "val")); + ImmutableMap.of( + TableProperties.FORMAT_VERSION, String.valueOf(formatVersion), "key", "val")); - assertThat(meta.formatVersion()).isEqualTo(2); + assertThat(meta.formatVersion()).isEqualTo(formatVersion); assertThat(meta.properties()) .containsEntry("key", "val") .doesNotContainKey(TableProperties.FORMAT_VERSION); @@ -1579,9 +1705,14 @@ public void testUpgradeMetadataThroughTableProperty(int baseFormatVersion, int n .containsExactly(entry("key2", "val2")); } - @Test - public void testParseStatisticsFiles() throws Exception { - String data = readTableMetadataInputFile("TableMetadataStatisticsFiles.json"); + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParseStatisticsFiles(int formatVersion) throws Exception { + assumeThat(formatVersion).isGreaterThanOrEqualTo(2); + + String data = + readTableMetadataInputFile( + String.format("TableMetadataV%sStatisticsFiles.json", formatVersion)); TableMetadata parsed = TableMetadataParser.fromJson(data); assertThat(parsed.statisticsFiles()).hasSize(1); assertThat(parsed.statisticsFiles()) @@ -1598,9 +1729,14 @@ public void testParseStatisticsFiles() throws Exception { "ndv", 3055729675574597004L, 1, ImmutableList.of(1), ImmutableMap.of())))); } - @Test - public void testParsePartitionStatisticsFiles() throws Exception { - String data = readTableMetadataInputFile("TableMetadataPartitionStatisticsFiles.json"); + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testParsePartitionStatisticsFiles(int formatVersion) throws Exception { + assumeThat(formatVersion).isGreaterThanOrEqualTo(2); + + String data = + readTableMetadataInputFile( + String.format("TableMetadataV%sPartitionStatisticsFiles.json", formatVersion)); TableMetadata parsed = TableMetadataParser.fromJson(data); assertThat(parsed.partitionStatisticsFiles()) .hasSize(1) @@ -1613,8 +1749,9 @@ public void testParsePartitionStatisticsFiles() throws Exception { .build()); } - @Test - public void testNoReservedPropertyForTableMetadataCreation() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testNoReservedPropertyForTableMetadataCreation(int formatVersion) { Schema schema = new Schema(Types.NestedField.required(10, "x", Types.StringType.get())); assertThatThrownBy( @@ -1624,11 +1761,13 @@ public void testNoReservedPropertyForTableMetadataCreation() { PartitionSpec.unpartitioned(), null, "/tmp", - ImmutableMap.of(TableProperties.FORMAT_VERSION, "1"), - 1)) + ImmutableMap.of(TableProperties.FORMAT_VERSION, String.valueOf(formatVersion)), + formatVersion)) .isInstanceOf(IllegalArgumentException.class) .hasMessage( - "Table properties should not contain reserved properties, but got {format-version=1}"); + String.format( + "Table properties should not contain reserved properties, but got {format-version=%s}", + formatVersion)); assertThatThrownBy( () -> @@ -1638,18 +1777,24 @@ public void testNoReservedPropertyForTableMetadataCreation() { null, "/tmp", ImmutableMap.of(TableProperties.UUID, "uuid"), - 1)) + formatVersion)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Table properties should not contain reserved properties, but got {uuid=uuid}"); } - @Test - public void testNoTrailingLocationSlash() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void testNoTrailingLocationSlash(int formatVersion) { String locationWithSlash = "/with_trailing_slash/"; String locationWithoutSlash = "/with_trailing_slash"; TableMetadata meta = TableMetadata.newTableMetadata( - TEST_SCHEMA, SPEC_5, SORT_ORDER_3, locationWithSlash, Collections.emptyMap()); + TEST_SCHEMA, + SPEC_5, + SORT_ORDER_3, + locationWithSlash, + Collections.emptyMap(), + formatVersion); assertThat(meta.location()) .as("Metadata should never return a location ending in a slash") .isEqualTo(locationWithoutSlash); @@ -1670,10 +1815,14 @@ private String createManifestListWithManifestFile( return localInput(manifestList).location(); } - @Test - public void buildReplacementKeepsSnapshotLog() throws Exception { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void buildReplacementKeepsSnapshotLog(int formatVersion) throws Exception { + assumeThat(formatVersion).isGreaterThanOrEqualTo(2); + TableMetadata metadata = - TableMetadataParser.fromJson(readTableMetadataInputFile("TableMetadataV2Valid.json")); + TableMetadataParser.fromJson( + readTableMetadataInputFile(String.format("TableMetadataV%sValid.json", formatVersion))); assertThat(metadata.currentSnapshot()).isNotNull(); assertThat(metadata.snapshots()).hasSize(2); assertThat(metadata.snapshotLog()).hasSize(2); @@ -1711,22 +1860,12 @@ public void removeRefKeepsSnapshotLog() throws Exception { .containsExactlyElementsOf(metadata.snapshotLog()); } - @Test - public void testConstructV3Metadata() { - TableMetadata.newTableMetadata( - TEST_SCHEMA, - PartitionSpec.unpartitioned(), - SortOrder.unsorted(), - TEST_LOCATION, - ImmutableMap.of(), - 3); - } - - @Test - public void onlyMetadataLocationIsUpdatedWithoutTimestampAndMetadataLogEntry() { + @ParameterizedTest + @FieldSource("org.apache.iceberg.TestHelpers#ALL_VERSIONS") + public void onlyMetadataLocationIsUpdatedWithoutTimestampAndMetadataLogEntry(int formatVersion) { String uuid = "386b9f01-002b-4d8c-b77f-42c3fd3b7c9b"; TableMetadata metadata = - TableMetadata.buildFromEmpty() + TableMetadata.buildFromEmpty(formatVersion) .assignUUID(uuid) .setLocation("location") .setCurrentSchema(TEST_SCHEMA, 3) diff --git a/core/src/test/resources/TableMetadataPartitionStatisticsFiles.json b/core/src/test/resources/TableMetadataV2PartitionStatisticsFiles.json similarity index 100% rename from core/src/test/resources/TableMetadataPartitionStatisticsFiles.json rename to core/src/test/resources/TableMetadataV2PartitionStatisticsFiles.json diff --git a/core/src/test/resources/TableMetadataStatisticsFiles.json b/core/src/test/resources/TableMetadataV2StatisticsFiles.json similarity index 100% rename from core/src/test/resources/TableMetadataStatisticsFiles.json rename to core/src/test/resources/TableMetadataV2StatisticsFiles.json diff --git a/core/src/test/resources/TableMetadataV3CurrentSchemaNotFound.json b/core/src/test/resources/TableMetadataV3CurrentSchemaNotFound.json new file mode 100644 index 000000000000..6f1a3d907d7d --- /dev/null +++ b/core/src/test/resources/TableMetadataV3CurrentSchemaNotFound.json @@ -0,0 +1,146 @@ +{ + "format-version": 3, + "table-uuid": "9faafb13-1ce7-4603-93f8-197afc7394a9", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1733710954170, + "last-column-id": 3, + "current-schema-id": 2, + "schemas": [ + { + "type": "struct", + "schema-id": 7, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 6, + "fields": [ + { + "id": 10, + "name": "x", + "required": true, + "type": "string" + } + ] + } + ], + "default-spec-id": 5, + "partition-specs": [ + { + "spec-id": 5, + "fields": [] + } + ], + "last-partition-id": 999, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ], + "properties": { + "property": "value" + }, + "current-snapshot-id": 1733710954168, + "refs": { + "main": { + "snapshot-id": 1733710954168, + "type": "branch" + }, + "previous": { + "snapshot-id": 1733710953138, + "type": "tag" + }, + "test": { + "snapshot-id": 1733710953138, + "type": "branch" + } + }, + "snapshots": [ + { + "snapshot-id": 1733710953138, + "timestamp-ms": 1733710953138, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17078459248221091859.tmp" + }, + { + "snapshot-id": 1733710954168, + "parent-snapshot-id": 1733710953138, + "timestamp-ms": 1733710954168, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17490643917806818981.tmp", + "schema-id": 7 + } + ], + "statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/stats/file.puffin", + "file-size-in-bytes": 100, + "file-footer-size-in-bytes": 42, + "blob-metadata": [ + { + "type": "some-stats", + "snapshot-id": 11, + "sequence-number": 2, + "fields": [ + 4 + ] + } + ] + } + ], + "partition-statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/partition/stats/file.parquet", + "file-size-in-bytes": 42 + } + ], + "row-lineage": false, + "next-row-id": 0, + "snapshot-log": [ + { + "timestamp-ms": 1733710953138, + "snapshot-id": 1733710953138 + }, + { + "timestamp-ms": 1733710954168, + "snapshot-id": 1733710954168 + } + ], + "metadata-log": [] +} \ No newline at end of file diff --git a/core/src/test/resources/TableMetadataV3MissingLastPartitionId.json b/core/src/test/resources/TableMetadataV3MissingLastPartitionId.json new file mode 100644 index 000000000000..afd819feb8b5 --- /dev/null +++ b/core/src/test/resources/TableMetadataV3MissingLastPartitionId.json @@ -0,0 +1,145 @@ +{ + "format-version": 3, + "table-uuid": "9faafb13-1ce7-4603-93f8-197afc7394a9", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1733710954170, + "last-column-id": 3, + "current-schema-id": 7, + "schemas": [ + { + "type": "struct", + "schema-id": 7, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 6, + "fields": [ + { + "id": 10, + "name": "x", + "required": true, + "type": "string" + } + ] + } + ], + "default-spec-id": 5, + "partition-specs": [ + { + "spec-id": 5, + "fields": [] + } + ], + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ], + "properties": { + "property": "value" + }, + "current-snapshot-id": 1733710954168, + "refs": { + "main": { + "snapshot-id": 1733710954168, + "type": "branch" + }, + "previous": { + "snapshot-id": 1733710953138, + "type": "tag" + }, + "test": { + "snapshot-id": 1733710953138, + "type": "branch" + } + }, + "snapshots": [ + { + "snapshot-id": 1733710953138, + "timestamp-ms": 1733710953138, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17078459248221091859.tmp" + }, + { + "snapshot-id": 1733710954168, + "parent-snapshot-id": 1733710953138, + "timestamp-ms": 1733710954168, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17490643917806818981.tmp", + "schema-id": 7 + } + ], + "statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/stats/file.puffin", + "file-size-in-bytes": 100, + "file-footer-size-in-bytes": 42, + "blob-metadata": [ + { + "type": "some-stats", + "snapshot-id": 11, + "sequence-number": 2, + "fields": [ + 4 + ] + } + ] + } + ], + "partition-statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/partition/stats/file.parquet", + "file-size-in-bytes": 42 + } + ], + "row-lineage": false, + "next-row-id": 0, + "snapshot-log": [ + { + "timestamp-ms": 1733710953138, + "snapshot-id": 1733710953138 + }, + { + "timestamp-ms": 1733710954168, + "snapshot-id": 1733710954168 + } + ], + "metadata-log": [] +} \ No newline at end of file diff --git a/core/src/test/resources/TableMetadataV3MissingPartitionSpecs.json b/core/src/test/resources/TableMetadataV3MissingPartitionSpecs.json new file mode 100644 index 000000000000..ea7e6002856b --- /dev/null +++ b/core/src/test/resources/TableMetadataV3MissingPartitionSpecs.json @@ -0,0 +1,146 @@ +{ + "format-version": 3, + "table-uuid": "9faafb13-1ce7-4603-93f8-197afc7394a9", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1733710954170, + "last-column-id": 3, + "current-schema-id": 7, + "schemas": [ + { + "type": "struct", + "schema-id": 7, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 6, + "fields": [ + { + "id": 10, + "name": "x", + "required": true, + "type": "string" + } + ] + } + ], + "partition-spec": [ + { + "name": "x", + "transform": "identity", + "source-id": 1, + "field-id": 1000 + } + ], + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ], + "properties": { + "property": "value" + }, + "current-snapshot-id": 1733710954168, + "refs": { + "main": { + "snapshot-id": 1733710954168, + "type": "branch" + }, + "previous": { + "snapshot-id": 1733710953138, + "type": "tag" + }, + "test": { + "snapshot-id": 1733710953138, + "type": "branch" + } + }, + "snapshots": [ + { + "snapshot-id": 1733710953138, + "timestamp-ms": 1733710953138, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17078459248221091859.tmp" + }, + { + "snapshot-id": 1733710954168, + "parent-snapshot-id": 1733710953138, + "timestamp-ms": 1733710954168, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17490643917806818981.tmp", + "schema-id": 7 + } + ], + "statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/stats/file.puffin", + "file-size-in-bytes": 100, + "file-footer-size-in-bytes": 42, + "blob-metadata": [ + { + "type": "some-stats", + "snapshot-id": 11, + "sequence-number": 2, + "fields": [ + 4 + ] + } + ] + } + ], + "partition-statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/partition/stats/file.parquet", + "file-size-in-bytes": 42 + } + ], + "row-lineage": false, + "next-row-id": 0, + "snapshot-log": [ + { + "timestamp-ms": 1733710953138, + "snapshot-id": 1733710953138 + }, + { + "timestamp-ms": 1733710954168, + "snapshot-id": 1733710954168 + } + ], + "metadata-log": [] +} \ No newline at end of file diff --git a/core/src/test/resources/TableMetadataV3MissingSchemas.json b/core/src/test/resources/TableMetadataV3MissingSchemas.json new file mode 100644 index 000000000000..a4da9157cf38 --- /dev/null +++ b/core/src/test/resources/TableMetadataV3MissingSchemas.json @@ -0,0 +1,130 @@ +{ + "format-version": 3, + "table-uuid": "9faafb13-1ce7-4603-93f8-197afc7394a9", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1733710954170, + "last-column-id": 3, + "schema": { + "type": "struct", + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + }, + "default-spec-id": 5, + "partition-specs": [ + { + "spec-id": 5, + "fields": [] + } + ], + "last-partition-id": 999, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ], + "properties": { + "property": "value" + }, + "current-snapshot-id": 1733710954168, + "refs": { + "main": { + "snapshot-id": 1733710954168, + "type": "branch" + }, + "previous": { + "snapshot-id": 1733710953138, + "type": "tag" + }, + "test": { + "snapshot-id": 1733710953138, + "type": "branch" + } + }, + "snapshots": [ + { + "snapshot-id": 1733710953138, + "timestamp-ms": 1733710953138, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17078459248221091859.tmp" + }, + { + "snapshot-id": 1733710954168, + "parent-snapshot-id": 1733710953138, + "timestamp-ms": 1733710954168, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17490643917806818981.tmp", + "schema-id": 7 + } + ], + "statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/stats/file.puffin", + "file-size-in-bytes": 100, + "file-footer-size-in-bytes": 42, + "blob-metadata": [ + { + "type": "some-stats", + "snapshot-id": 11, + "sequence-number": 2, + "fields": [ + 4 + ] + } + ] + } + ], + "partition-statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/partition/stats/file.parquet", + "file-size-in-bytes": 42 + } + ], + "row-lineage": false, + "next-row-id": 0, + "snapshot-log": [ + { + "timestamp-ms": 1733710953138, + "snapshot-id": 1733710953138 + }, + { + "timestamp-ms": 1733710954168, + "snapshot-id": 1733710954168 + } + ], + "metadata-log": [] +} \ No newline at end of file diff --git a/core/src/test/resources/TableMetadataV3MissingSortOrder.json b/core/src/test/resources/TableMetadataV3MissingSortOrder.json new file mode 100644 index 000000000000..80a3dd11d5f2 --- /dev/null +++ b/core/src/test/resources/TableMetadataV3MissingSortOrder.json @@ -0,0 +1,127 @@ +{ + "format-version": 3, + "table-uuid": "9faafb13-1ce7-4603-93f8-197afc7394a9", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1733710954170, + "last-column-id": 3, + "current-schema-id": 7, + "schemas": [ + { + "type": "struct", + "schema-id": 7, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 6, + "fields": [ + { + "id": 10, + "name": "x", + "required": true, + "type": "string" + } + ] + } + ], + "default-spec-id": 5, + "partition-specs": [ + { + "spec-id": 5, + "fields": [] + } + ], + "last-partition-id": 999, + "default-sort-order-id": 3, + "properties": { + "property": "value" + }, + "current-snapshot-id": 1733710954168, + "refs": { + "main": { + "snapshot-id": 1733710954168, + "type": "branch" + }, + "previous": { + "snapshot-id": 1733710953138, + "type": "tag" + }, + "test": { + "snapshot-id": 1733710953138, + "type": "branch" + } + }, + "snapshots": [ + { + "snapshot-id": 1733710953138, + "timestamp-ms": 1733710953138, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17078459248221091859.tmp" + }, + { + "snapshot-id": 1733710954168, + "parent-snapshot-id": 1733710953138, + "timestamp-ms": 1733710954168, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17490643917806818981.tmp", + "schema-id": 7 + } + ], + "statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/stats/file.puffin", + "file-size-in-bytes": 100, + "file-footer-size-in-bytes": 42, + "blob-metadata": [ + { + "type": "some-stats", + "snapshot-id": 11, + "sequence-number": 2, + "fields": [ + 4 + ] + } + ] + } + ], + "partition-statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/partition/stats/file.parquet", + "file-size-in-bytes": 42 + } + ], + "row-lineage": false, + "next-row-id": 0, + "snapshot-log": [ + { + "timestamp-ms": 1733710953138, + "snapshot-id": 1733710953138 + }, + { + "timestamp-ms": 1733710954168, + "snapshot-id": 1733710954168 + } + ], + "metadata-log": [] +} \ No newline at end of file diff --git a/core/src/test/resources/TableMetadataV3PartitionStatisticsFiles.json b/core/src/test/resources/TableMetadataV3PartitionStatisticsFiles.json new file mode 100644 index 000000000000..796f5bc8b13b --- /dev/null +++ b/core/src/test/resources/TableMetadataV3PartitionStatisticsFiles.json @@ -0,0 +1,61 @@ +{ + "format-version": 3, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 0, + "schemas": [ + { + "type": "struct", + "schema-id": 0, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + } + ] + } + ], + "default-spec-id": 0, + "partition-specs": [ + { + "spec-id": 0, + "fields": [] + } + ], + "last-partition-id": 1000, + "default-sort-order-id": 0, + "sort-orders": [ + { + "order-id": 0, + "fields": [] + } + ], + "properties": {}, + "current-snapshot-id": 3055729675574597004, + "snapshots": [ + { + "snapshot-id": 3055729675574597004, + "timestamp-ms": 1555100955770, + "sequence-number": 1, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/2.avro", + "schema-id": 0 + } + ], + "partition-statistics": [ + { + "snapshot-id": 3055729675574597004, + "statistics-path": "s3://a/b/partition-stats.parquet", + "file-size-in-bytes": 43 + } + ], + "snapshot-log": [], + "metadata-log": [] +} diff --git a/core/src/test/resources/TableMetadataV3StatisticsFiles.json b/core/src/test/resources/TableMetadataV3StatisticsFiles.json new file mode 100644 index 000000000000..6768f83f7a34 --- /dev/null +++ b/core/src/test/resources/TableMetadataV3StatisticsFiles.json @@ -0,0 +1,70 @@ +{ + "format-version": 3, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 0, + "schemas": [ + { + "type": "struct", + "schema-id": 0, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + } + ] + } + ], + "default-spec-id": 0, + "partition-specs": [ + { + "spec-id": 0, + "fields": [] + } + ], + "last-partition-id": 1000, + "default-sort-order-id": 0, + "sort-orders": [ + { + "order-id": 0, + "fields": [] + } + ], + "properties": {}, + "current-snapshot-id": 3055729675574597004, + "snapshots": [ + { + "snapshot-id": 3055729675574597004, + "timestamp-ms": 1555100955770, + "sequence-number": 1, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/2.avro", + "schema-id": 0 + } + ], + "statistics": [ + { + "snapshot-id": 3055729675574597004, + "statistics-path": "s3://a/b/stats.puffin", + "file-size-in-bytes": 413, + "file-footer-size-in-bytes": 42, + "blob-metadata": [ + { + "type": "ndv", + "snapshot-id": 3055729675574597004, + "sequence-number": 1, + "fields": [1] + } + ] + } + ], + "snapshot-log": [], + "metadata-log": [] +} \ No newline at end of file diff --git a/core/src/test/resources/TableMetadataV3Valid.json b/core/src/test/resources/TableMetadataV3Valid.json new file mode 100644 index 000000000000..a65b77b4c0d2 --- /dev/null +++ b/core/src/test/resources/TableMetadataV3Valid.json @@ -0,0 +1,162 @@ +{ + "format-version": 3, + "table-uuid": "9faafb13-1ce7-4603-93f8-197afc7394a9", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1733710954170, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 0, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [ + 1, + 2 + ], + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 6, + "fields": [ + { + "id": 10, + "name": "x", + "required": true, + "type": "string" + } + ] + } + ], + "default-spec-id": 5, + "partition-specs": [ + { + "spec-id": 5, + "fields": [] + } + ], + "last-partition-id": 999, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ], + "properties": { + "property": "value" + }, + "current-snapshot-id": 1733710954168, + "refs": { + "main": { + "snapshot-id": 1733710954168, + "type": "branch" + }, + "previous": { + "snapshot-id": 1733710953138, + "type": "tag" + }, + "test": { + "snapshot-id": 1733710953138, + "type": "branch" + } + }, + "snapshots": [ + { + "snapshot-id": 1733710953138, + "timestamp-ms": 1733710953138, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17078459248221091859.tmp" + }, + { + "snapshot-id": 1733710954168, + "parent-snapshot-id": 1733710953138, + "timestamp-ms": 1733710954168, + "manifest-list": "/var/folders/z8/cv70q7pj1l13r_j628cyyks80000gn/T/junit-9477945946492951907/manifests17490643917806818981.tmp", + "schema-id": 7 + } + ], + "statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/stats/file.puffin", + "file-size-in-bytes": 100, + "file-footer-size-in-bytes": 42, + "blob-metadata": [ + { + "type": "some-stats", + "snapshot-id": 11, + "sequence-number": 2, + "fields": [ + 4 + ] + } + ] + } + ], + "partition-statistics": [ + { + "snapshot-id": 11, + "statistics-path": "/some/partition/stats/file.parquet", + "file-size-in-bytes": 42 + } + ], + "row-lineage": false, + "next-row-id": 0, + "snapshot-log": [ + { + "timestamp-ms": 1733710953138, + "snapshot-id": 1733710953138 + }, + { + "timestamp-ms": 1733710954168, + "snapshot-id": 1733710954168 + } + ], + "metadata-log": [] +} \ No newline at end of file diff --git a/core/src/test/resources/TableMetadataV3ValidMinimal.json b/core/src/test/resources/TableMetadataV3ValidMinimal.json new file mode 100644 index 000000000000..a6bfb5c7f10f --- /dev/null +++ b/core/src/test/resources/TableMetadataV3ValidMinimal.json @@ -0,0 +1,76 @@ +{ + "format-version": 3, + "table-uuid": "9faafb13-1ce7-4603-93f8-197afc7394a9", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1733710954170, + "last-column-id": 3, + "current-schema-id": 7, + "schemas": [ + { + "type": "struct", + "schema-id": 7, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 6, + "fields": [ + { + "id": 10, + "name": "x", + "required": true, + "type": "string" + } + ] + } + ], + "default-spec-id": 5, + "partition-specs": [ + { + "spec-id": 5, + "fields": [] + } + ], + "last-partition-id": 999, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ] +} \ No newline at end of file