diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index 164e29505c84..2533fa034731 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -731,6 +731,7 @@ public static Builder buildFrom(TableMetadata base) { public static class Builder { private final TableMetadata base; + private String metadataLocation; private int formatVersion; private String uuid; private Long lastUpdatedMillis; @@ -795,6 +796,11 @@ private Builder(TableMetadata base) { this.sortOrdersById = Maps.newHashMap(base.sortOrdersById); } + public Builder withMetadataLocation(String newMetadataLocation) { + this.metadataLocation = newMetadataLocation; + return this; + } + public Builder assignUUID() { if (uuid == null) { this.uuid = UUID.randomUUID().toString(); @@ -1009,6 +1015,12 @@ public TableMetadata build() { this.lastUpdatedMillis = System.currentTimeMillis(); } + // metadata associated with a metadata file must match that file exactly, and metadata files do not store changes, // so a metadata location is only accepted when there are no changes or when the changes are being discarded. 
+ Preconditions.checkArgument( + changes.size() == 0 || discardChanges || metadataLocation == null, + "Cannot set metadata location with changes to table metadata: %s changes", changes.size()); + Schema schema = schemasById.get(currentSchemaId); PartitionSpec.checkCompatibility(specsById.get(defaultSpecId), schema); SortOrder.checkCompatibility(sortOrdersById.get(defaultSortOrderId), schema); @@ -1018,7 +1030,7 @@ public TableMetadata build() { List newSnapshotLog = updateSnapshotLog(snapshotLog, snapshotsById, currentSnapshotId, changes); return new TableMetadata( - null, + metadataLocation, formatVersion, uuid, location, diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java index 6b406626c88c..3c64c19a27f5 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieTableOperations.java @@ -90,6 +90,7 @@ private TableMetadata loadTableMetadata(String metadataLocation) { .setCurrentSchema(table.getSchemaId()) .setDefaultSortOrder(table.getSortOrderId()) .setDefaultPartitionSpec(table.getSpecId()) + .withMetadataLocation(metadataLocation) .discardChanges() .build(); } diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java index 64bd6660be31..3a8b40d7b2b6 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestBranchVisibility.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.Map; import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.iceberg.BaseTable; import org.apache.iceberg.DataFile; import org.apache.iceberg.Snapshot; import org.apache.iceberg.Table; @@ -173,6 +174,32 @@ public void testSchemaSnapshot() throws Exception { 
 Assertions.assertThat(metadataOn2).isNotEqualTo(metadataOnTest).isNotEqualTo(metadataOnTest2); } + @Test + public void testMetadataLocation() throws Exception { + String branch1 = "test"; + String branch2 = "branch-2"; + + // first commit: add a row to tableIdentifier1 via a catalog initialized on branch1 + NessieCatalog catalog = initCatalog(branch1); + String metadataLocationOfCommit1 = addRow(catalog, tableIdentifier1, "initial-data", + ImmutableMap.of("id0", 4L)); + + createBranch(branch2, catalog.currentHash(), branch1); + // second commit: add another row to tableIdentifier1 via a catalog initialized on branch2 + catalog = initCatalog(branch2); + String metadataLocationOfCommit2 = addRow(catalog, tableIdentifier1, "some-more-data", + ImmutableMap.of("id0", 42L)); + Assertions.assertThat(metadataLocationOfCommit2).isNotNull().isNotEqualTo(metadataLocationOfCommit1); + + catalog = initCatalog(branch1); + // load tableIdentifier1 through the catalog that was re-initialized on branch1 + BaseTable table = (BaseTable) catalog.loadTable(tableIdentifier1); + // although the table was loaded on branch1, its current metadata file location + // must equal the location from the second commit on branch2 (the latest global state) + Assertions.assertThat(table.operations().current().metadataFileLocation()) + .isNotNull().isEqualTo(metadataLocationOfCommit2); + } + /** * Complex-ish test case that verifies that both the snapshot-ID and schema-ID are properly set * and retained when working with a mixture of DDLs and DMLs across multiple branches.