Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion core/src/main/java/org/apache/iceberg/TableMetadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,7 @@ public static Builder buildFrom(TableMetadata base) {

public static class Builder {
private final TableMetadata base;
private String metadataLocation;
private int formatVersion;
private String uuid;
private Long lastUpdatedMillis;
Expand Down Expand Up @@ -795,6 +796,11 @@ private Builder(TableMetadata base) {
this.sortOrdersById = Maps.newHashMap(base.sortOrdersById);
}

/**
 * Associates the metadata being built with the file it was loaded from.
 * <p>
 * When a location is set, the built {@link TableMetadata} must carry no pending
 * changes (or have them discarded), so the in-memory metadata matches the file
 * exactly; {@code build()} enforces this with a precondition check.
 *
 * @param newMetadataLocation location of the metadata file backing this metadata
 * @return this builder, for method chaining
 */
public Builder withMetadataLocation(String newMetadataLocation) {
  this.metadataLocation = newMetadataLocation;
  return this;
}

public Builder assignUUID() {
if (uuid == null) {
this.uuid = UUID.randomUUID().toString();
Expand Down Expand Up @@ -1009,6 +1015,12 @@ public TableMetadata build() {
this.lastUpdatedMillis = System.currentTimeMillis();
}

// when associated with a metadata file, table metadata must have no changes so that the metadata matches exactly
// what is in the metadata file, which does not store changes. metadata location with changes is inconsistent.
Preconditions.checkArgument(
changes.size() == 0 || discardChanges || metadataLocation == null,
"Cannot set metadata location with changes to table metadata: %s changes", changes.size());

Schema schema = schemasById.get(currentSchemaId);
PartitionSpec.checkCompatibility(specsById.get(defaultSpecId), schema);
SortOrder.checkCompatibility(sortOrdersById.get(defaultSortOrderId), schema);
Expand All @@ -1018,7 +1030,7 @@ public TableMetadata build() {
List<HistoryEntry> newSnapshotLog = updateSnapshotLog(snapshotLog, snapshotsById, currentSnapshotId, changes);

return new TableMetadata(
null,
metadataLocation,
formatVersion,
uuid,
location,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ private TableMetadata loadTableMetadata(String metadataLocation) {
.setCurrentSchema(table.getSchemaId())
.setDefaultSortOrder(table.getSortOrderId())
.setDefaultPartitionSpec(table.getSpecId())
.withMetadataLocation(metadataLocation)
.discardChanges()
.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.Collections;
import java.util.Map;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.iceberg.BaseTable;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
Expand Down Expand Up @@ -173,6 +174,32 @@ public void testSchemaSnapshot() throws Exception {
Assertions.assertThat(metadataOn2).isNotEqualTo(metadataOnTest).isNotEqualTo(metadataOnTest2);
}

@Test
public void testMetadataLocation() throws Exception {
  String firstBranch = "test";
  String secondBranch = "branch-2";

  // first commit touches tableIdentifier1 on the first branch
  NessieCatalog nessieCatalog = initCatalog(firstBranch);
  String locationAfterCommit1 = addRow(nessieCatalog, tableIdentifier1, "initial-data",
      ImmutableMap.of("id0", 4L));

  // fork a second branch from the current hash, then commit to the same table there
  createBranch(secondBranch, nessieCatalog.currentHash(), firstBranch);
  nessieCatalog = initCatalog(secondBranch);
  String locationAfterCommit2 = addRow(nessieCatalog, tableIdentifier1, "some-more-data",
      ImmutableMap.of("id0", 42L));
  Assertions.assertThat(locationAfterCommit2).isNotNull().isNotEqualTo(locationAfterCommit1);

  // reading the table back on the first branch should surface the latest
  // global state, i.e. the metadata location written by the second commit
  nessieCatalog = initCatalog(firstBranch);
  BaseTable table = (BaseTable) nessieCatalog.loadTable(tableIdentifier1);
  Assertions.assertThat(table.operations().current().metadataFileLocation())
      .isNotNull().isEqualTo(locationAfterCommit2);
}

/**
* Complex-ish test case that verifies that both the snapshot-ID and schema-ID are properly set
* and retained when working with a mixture of DDLs and DMLs across multiple branches.
Expand Down