-
Notifications
You must be signed in to change notification settings - Fork 3k
Core: allow creating v2 tables through table property #2887
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
466fe26
257eaaf
4ba96b3
bff76bb
197fc6a
083dec1
34094f2
aae17e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,7 @@ | |
| import java.util.concurrent.TimeUnit; | ||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| import java.util.function.Predicate; | ||
| import java.util.stream.Collectors; | ||
| import org.apache.iceberg.exceptions.ValidationException; | ||
| import org.apache.iceberg.io.InputFile; | ||
| import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; | ||
|
|
@@ -57,31 +58,30 @@ public class TableMetadata implements Serializable { | |
|
|
||
| private static final long ONE_MINUTE = TimeUnit.MINUTES.toMillis(1); | ||
|
|
||
| /** | ||
| * @deprecated will be removed in 0.9.0; use newTableMetadata(Schema, PartitionSpec, String, Map) instead. | ||
| */ | ||
| @Deprecated | ||
| public static TableMetadata newTableMetadata(TableOperations ops, | ||
| Schema schema, | ||
| PartitionSpec spec, | ||
| String location, | ||
| Map<String, String> properties) { | ||
| return newTableMetadata(schema, spec, SortOrder.unsorted(), location, properties, DEFAULT_TABLE_FORMAT_VERSION); | ||
| } | ||
|
|
||
| public static TableMetadata newTableMetadata(Schema schema, | ||
| PartitionSpec spec, | ||
| SortOrder sortOrder, | ||
| String location, | ||
| Map<String, String> properties) { | ||
| return newTableMetadata(schema, spec, sortOrder, location, properties, DEFAULT_TABLE_FORMAT_VERSION); | ||
| int formatVersion = PropertyUtil.propertyAsInt(properties, TableProperties.FORMAT_VERSION, | ||
| DEFAULT_TABLE_FORMAT_VERSION); | ||
| return newTableMetadata(schema, spec, sortOrder, location, unreservedProperties(properties), formatVersion); | ||
| } | ||
|
|
||
| public static TableMetadata newTableMetadata(Schema schema, | ||
| PartitionSpec spec, | ||
| String location, | ||
| Map<String, String> properties) { | ||
| return newTableMetadata(schema, spec, SortOrder.unsorted(), location, properties, DEFAULT_TABLE_FORMAT_VERSION); | ||
| SortOrder sortOrder = SortOrder.unsorted(); | ||
| int formatVersion = PropertyUtil.propertyAsInt(properties, TableProperties.FORMAT_VERSION, | ||
| DEFAULT_TABLE_FORMAT_VERSION); | ||
| return newTableMetadata(schema, spec, sortOrder, location, unreservedProperties(properties), formatVersion); | ||
| } | ||
|
|
||
| private static Map<String, String> unreservedProperties(Map<String, String> rawProperties) { | ||
| return rawProperties.entrySet().stream() | ||
| .filter(e -> !TableProperties.RESERVED_PROPERTIES.contains(e.getKey())) | ||
| .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); | ||
| } | ||
|
|
||
| static TableMetadata newTableMetadata(Schema schema, | ||
|
|
@@ -90,6 +90,9 @@ static TableMetadata newTableMetadata(Schema schema, | |
| String location, | ||
| Map<String, String> properties, | ||
| int formatVersion) { | ||
| Preconditions.checkArgument(properties.keySet().stream().noneMatch(TableProperties.RESERVED_PROPERTIES::contains), | ||
| "Table properties should not contain reserved properties, but got %s", properties); | ||
|
|
||
| // reassign all column ids to ensure consistency | ||
| AtomicInteger lastColumnId = new AtomicInteger(0); | ||
| Schema freshSchema = TypeUtil.assignFreshIds(INITIAL_SCHEMA_ID, schema, lastColumnId::incrementAndGet); | ||
|
|
@@ -677,12 +680,20 @@ private TableMetadata setCurrentSnapshotTo(Snapshot snapshot) { | |
| newSnapshotLog, addPreviousFile(file, lastUpdatedMillis)); | ||
| } | ||
|
|
||
| public TableMetadata replaceProperties(Map<String, String> newProperties) { | ||
| ValidationException.check(newProperties != null, "Cannot set properties to null"); | ||
| return new TableMetadata(null, formatVersion, uuid, location, | ||
| public TableMetadata replaceProperties(Map<String, String> rawProperties) { | ||
| ValidationException.check(rawProperties != null, "Cannot set properties to null"); | ||
| Map<String, String> newProperties = unreservedProperties(rawProperties); | ||
| TableMetadata metadata = new TableMetadata(null, formatVersion, uuid, location, | ||
| lastSequenceNumber, System.currentTimeMillis(), lastColumnId, currentSchemaId, schemas, defaultSpecId, specs, | ||
| lastAssignedPartitionId, defaultSortOrderId, sortOrders, newProperties, currentSnapshotId, snapshots, | ||
| snapshotLog, addPreviousFile(file, lastUpdatedMillis, newProperties)); | ||
|
|
||
| int newFormatVersion = PropertyUtil.propertyAsInt(rawProperties, TableProperties.FORMAT_VERSION, formatVersion); | ||
| if (formatVersion != newFormatVersion) { | ||
| metadata = metadata.upgradeToFormatVersion(newFormatVersion); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems a bit weird to call another instance method to produce a second TableMetadata instance. |
||
| } | ||
|
|
||
| return metadata; | ||
| } | ||
|
|
||
| public TableMetadata removeSnapshotLogEntries(Set<Long> snapshotIds) { | ||
|
|
@@ -754,7 +765,10 @@ public TableMetadata buildReplacement(Schema updatedSchema, PartitionSpec update | |
|
|
||
| Map<String, String> newProperties = Maps.newHashMap(); | ||
| newProperties.putAll(this.properties); | ||
| newProperties.putAll(updatedProperties); | ||
| newProperties.putAll(unreservedProperties(updatedProperties)); | ||
|
|
||
| // check if there is format version override | ||
| int newFormatVersion = PropertyUtil.propertyAsInt(updatedProperties, TableProperties.FORMAT_VERSION, formatVersion); | ||
|
|
||
| // determine the next schema id | ||
| int freshSchemaId = reuseOrCreateNewSchemaId(freshSchema); | ||
|
|
@@ -764,11 +778,17 @@ public TableMetadata buildReplacement(Schema updatedSchema, PartitionSpec update | |
| schemasBuilder.add(new Schema(freshSchemaId, freshSchema.columns(), freshSchema.identifierFieldIds())); | ||
| } | ||
|
|
||
| return new TableMetadata(null, formatVersion, uuid, newLocation, | ||
| TableMetadata metadata = new TableMetadata(null, formatVersion, uuid, newLocation, | ||
| lastSequenceNumber, System.currentTimeMillis(), newLastColumnId.get(), freshSchemaId, schemasBuilder.build(), | ||
| specId, specListBuilder.build(), Math.max(lastAssignedPartitionId, freshSpec.lastAssignedFieldId()), | ||
| orderId, sortOrdersBuilder.build(), ImmutableMap.copyOf(newProperties), | ||
| -1, snapshots, ImmutableList.of(), addPreviousFile(file, lastUpdatedMillis, newProperties)); | ||
|
|
||
| if (formatVersion != newFormatVersion) { | ||
| metadata = metadata.upgradeToFormatVersion(newFormatVersion); | ||
| } | ||
|
|
||
| return metadata; | ||
| } | ||
|
|
||
| public TableMetadata updateLocation(String newLocation) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -756,4 +756,68 @@ public void testUpdateSchema() { | |
| Assert.assertEquals("Should return expected last column id", | ||
| 6, threeSchemaTable.lastColumnId()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testCreateV2MetadataThroughTableProperty() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we also add unit tests in the flink/spark/hive modules to verify that the end-to-end DDL works? If so, I think we could make that a separate PR.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think we could access the format version by using the following code: Table table = ...
TableOperations ops = ((BaseTable) table).operations();
TableMetadata meta = ops.current();
int formatVersion = meta.formatVersion(); |
||
| Schema schema = new Schema( | ||
| Types.NestedField.required(10, "x", Types.StringType.get()) | ||
| ); | ||
|
|
||
| TableMetadata meta = TableMetadata.newTableMetadata(schema, PartitionSpec.unpartitioned(), null, | ||
| ImmutableMap.of(TableProperties.FORMAT_VERSION, "2", "key", "val")); | ||
|
|
||
| Assert.assertEquals("format version should be configured based on the format-version key", | ||
| 2, meta.formatVersion()); | ||
| Assert.assertEquals("should not contain format-version in properties", | ||
| ImmutableMap.of("key", "val"), meta.properties()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testReplaceV1MetadataToV2ThroughTableProperty() { | ||
| Schema schema = new Schema( | ||
| Types.NestedField.required(10, "x", Types.StringType.get()) | ||
| ); | ||
|
|
||
| TableMetadata meta = TableMetadata.newTableMetadata(schema, PartitionSpec.unpartitioned(), null, | ||
| ImmutableMap.of(TableProperties.FORMAT_VERSION, "1", "key", "val")); | ||
|
|
||
| meta = meta.buildReplacement(meta.schema(), meta.spec(), meta.sortOrder(), meta.location(), | ||
| ImmutableMap.of(TableProperties.FORMAT_VERSION, "2", "key2", "val2")); | ||
|
|
||
| Assert.assertEquals("format version should be configured based on the format-version key", | ||
| 2, meta.formatVersion()); | ||
| Assert.assertEquals("should not contain format-version but should contain old and new properties", | ||
| ImmutableMap.of("key", "val", "key2", "val2"), meta.properties()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testUpgradeV1MetadataToV2ThroughTableProperty() { | ||
| Schema schema = new Schema( | ||
| Types.NestedField.required(10, "x", Types.StringType.get()) | ||
| ); | ||
|
|
||
| TableMetadata meta = TableMetadata.newTableMetadata(schema, PartitionSpec.unpartitioned(), null, | ||
| ImmutableMap.of(TableProperties.FORMAT_VERSION, "1", "key", "val")); | ||
|
|
||
| meta = meta.replaceProperties(ImmutableMap.of(TableProperties.FORMAT_VERSION, | ||
| "2", "key2", "val2")); | ||
|
|
||
| Assert.assertEquals("format version should be configured based on the format-version key", | ||
| 2, meta.formatVersion()); | ||
| Assert.assertEquals("should not contain format-version but should contain new properties", | ||
| ImmutableMap.of("key2", "val2"), meta.properties()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testNoReservedPropertyForTableMetadataCreation() { | ||
| Schema schema = new Schema( | ||
| Types.NestedField.required(10, "x", Types.StringType.get()) | ||
| ); | ||
|
|
||
| AssertHelpers.assertThrows("should not allow reserved table property when creating table metadata", | ||
| IllegalArgumentException.class, | ||
| "Table properties should not contain reserved properties, but got {format-version=1}", | ||
| () -> TableMetadata.newTableMetadata(schema, PartitionSpec.unpartitioned(), null, "/tmp", | ||
| ImmutableMap.of(TableProperties.FORMAT_VERSION, "1"), 1)); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for removing this!