-
Notifications
You must be signed in to change notification settings - Fork 5.5k
feat: Add initial support for Iceberg format version 3 #27021
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -114,6 +114,7 @@ | |
| import org.apache.iceberg.Snapshot; | ||
| import org.apache.iceberg.SortOrder; | ||
| import org.apache.iceberg.Table; | ||
| import org.apache.iceberg.TableMetadata; | ||
| import org.apache.iceberg.TableProperties; | ||
| import org.apache.iceberg.Transaction; | ||
| import org.apache.iceberg.UpdatePartitionSpec; | ||
|
|
@@ -189,6 +190,7 @@ | |
| import static com.facebook.presto.iceberg.IcebergTableType.CHANGELOG; | ||
| import static com.facebook.presto.iceberg.IcebergTableType.DATA; | ||
| import static com.facebook.presto.iceberg.IcebergTableType.EQUALITY_DELETES; | ||
| import static com.facebook.presto.iceberg.IcebergUtil.MAX_FORMAT_VERSION_FOR_ROW_LEVEL_OPERATIONS; | ||
| import static com.facebook.presto.iceberg.IcebergUtil.MIN_FORMAT_VERSION_FOR_DELETE; | ||
| import static com.facebook.presto.iceberg.IcebergUtil.getColumns; | ||
| import static com.facebook.presto.iceberg.IcebergUtil.getColumnsForWrite; | ||
|
|
@@ -357,6 +359,48 @@ public Optional<IcebergProcedureContext> getProcedureContext() | |
| return this.procedureContext; | ||
| } | ||
|
|
||
| protected static void validateTableForPresto(BaseTable table, Optional<Long> tableSnapshotId) | ||
| { | ||
| Snapshot snapshot; | ||
| try { | ||
| snapshot = tableSnapshotId | ||
| .map(table::snapshot) | ||
| .orElse(table.currentSnapshot()); | ||
| } | ||
| catch (RuntimeException e) { | ||
| // If the snapshot cannot be retrieved (e.g. metadata is missing), we cannot validate the table. | ||
| // Returning here allows operations that do not strictly require the snapshot (like DROP TABLE) to proceed. | ||
| return; | ||
| } | ||
|
|
||
| if (snapshot == null) { | ||
| // empty table, nothing to validate | ||
| return; | ||
| } | ||
|
|
||
| TableMetadata metadata = table.operations().current(); | ||
| if (metadata.formatVersion() < 3) { | ||
| return; | ||
| } | ||
|
|
||
| Schema schema = metadata.schemasById().get(snapshot.schemaId()); | ||
| if (schema == null) { | ||
| schema = metadata.schema(); | ||
| } | ||
|
|
||
| // Reject schema default values (initial-default / write-default) | ||
| for (Types.NestedField field : schema.columns()) { | ||
|
Comment on lines +391 to +392
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. suggestion: Column default validation only considers top-level columns and may miss nested fields.
|
||
| if (field.initialDefault() != null || field.writeDefault() != null) { | ||
| throw new PrestoException(NOT_SUPPORTED, "Iceberg v3 column default values are not supported"); | ||
| } | ||
| } | ||
|
|
||
| // Reject Iceberg table encryption | ||
| if (!metadata.encryptionKeys().isEmpty() || snapshot.keyId() != null || metadata.properties().containsKey("encryption.key-id")) { | ||
| throw new PrestoException(NOT_SUPPORTED, "Iceberg table encryption is not supported"); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * This class provides the default implementation of getTableLayoutForConstraint, which is used in the case of a Java Worker | ||
| */ | ||
|
|
@@ -829,6 +873,11 @@ public ConnectorMergeTableHandle beginMerge(ConnectorSession session, ConnectorT | |
| Table icebergTable = getIcebergTable(session, icebergTableHandle.getSchemaTableName()); | ||
| int formatVersion = ((BaseTable) icebergTable).operations().current().formatVersion(); | ||
|
|
||
| if (formatVersion > MAX_FORMAT_VERSION_FOR_ROW_LEVEL_OPERATIONS) { | ||
| throw new PrestoException(NOT_SUPPORTED, | ||
| format("Iceberg table updates for format version %s are not supported yet", formatVersion)); | ||
| } | ||
hantangwangd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if (formatVersion < MIN_FORMAT_VERSION_FOR_DELETE || | ||
| !Optional.ofNullable(icebergTable.properties().get(TableProperties.UPDATE_MODE)) | ||
| .map(mode -> mode.equals(MERGE_ON_READ.modeName())) | ||
|
|
@@ -1302,6 +1351,12 @@ public IcebergTableHandle getTableHandle(ConnectorSession session, SchemaTableNa | |
| }) | ||
| .orElseGet(() -> resolveSnapshotIdByName(table, name)); | ||
|
|
||
| // Validate unsupported v3 features (column defaults, encryption) before | ||
| // proceeding | ||
| if (table instanceof BaseTable) { | ||
| validateTableForPresto((BaseTable) table, tableSnapshotId); | ||
| } | ||
|
|
||
| // Getting an Iceberg table's schema, properties, and location will fail | ||
| // when its filesystem metadata is missing. | ||
| // See https://github.com/prestodb/presto/pull/21181 | ||
|
|
@@ -1404,6 +1459,10 @@ public ConnectorDeleteTableHandle beginDelete(ConnectorSession session, Connecto | |
| if (formatVersion < MIN_FORMAT_VERSION_FOR_DELETE) { | ||
| throw new PrestoException(NOT_SUPPORTED, format("This connector only supports delete where one or more partitions are deleted entirely for table versions older than %d", MIN_FORMAT_VERSION_FOR_DELETE)); | ||
| } | ||
| if (formatVersion > MAX_FORMAT_VERSION_FOR_ROW_LEVEL_OPERATIONS) { | ||
| throw new PrestoException(NOT_SUPPORTED, | ||
| format("Iceberg table updates for format version %s are not supported yet", formatVersion)); | ||
| } | ||
| if (getDeleteMode(icebergTable) == RowLevelOperationMode.COPY_ON_WRITE) { | ||
| throw new PrestoException(NOT_SUPPORTED, "This connector only supports delete where one or more partitions are deleted entirely. Configure write.delete.mode table property to allow row level deletions."); | ||
| } | ||
|
|
@@ -1653,6 +1712,12 @@ public ConnectorTableHandle beginUpdate(ConnectorSession session, ConnectorTable | |
| IcebergTableHandle handle = (IcebergTableHandle) tableHandle; | ||
| Table icebergTable = getIcebergTable(session, handle.getSchemaTableName()); | ||
| int formatVersion = opsFromTable(icebergTable).current().formatVersion(); | ||
|
|
||
| if (formatVersion > MAX_FORMAT_VERSION_FOR_ROW_LEVEL_OPERATIONS) { | ||
| throw new PrestoException(NOT_SUPPORTED, | ||
| format("Iceberg table updates for format version %s are not supported yet", formatVersion)); | ||
| } | ||
|
|
||
| if (formatVersion < MIN_FORMAT_VERSION_FOR_DELETE || | ||
| !Optional.ofNullable(icebergTable.properties().get(TableProperties.UPDATE_MODE)) | ||
| .map(mode -> mode.equals(MERGE_ON_READ.modeName())) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
issue (bug_risk): Catching RuntimeException broadly here may hide unexpected failures during snapshot retrieval.
Swallowing all RuntimeExceptions here will also hide programming/configuration errors that should surface or at least be observable. Consider catching a narrower, Iceberg-specific exception for the expected failure mode, or at minimum log the exception before returning so operational debugging remains possible.