Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.UpdatePartitionSpec;
Expand Down Expand Up @@ -189,6 +190,7 @@
import static com.facebook.presto.iceberg.IcebergTableType.CHANGELOG;
import static com.facebook.presto.iceberg.IcebergTableType.DATA;
import static com.facebook.presto.iceberg.IcebergTableType.EQUALITY_DELETES;
import static com.facebook.presto.iceberg.IcebergUtil.MAX_FORMAT_VERSION_FOR_ROW_LEVEL_OPERATIONS;
import static com.facebook.presto.iceberg.IcebergUtil.MIN_FORMAT_VERSION_FOR_DELETE;
import static com.facebook.presto.iceberg.IcebergUtil.getColumns;
import static com.facebook.presto.iceberg.IcebergUtil.getColumnsForWrite;
Expand Down Expand Up @@ -357,6 +359,48 @@ public Optional<IcebergProcedureContext> getProcedureContext()
return this.procedureContext;
}

protected static void validateTableForPresto(BaseTable table, Optional<Long> tableSnapshotId)
{
Snapshot snapshot;
try {
snapshot = tableSnapshotId
.map(table::snapshot)
.orElse(table.currentSnapshot());
}
catch (RuntimeException e) {
// If the snapshot cannot be retrieved (e.g. metadata is missing), we cannot validate the table.
Comment on lines +370 to +371
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): Catching RuntimeException broadly here may hide unexpected failures during snapshot retrieval.

Swallowing all RuntimeExceptions here will also hide programming/configuration errors that should surface or at least be observable. Consider catching a narrower, Iceberg-specific exception for the expected failure mode, or at minimum log the exception before returning so operational debugging remains possible.

// Returning here allows operations that do not strictly require the snapshot (like DROP TABLE) to proceed.
return;
}

if (snapshot == null) {
// empty table, nothing to validate
return;
}

TableMetadata metadata = table.operations().current();
if (metadata.formatVersion() < 3) {
return;
}

Schema schema = metadata.schemasById().get(snapshot.schemaId());
if (schema == null) {
schema = metadata.schema();
}

// Reject schema default values (initial-default / write-default)
for (Types.NestedField field : schema.columns()) {
Comment on lines +391 to +392
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Column default validation only considers top-level columns and may miss nested fields

schema.columns() only iterates top-level fields, so defaults on nested struct fields (if Iceberg permits initialDefault / writeDefault there) would bypass this check. Either traverse the full schema recursively to validate nested Types.NestedFields, or explicitly confirm/document that defaults are only allowed on top-level fields.

if (field.initialDefault() != null || field.writeDefault() != null) {
throw new PrestoException(NOT_SUPPORTED, "Iceberg v3 column default values are not supported");
}
}

// Reject Iceberg table encryption
if (!metadata.encryptionKeys().isEmpty() || snapshot.keyId() != null || metadata.properties().containsKey("encryption.key-id")) {
throw new PrestoException(NOT_SUPPORTED, "Iceberg table encryption is not supported");
}
}

/**
* This class implements the default implementation for getTableLayoutForConstraint which will be used in the case of a Java Worker
*/
Expand Down Expand Up @@ -829,6 +873,11 @@ public ConnectorMergeTableHandle beginMerge(ConnectorSession session, ConnectorT
Table icebergTable = getIcebergTable(session, icebergTableHandle.getSchemaTableName());
int formatVersion = ((BaseTable) icebergTable).operations().current().formatVersion();

if (formatVersion > MAX_FORMAT_VERSION_FOR_ROW_LEVEL_OPERATIONS) {
throw new PrestoException(NOT_SUPPORTED,
format("Iceberg table updates for format version %s are not supported yet", formatVersion));
}

if (formatVersion < MIN_FORMAT_VERSION_FOR_DELETE ||
!Optional.ofNullable(icebergTable.properties().get(TableProperties.UPDATE_MODE))
.map(mode -> mode.equals(MERGE_ON_READ.modeName()))
Expand Down Expand Up @@ -1302,6 +1351,12 @@ public IcebergTableHandle getTableHandle(ConnectorSession session, SchemaTableNa
})
.orElseGet(() -> resolveSnapshotIdByName(table, name));

// Validate unsupported v3 features (column defaults, encryption) before
// proceeding
if (table instanceof BaseTable) {
validateTableForPresto((BaseTable) table, tableSnapshotId);
}

// Getting an Iceberg table's schema, properties, and location will fail
// when the filesystem metadata is missing.
// See https://github.com/prestodb/presto/pull/21181
Expand Down Expand Up @@ -1404,6 +1459,10 @@ public ConnectorDeleteTableHandle beginDelete(ConnectorSession session, Connecto
if (formatVersion < MIN_FORMAT_VERSION_FOR_DELETE) {
throw new PrestoException(NOT_SUPPORTED, format("This connector only supports delete where one or more partitions are deleted entirely for table versions older than %d", MIN_FORMAT_VERSION_FOR_DELETE));
}
if (formatVersion > MAX_FORMAT_VERSION_FOR_ROW_LEVEL_OPERATIONS) {
throw new PrestoException(NOT_SUPPORTED,
format("Iceberg table updates for format version %s are not supported yet", formatVersion));
}
if (getDeleteMode(icebergTable) == RowLevelOperationMode.COPY_ON_WRITE) {
throw new PrestoException(NOT_SUPPORTED, "This connector only supports delete where one or more partitions are deleted entirely. Configure write.delete.mode table property to allow row level deletions.");
}
Expand Down Expand Up @@ -1653,6 +1712,12 @@ public ConnectorTableHandle beginUpdate(ConnectorSession session, ConnectorTable
IcebergTableHandle handle = (IcebergTableHandle) tableHandle;
Table icebergTable = getIcebergTable(session, handle.getSchemaTableName());
int formatVersion = opsFromTable(icebergTable).current().formatVersion();

if (formatVersion > MAX_FORMAT_VERSION_FOR_ROW_LEVEL_OPERATIONS) {
throw new PrestoException(NOT_SUPPORTED,
format("Iceberg table updates for format version %s are not supported yet", formatVersion));
}

if (formatVersion < MIN_FORMAT_VERSION_FOR_DELETE ||
!Optional.ofNullable(icebergTable.properties().get(TableProperties.UPDATE_MODE))
.map(mode -> mode.equals(MERGE_ON_READ.modeName()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorSplit;
import com.facebook.presto.spi.ConnectorSplitSource;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.SplitWeight;
import com.facebook.presto.spi.connector.ConnectorPartitionHandle;
import com.facebook.presto.spi.schedule.NodeSelectionStrategy;
Expand Down Expand Up @@ -46,6 +47,7 @@
import static com.facebook.presto.iceberg.IcebergUtil.getTargetSplitSize;
import static com.facebook.presto.iceberg.IcebergUtil.metadataColumnsMatchPredicates;
import static com.facebook.presto.iceberg.IcebergUtil.partitionDataFromStructLike;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.Iterators.limit;
import static java.util.Objects.requireNonNull;
Expand Down Expand Up @@ -124,6 +126,13 @@ private ConnectorSplit toIcebergSplit(FileScanTask task)
PartitionSpec spec = task.spec();
Optional<PartitionData> partitionData = partitionDataFromStructLike(spec, task.file().partition());

// Validate no PUFFIN deletion vectors (Iceberg v3 feature not yet supported)
for (org.apache.iceberg.DeleteFile deleteFile : task.deletes()) {
if (deleteFile.format() == org.apache.iceberg.FileFormat.PUFFIN) {
throw new PrestoException(NOT_SUPPORTED, "Iceberg deletion vectors (PUFFIN format) are not supported");
}
}

// TODO: We should leverage residual expression and convert that to TupleDomain.
// The predicate here is used by readers for predicate push down at reader level,
// so when we do not use residual expression, we are just wasting CPU cycles
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ public final class IcebergUtil
{
private static final Logger log = Logger.get(IcebergUtil.class);
public static final int MIN_FORMAT_VERSION_FOR_DELETE = 2;
public static final int MAX_FORMAT_VERSION_FOR_ROW_LEVEL_OPERATIONS = 2;
public static final int MAX_SUPPORTED_FORMAT_VERSION = 3;

public static final long DOUBLE_POSITIVE_ZERO = 0x0000000000000000L;
public static final long DOUBLE_POSITIVE_INFINITE = 0x7ff0000000000000L;
Expand Down Expand Up @@ -1188,7 +1190,11 @@ public static Map<String, String> populateTableProperties(IcebergAbstractMetadat
public static int parseFormatVersion(String formatVersion)
{
try {
return parseInt(formatVersion);
int version = parseInt(formatVersion);
if (version > MAX_SUPPORTED_FORMAT_VERSION) {
throw new PrestoException(NOT_SUPPORTED, format("Iceberg table format version %d is not supported", version));
}
return version;
}
catch (NumberFormatException | IndexOutOfBoundsException e) {
throw new PrestoException(ICEBERG_INVALID_FORMAT_VERSION, "Unable to parse user provided format version");
Expand Down
Loading
Loading