-
Notifications
You must be signed in to change notification settings - Fork 5.5k
feat(iceberg): Add $snapshot_id as hidden column in iceberg table #26189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,8 @@ | |
| import com.facebook.airlift.log.Logger; | ||
| import com.facebook.presto.common.RuntimeStats; | ||
| import com.facebook.presto.common.Subfield; | ||
| import com.facebook.presto.common.predicate.Domain; | ||
| import com.facebook.presto.common.predicate.Range; | ||
| import com.facebook.presto.common.predicate.TupleDomain; | ||
| import com.facebook.presto.common.type.BigintType; | ||
| import com.facebook.presto.common.type.SqlTimestampWithTimeZone; | ||
|
|
@@ -139,12 +141,15 @@ | |
| import static com.facebook.presto.iceberg.IcebergColumnHandle.IS_DELETED_COLUMN_METADATA; | ||
| import static com.facebook.presto.iceberg.IcebergColumnHandle.PATH_COLUMN_HANDLE; | ||
| import static com.facebook.presto.iceberg.IcebergColumnHandle.PATH_COLUMN_METADATA; | ||
| import static com.facebook.presto.iceberg.IcebergColumnHandle.SNAPSHOT_ID_COLUMN_HANDLE; | ||
| import static com.facebook.presto.iceberg.IcebergColumnHandle.SNAPSHOT_ID_COLUMN_METADATA; | ||
| import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_COMMIT_ERROR; | ||
| import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_INVALID_SNAPSHOT_ID; | ||
| import static com.facebook.presto.iceberg.IcebergMetadataColumn.DATA_SEQUENCE_NUMBER; | ||
| import static com.facebook.presto.iceberg.IcebergMetadataColumn.DELETE_FILE_PATH; | ||
| import static com.facebook.presto.iceberg.IcebergMetadataColumn.FILE_PATH; | ||
| import static com.facebook.presto.iceberg.IcebergMetadataColumn.IS_DELETED; | ||
| import static com.facebook.presto.iceberg.IcebergMetadataColumn.SNAPSHOT_ID; | ||
| import static com.facebook.presto.iceberg.IcebergMetadataColumn.UPDATE_ROW_DATA; | ||
| import static com.facebook.presto.iceberg.IcebergPartitionType.ALL; | ||
| import static com.facebook.presto.iceberg.IcebergSessionProperties.getCompressionCodec; | ||
|
|
@@ -282,6 +287,42 @@ public ConnectorTableLayoutResult getTableLayoutForConstraint( | |
|
|
||
| IcebergTableHandle handle = (IcebergTableHandle) table; | ||
| Table icebergTable = getIcebergTable(session, handle.getSchemaTableName()); | ||
| IcebergTableName name = IcebergTableName.from(handle.getTableName()); | ||
|
|
||
| Map<ColumnHandle, Domain> domains = constraint.getSummary().getDomains().orElse(Collections.emptyMap()); | ||
| for (Map.Entry<ColumnHandle, Domain> entry : domains.entrySet()) { | ||
| IcebergColumnHandle column = (IcebergColumnHandle) entry.getKey(); | ||
|
|
||
| if (column.getName().equalsIgnoreCase("$snapshot_id")) { | ||
| Domain domain = entry.getValue(); | ||
|
|
||
| if (domain.isSingleValue()) { | ||
| Optional<Long> snapshotId = Optional.of(((Number) domain.getSingleValue()).longValue()); | ||
| handle = handle.withUpdatedIcebergTableName( | ||
| new IcebergTableName(name.getTableName(), name.getTableType(), snapshotId, name.getChangelogEndSnapshot())); | ||
| } | ||
| else if (domain.getValues().getRanges().getOrderedRanges().size() == 1) { | ||
| Range range = domain.getValues().getRanges().getOrderedRanges().get(0); | ||
| if (range.isSingleValue()) { | ||
| Optional<Long> snapshotId = Optional.of(((Number) range.getSingleValue()).longValue()); | ||
| handle = handle.withUpdatedIcebergTableName( | ||
| new IcebergTableName(name.getTableName(), name.getTableType(), snapshotId, name.getChangelogEndSnapshot())); | ||
| } | ||
| else if (!range.isLowUnbounded() && range.isLowInclusive() && range.isHighUnbounded()) { | ||
| // Only support >= X | ||
| Optional<Long> lower = Optional.of(((Number) range.getLowBoundedValue()).longValue()); | ||
| handle = handle.withUpdatedIcebergTableName( | ||
| new IcebergTableName(name.getTableName(), name.getTableType(), lower, name.getChangelogEndSnapshot())); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @tdcmeehan I wanted to confirm a point here - |
||
| } | ||
| else { | ||
| throw new PrestoException(NOT_SUPPORTED, "Unsupported predicate for $snapshot_id; only >= constant is allowed"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we change the message to |
||
| } | ||
| } | ||
| else { | ||
| throw new PrestoException(NOT_SUPPORTED, "Unsupported complex predicate for $snapshot_id; only >= constant is allowed"); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| List<IcebergColumnHandle> partitionColumns = getPartitionKeyColumnHandles(handle, icebergTable, typeManager); | ||
| TupleDomain<ColumnHandle> partitionColumnPredicate = TupleDomain.withColumnDomains(Maps.filterKeys(constraint.getSummary().getDomains().get(), Predicates.in(partitionColumns))); | ||
|
|
@@ -444,6 +485,7 @@ protected ConnectorTableMetadata getTableOrViewMetadata(ConnectorSession session | |
| columns.add(DATA_SEQUENCE_NUMBER_COLUMN_METADATA); | ||
| columns.add(IS_DELETED_COLUMN_METADATA); | ||
| columns.add(DELETE_FILE_PATH_COLUMN_METADATA); | ||
| columns.add(SNAPSHOT_ID_COLUMN_METADATA); | ||
| } | ||
| return new ConnectorTableMetadata(table, columns.build(), createMetadataProperties(icebergTable, session), getTableComment(icebergTable)); | ||
| } | ||
|
|
@@ -955,6 +997,7 @@ public Map<String, ColumnHandle> getColumnHandles(ConnectorSession session, Conn | |
| columnHandles.put(DATA_SEQUENCE_NUMBER.getColumnName(), DATA_SEQUENCE_NUMBER_COLUMN_HANDLE); | ||
| columnHandles.put(IS_DELETED.getColumnName(), IS_DELETED_COLUMN_HANDLE); | ||
| columnHandles.put(DELETE_FILE_PATH.getColumnName(), DELETE_FILE_PATH_COLUMN_HANDLE); | ||
| columnHandles.put(SNAPSHOT_ID.getColumnName(), SNAPSHOT_ID_COLUMN_HANDLE); | ||
| } | ||
| return columnHandles.build(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -93,7 +93,7 @@ public ConnectorSplitSource getSplits( | |
| .metricsReporter(new RuntimeStatsMetricsReporter(session.getRuntimeStats())) | ||
| .fromSnapshotExclusive(fromSnapshot) | ||
| .toSnapshot(toSnapshot); | ||
| return new ChangelogSplitSource(session, typeManager, icebergTable, scan); | ||
| return new ChangelogSplitSource(session, typeManager, icebergTable, scan, toSnapshot); | ||
| } | ||
| else if (table.getIcebergTableName().getTableType() == EQUALITY_DELETES) { | ||
| CloseableIterable<DeleteFile> deleteFiles = IcebergUtil.getDeleteFiles(icebergTable, | ||
|
|
@@ -103,7 +103,7 @@ else if (table.getIcebergTableName().getTableType() == EQUALITY_DELETES) { | |
| table.getEqualityFieldIds(), | ||
| session.getRuntimeStats()); | ||
|
|
||
| return new EqualityDeletesSplitSource(session, icebergTable, deleteFiles); | ||
| return new EqualityDeletesSplitSource(session, icebergTable, deleteFiles, table.getIcebergTableName().getSnapshotId().get()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. getSnapshotId() returns Optional. |
||
| } | ||
| else { | ||
| TableScan tableScan = icebergTable.newScan() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,8 +62,8 @@ public class IcebergSplitSource | |
| private final long targetSplitSize; | ||
| private final NodeSelectionStrategy nodeSelectionStrategy; | ||
| private final long affinitySchedulingFileSectionSize; | ||
|
|
||
| private final TupleDomain<IcebergColumnHandle> metadataColumnConstraints; | ||
| private final long snapshotId; | ||
|
|
||
| public IcebergSplitSource( | ||
| ConnectorSession session, | ||
|
|
@@ -81,6 +81,7 @@ public IcebergSplitSource( | |
| closer.register(tableScan.planFiles()), | ||
| targetSplitSize) | ||
| .iterator()); | ||
| this.snapshotId = tableScan.snapshot().snapshotId(); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -143,6 +144,7 @@ private ConnectorSplit toIcebergSplit(FileScanTask task) | |
| task.deletes().stream().map(DeleteFile::fromIceberg).collect(toImmutableList()), | ||
| Optional.empty(), | ||
| getDataSequenceNumber(task.file()), | ||
| affinitySchedulingFileSectionSize); | ||
| affinitySchedulingFileSectionSize, | ||
| snapshotId); | ||
|
Comment on lines
-146
to
+148
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm a bit concerned about the snapshotId selection here. It seems like we are using the table-level snapshotId taken when the entire table was scanned, but my understanding is that it should be the snapshotId calculated based on the corresponding data file and delete files, right?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @hantangwangd this is an important observation. Selecting this column would be more useful if it returned the snapshot ID of the data file, i.e. which snapshot ID created the file. However, this column is primarily intended for filtering, as a way of altering the table handle to force a time travel on the table without introducing a new SPI or connector optimizer. Given this column will be hidden and not intended for direct use, I am comfortable with this being the snapshot ID of the scan, as that fulfills the intended purpose.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @tdcmeehan thanks for the detailed explanation. Based on my understanding of PR #26164 and the comments here, the primary purpose of this |
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2254,6 +2254,53 @@ public void testDeleteFilePathHiddenColumn() | |
| }); | ||
| } | ||
|
|
||
| @Test | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you also add a case where there are multiple snapshots? |
||
| public void testSnapshotIdHiddenColumnSimple() | ||
| { | ||
| String tableName = "test_snapshot_id_hidden_" + randomTableSuffix(); | ||
|
|
||
| assertUpdate("DROP TABLE IF EXISTS " + tableName); | ||
|
|
||
| assertUpdate("CREATE TABLE " + tableName + " AS SELECT * FROM tpch.tiny.region WHERE regionkey=0", 1); | ||
| assertUpdate("INSERT INTO " + tableName + " SELECT * FROM tpch.tiny.region WHERE regionkey=1", 1); | ||
| Table icebergTable = loadTable(tableName); | ||
|
|
||
| assertEquals( | ||
| computeActual("SELECT COUNT(DISTINCT \"$snapshot_id\") FROM " + tableName).getOnlyValue(), | ||
| 1L, | ||
| "Scan should return a single $snapshot_id"); | ||
|
|
||
| Long snapshotIdFromQuery = (Long) computeActual("SELECT \"$snapshot_id\" FROM " + tableName + " LIMIT 1").getOnlyValue(); | ||
| assertEquals(snapshotIdFromQuery, icebergTable.currentSnapshot().snapshotId()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testSnapshotIdPredicatePushdown() | ||
| { | ||
| String tableName = "test_snapshot_id_pred_pushdown_" + randomTableSuffix(); | ||
|
|
||
| assertUpdate("DROP TABLE IF EXISTS " + tableName); | ||
|
|
||
| assertUpdate("CREATE TABLE " + tableName + "(id int, data varchar)"); | ||
| assertUpdate("INSERT INTO " + tableName + " VALUES (1, 'a')", 1); | ||
|
|
||
| Long snapshotId = (Long) computeActual("SELECT \"$snapshot_id\" FROM " + tableName + " LIMIT 1").getOnlyValue(); | ||
| loadTable(tableName).refresh(); | ||
|
|
||
| // Single value predicate | ||
| assertQuery("SELECT COUNT(*) FROM " + tableName + " WHERE \"$snapshot_id\" = " + snapshotId, "VALUES 1"); | ||
|
|
||
| // Range predicate >= | ||
| assertQuery("SELECT COUNT(*) FROM " + tableName + " WHERE \"$snapshot_id\" >= " + snapshotId, "VALUES 1"); | ||
|
|
||
| // Unsupported predicate | ||
| assertQueryFails("SELECT * FROM " + tableName + " WHERE \"$snapshot_id\" < " + snapshotId, | ||
| "Unsupported predicate for \\$snapshot_id; only >= constant is allowed"); | ||
|
|
||
| // BETWEEN same value | ||
| assertQuery("SELECT COUNT(*) FROM " + tableName + " WHERE \"$snapshot_id\" BETWEEN " + snapshotId + " AND " + snapshotId, "VALUES 1"); | ||
| } | ||
|
|
||
| @Test(dataProvider = "equalityDeleteOptions") | ||
| public void testEqualityDeletesWithDeletedHiddenColumn(String fileFormat, boolean joinRewriteEnabled) | ||
| throws Exception | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if we are querying time travel tables? Here we will always overwrite the snapshotId.
Probably we should add some check for the snapshot in predicate and the snapshot specified in time travel.