-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Add support to redirect table operations from Iceberg to Hive #11356
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,6 +63,7 @@ | |
| import io.trino.spi.connector.SchemaTableName; | ||
| import io.trino.spi.connector.SchemaTablePrefix; | ||
| import io.trino.spi.connector.SystemTable; | ||
| import io.trino.spi.connector.TableColumnsMetadata; | ||
| import io.trino.spi.connector.TableNotFoundException; | ||
| import io.trino.spi.expression.ConnectorExpression; | ||
| import io.trino.spi.expression.Variable; | ||
|
|
@@ -115,6 +116,7 @@ | |
| import java.util.function.Supplier; | ||
| import java.util.regex.Matcher; | ||
| import java.util.regex.Pattern; | ||
| import java.util.stream.Stream; | ||
|
|
||
| import static com.google.common.base.Verify.verify; | ||
| import static com.google.common.collect.ImmutableList.toImmutableList; | ||
|
|
@@ -151,7 +153,6 @@ | |
| import static io.trino.spi.connector.RetryMode.NO_RETRIES; | ||
| import static io.trino.spi.type.BigintType.BIGINT; | ||
| import static java.lang.String.format; | ||
| import static java.util.Collections.singletonList; | ||
| import static java.util.Objects.requireNonNull; | ||
| import static java.util.function.Function.identity; | ||
| import static java.util.stream.Collectors.joining; | ||
|
|
@@ -206,7 +207,10 @@ public Optional<TrinoPrincipal> getSchemaOwner(ConnectorSession session, Catalog | |
| public IcebergTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName) | ||
| { | ||
| IcebergTableName name = IcebergTableName.from(tableName.getTableName()); | ||
| verify(name.getTableType() == DATA, "Wrong table type: " + name.getTableNameWithType()); | ||
| if (name.getTableType() != DATA) { | ||
| // Pretend the table does not exist to produce better error message in case of table redirects to Hive | ||
| return null; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When the table redirection towards Hive connector is not enabled, From
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. keeping
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Keeping error messages like the following: I tend to say that the error message fits better (not ideal, but better).
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This requires a code comment.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to make it clear from the code perspective:
In Probably a refactoring of |
||
| } | ||
|
|
||
| Table table; | ||
| try { | ||
|
|
@@ -236,6 +240,7 @@ public Optional<SystemTable> getSystemTable(ConnectorSession session, SchemaTabl | |
| .map(systemTable -> new ClassLoaderSafeSystemTable(systemTable, getClass().getClassLoader())); | ||
| } | ||
|
|
||
| @SuppressWarnings("TryWithIdenticalCatches") | ||
| private Optional<SystemTable> getRawSystemTable(ConnectorSession session, SchemaTableName tableName) | ||
| { | ||
| IcebergTableName name = IcebergTableName.from(tableName.getTableName()); | ||
|
|
@@ -251,6 +256,10 @@ private Optional<SystemTable> getRawSystemTable(ConnectorSession session, Schema | |
| catch (TableNotFoundException e) { | ||
| return Optional.empty(); | ||
| } | ||
| catch (UnknownTableTypeException e) { | ||
| // avoid dealing with non Iceberg tables | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
| SchemaTableName systemTableName = new SchemaTableName(tableName.getSchemaName(), name.getTableNameWithType()); | ||
| switch (name.getTableType()) { | ||
|
|
@@ -395,27 +404,43 @@ public ColumnMetadata getColumnMetadata(ConnectorSession session, ConnectorTable | |
| @Override | ||
| public Map<SchemaTableName, List<ColumnMetadata>> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) | ||
| { | ||
| List<SchemaTableName> tables = prefix.getTable() | ||
| .map(ignored -> singletonList(prefix.toSchemaTableName())) | ||
| .orElseGet(() -> listTables(session, prefix.getSchema())); | ||
| throw new UnsupportedOperationException("The deprecated listTableColumns is not supported because streamTableColumns is implemented instead"); | ||
| } | ||
|
|
||
| ImmutableMap.Builder<SchemaTableName, List<ColumnMetadata>> columns = ImmutableMap.builder(); | ||
| for (SchemaTableName table : tables) { | ||
| try { | ||
| columns.put(table, getTableMetadata(session, table).getColumns()); | ||
| } | ||
| catch (TableNotFoundException e) { | ||
| // table disappeared during listing operation | ||
| } | ||
| catch (UnknownTableTypeException e) { | ||
| // ignore table of unknown type | ||
| } | ||
| catch (RuntimeException e) { | ||
| // Table can be being removed and this may cause all sorts of exceptions. Log, because we're catching broadly. | ||
| log.warn(e, "Failed to access metadata of table %s during column listing for %s", table, prefix); | ||
| } | ||
| /** | ||
| * Streams column metadata for the tables selected by {@code prefix}. | ||
| * <p> | ||
| * When the prefix names no table, the candidates come from {@code catalog.listTables} for the prefix schema; | ||
| * otherwise the single table named by the prefix is used. A table for which {@link #redirectTable} returns a | ||
| * value is reported via {@code TableColumnsMetadata.forRedirectedTable}; any other table is reported with the | ||
| * columns of {@code getTableMetadata}. Tables whose lookup throws are omitted from the stream. | ||
| * | ||
| * @param session the session passed through to the catalog, redirect and metadata lookups | ||
| * @param prefix the schema/table prefix to list; must not be null | ||
| * @return one entry per table that could be resolved or is redirected | ||
| */ | ||
| @Override | ||
| @SuppressWarnings("TryWithIdenticalCatches") | ||
| public Stream<TableColumnsMetadata> streamTableColumns(ConnectorSession session, SchemaTablePrefix prefix) | ||
| { | ||
| requireNonNull(prefix, "prefix is null"); | ||
| List<SchemaTableName> schemaTableNames; | ||
| if (prefix.getTable().isEmpty()) { | ||
| schemaTableNames = catalog.listTables(session, prefix.getSchema()); | ||
| } | ||
| else { | ||
| schemaTableNames = ImmutableList.of(prefix.toSchemaTableName()); | ||
| } | ||
|
findinpath marked this conversation as resolved.
Outdated
|
||
| return columns.buildOrThrow(); | ||
| return schemaTableNames.stream() | ||
| .flatMap(tableName -> { | ||
| try { | ||
| // Check for a redirect first so that metadata of a redirected table is never loaded here | ||
| if (redirectTable(session, tableName).isPresent()) { | ||
| return Stream.of(TableColumnsMetadata.forRedirectedTable(tableName)); | ||
| } | ||
| return Stream.of(TableColumnsMetadata.forTable(tableName, getTableMetadata(session, tableName).getColumns())); | ||
| } | ||
| catch (TableNotFoundException e) { | ||
| // Table disappeared during listing operation | ||
| return Stream.empty(); | ||
| } | ||
| catch (UnknownTableTypeException e) { | ||
| // Skip unsupported table type in case that the table redirects are not enabled | ||
| return Stream.empty(); | ||
| } | ||
| catch (RuntimeException e) { | ||
| // The table may be in the process of being removed, which can cause all sorts of exceptions. Log, because we're catching broadly. | ||
| log.warn(e, "Failed to access metadata of table %s during streaming table columns for %s", tableName, prefix); | ||
| return Stream.empty(); | ||
| } | ||
| }); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -1394,6 +1419,12 @@ private Map<String, Optional<TableToken>> getMaterializedViewToken(ConnectorSess | |
| return viewToken; | ||
| } | ||
|
|
||
| /** | ||
| * Resolves a possible table redirect by delegating to {@code catalog.redirectTable}. | ||
| * | ||
| * @param session the session forwarded to the catalog | ||
| * @param tableName the table to look up | ||
| * @return whatever the catalog returns: the redirect target, or empty when it reports none | ||
| */ | ||
| @Override | ||
| public Optional<CatalogSchemaTableName> redirectTable(ConnectorSession session, SchemaTableName tableName) | ||
| { | ||
| return catalog.redirectTable(session, tableName); | ||
| } | ||
|
|
||
| private static class TableToken | ||
| { | ||
| // Current Snapshot ID of the table | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,6 +40,7 @@ | |
| import io.trino.plugin.iceberg.catalog.AbstractTrinoCatalog; | ||
| import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; | ||
| import io.trino.spi.TrinoException; | ||
| import io.trino.spi.connector.CatalogSchemaTableName; | ||
| import io.trino.spi.connector.ConnectorMaterializedViewDefinition; | ||
| import io.trino.spi.connector.ConnectorSession; | ||
| import io.trino.spi.connector.ConnectorViewDefinition; | ||
|
|
@@ -74,16 +75,22 @@ | |
| import static io.trino.plugin.hive.ViewReaderUtil.encodeViewData; | ||
| import static io.trino.plugin.hive.ViewReaderUtil.isPrestoView; | ||
| import static io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults; | ||
| import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; | ||
| import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_CATALOG_ERROR; | ||
| import static io.trino.plugin.iceberg.IcebergSchemaProperties.LOCATION_PROPERTY; | ||
| import static io.trino.plugin.iceberg.IcebergSessionProperties.getHiveCatalogName; | ||
| import static io.trino.plugin.iceberg.IcebergUtil.getIcebergTableWithMetadata; | ||
| import static io.trino.plugin.iceberg.IcebergUtil.quotedTableName; | ||
| import static io.trino.plugin.iceberg.IcebergUtil.validateTableCanBeDropped; | ||
| import static io.trino.plugin.iceberg.catalog.glue.GlueIcebergUtil.getTableInput; | ||
| import static io.trino.plugin.iceberg.catalog.glue.GlueIcebergUtil.getViewTableInput; | ||
| import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; | ||
| import static io.trino.spi.connector.SchemaTableName.schemaTableName; | ||
| import static java.lang.String.format; | ||
| import static java.util.Objects.requireNonNull; | ||
| import static org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW; | ||
| import static org.apache.iceberg.BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE; | ||
| import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP; | ||
| import static org.apache.iceberg.CatalogUtil.dropTableData; | ||
|
|
||
| public class TrinoGlueCatalog | ||
|
|
@@ -575,4 +582,40 @@ public void renameMaterializedView(ConnectorSession session, SchemaTableName sou | |
| { | ||
| throw new TrinoException(NOT_SUPPORTED, "renameMaterializedView is not supported for Iceberg Glue catalogs"); | ||
| } | ||
|
|
||
| /** | ||
| * Decides whether {@code tableName} should be redirected to the catalog named by {@code getHiveCatalogName(session)}. | ||
| * <p> | ||
| * No redirect (empty result) is returned when: no Hive catalog name is available from the session, the schema is a | ||
| * Hive system schema, the table is not found, the table type is {@code VIRTUAL_VIEW}, or the table is an Iceberg table. | ||
| * Otherwise the result points at the target catalog and keeps the original, unmodified table name. | ||
| * | ||
| * @param session the session supplying the target catalog name; must not be null | ||
| * @param tableName the requested table, possibly carrying a {@code $suffix}; must not be null | ||
| * @return the redirect target for a non-Iceberg, non-view table, or empty | ||
| */ | ||
| @Override | ||
| public Optional<CatalogSchemaTableName> redirectTable(ConnectorSession session, SchemaTableName tableName) | ||
| { | ||
| requireNonNull(session, "session is null"); | ||
| requireNonNull(tableName, "tableName is null"); | ||
| Optional<String> targetCatalogName = getHiveCatalogName(session); | ||
| if (targetCatalogName.isEmpty()) { | ||
| return Optional.empty(); | ||
| } | ||
| if (isHiveSystemSchema(tableName.getSchemaName())) { | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
| // we need to chop off any "$partitions" and similar suffixes from table name while querying the metastore for the Table object | ||
| // Everything from the last '$' onwards is dropped; a name without '$' is used as is | ||
| int metadataMarkerIndex = tableName.getTableName().lastIndexOf('$'); | ||
| SchemaTableName tableNameBase = (metadataMarkerIndex == -1) ? tableName : schemaTableName( | ||
| tableName.getSchemaName(), | ||
| tableName.getTableName().substring(0, metadataMarkerIndex)); | ||
|
|
||
| Optional<com.amazonaws.services.glue.model.Table> table = getTable(new SchemaTableName(tableNameBase.getSchemaName(), tableNameBase.getTableName())); | ||
|
|
||
| // Missing tables and views are never redirected | ||
| if (table.isEmpty() || VIRTUAL_VIEW.name().equals(table.get().getTableType())) { | ||
| return Optional.empty(); | ||
| } | ||
| if (!isIcebergTable(table.get())) { | ||
| // After redirecting, use the original table name, with "$partitions" and similar suffixes | ||
| return targetCatalogName.map(catalog -> new CatalogSchemaTableName(catalog, tableName)); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit, I'd use
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We do have a few lines above: Also note that the method returns an |
||
| } | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
| /** | ||
| * Tells whether the given Glue table is marked as an Iceberg table, i.e. whether its {@code TABLE_TYPE_PROP} | ||
| * parameter equals {@code ICEBERG_TABLE_TYPE_VALUE} ignoring case. | ||
| * <p> | ||
| * NOTE(review): {@code table.getParameters()} is dereferenced without a null check - confirm that Glue never | ||
| * returns a null parameter map for the tables reaching this method. | ||
| * | ||
| * @param table the Glue table to inspect | ||
| * @return {@code true} when the table type parameter matches the Iceberg marker value | ||
| */ | ||
| private static boolean isIcebergTable(com.amazonaws.services.glue.model.Table table) | ||
|
findepi marked this conversation as resolved.
Outdated
|
||
| { | ||
| return ICEBERG_TABLE_TYPE_VALUE.equalsIgnoreCase(table.getParameters().get(TABLE_TYPE_PROP)); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.