-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Minor improvement for Iceberg S3 Tables #27666
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
c330785
fc2b636
7300a62
0170e05
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,7 @@ | |
| import io.trino.filesystem.FileEntry; | ||
| import io.trino.filesystem.FileIterator; | ||
| import io.trino.filesystem.Location; | ||
| import io.trino.filesystem.Locations; | ||
| import io.trino.filesystem.TrinoFileSystem; | ||
| import io.trino.metastore.Column; | ||
| import io.trino.metastore.HiveMetastore; | ||
|
|
@@ -49,6 +50,7 @@ | |
| import io.trino.plugin.iceberg.aggregation.DataSketchStateSerializer; | ||
| import io.trino.plugin.iceberg.aggregation.IcebergThetaSketchForStats; | ||
| import io.trino.plugin.iceberg.catalog.TrinoCatalog; | ||
| import io.trino.plugin.iceberg.catalog.rest.TrinoRestCatalog; | ||
| import io.trino.plugin.iceberg.functions.IcebergFunctionProvider; | ||
| import io.trino.plugin.iceberg.procedure.IcebergAddFilesFromTableHandle; | ||
| import io.trino.plugin.iceberg.procedure.IcebergAddFilesHandle; | ||
|
|
@@ -249,7 +251,6 @@ | |
| import static com.google.common.collect.Iterables.getOnlyElement; | ||
| import static com.google.common.collect.Maps.transformValues; | ||
| import static com.google.common.collect.Sets.difference; | ||
| import static io.trino.filesystem.Locations.isS3Tables; | ||
| import static io.trino.plugin.base.filter.UtcConstraintExtractor.extractTupleDomain; | ||
| import static io.trino.plugin.base.projection.ApplyProjectionUtil.extractSupportedProjectedColumns; | ||
| import static io.trino.plugin.base.projection.ApplyProjectionUtil.replaceWithNewVariables; | ||
|
|
@@ -1304,13 +1305,13 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con | |
|
|
||
| if (tableLocation == null) { | ||
| tableLocation = getTableLocation(tableMetadata.getProperties()) | ||
| .orElseGet(() -> catalog.defaultTableLocation(session, tableMetadata.getTable())); | ||
| .orElseGet(() -> catalog.defaultTableLocation(session, tableMetadata.getTable()).orElse(null)); | ||
| } | ||
| transaction = newCreateTableTransaction(catalog, tableMetadata, session, replace, tableLocation, allowedExtraProperties); | ||
| Location location = Location.of(transaction.table().location()); | ||
| try { | ||
| // S3 Tables internally assigns a unique location for each table | ||
| if (!isS3Tables(location.toString())) { | ||
| if (!Locations.isS3Tables(location.toString())) { | ||
| TrinoFileSystem fileSystem = fileSystemFactory.create(session.getIdentity(), transaction.table().io().properties()); | ||
| if (!replace && fileSystem.listFiles(location).hasNext()) { | ||
| throw new TrinoException(ICEBERG_FILESYSTEM_ERROR, format("" + | ||
|
|
@@ -1527,10 +1528,7 @@ public Optional<ConnectorOutputMetadata> finishInsert( | |
| appendFiles.scanManifestsWith(icebergScanExecutor); | ||
| commitUpdate(appendFiles, session, "insert"); | ||
|
|
||
| if (isS3Tables(icebergTable.location())) { | ||
| log.debug("S3 Tables do not support statistics: %s", table.name()); | ||
| } | ||
| else if (!computedStatistics.isEmpty()) { | ||
| if (!computedStatistics.isEmpty()) { | ||
| long newSnapshotId = icebergTable.currentSnapshot().snapshotId(); | ||
|
|
||
| CollectedStatistics collectedStatistics = processComputedTableStatistics(icebergTable, computedStatistics); | ||
|
|
@@ -2874,6 +2872,11 @@ public TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(Connector | |
| return TableStatisticsMetadata.empty(); | ||
| } | ||
|
|
||
| if (isS3Tables()) { | ||
| // S3 Tables throw "Malformed request: Cannot parse missing field: statistics" error when we try to commit extended statistics | ||
| return TableStatisticsMetadata.empty(); | ||
| } | ||
|
|
||
| if (tableReplace) { | ||
| return getStatisticsCollectionMetadata(tableMetadata, Optional.empty(), availableColumnNames -> {}); | ||
| } | ||
|
|
@@ -2922,6 +2925,10 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata(ConnectorSession | |
| IcebergSessionProperties.EXTENDED_STATISTICS_ENABLED)); | ||
| } | ||
|
|
||
| if (isS3Tables()) { | ||
| throw new TrinoException(NOT_SUPPORTED, "S3 Tables do not support analyze"); | ||
| } | ||
|
|
||
| checkArgument(handle.getTableType() == DATA, "Cannot analyze non-DATA table: %s", handle.getTableType()); | ||
|
|
||
| if (handle.getSnapshotId().isEmpty()) { | ||
|
|
@@ -2987,9 +2994,6 @@ public ConnectorTableHandle beginStatisticsCollection(ConnectorSession session, | |
| { | ||
| IcebergTableHandle handle = (IcebergTableHandle) tableHandle; | ||
| Table icebergTable = catalog.loadTable(session, handle.getSchemaTableName()); | ||
| if (isS3Tables(icebergTable.location())) { | ||
| throw new TrinoException(NOT_SUPPORTED, "S3 Tables do not support analyze"); | ||
| } | ||
| beginTransaction(icebergTable); | ||
| return handle; | ||
| } | ||
|
|
@@ -4043,6 +4047,11 @@ public WriterScalingOptions getInsertWriterScalingOptions(ConnectorSession sessi | |
| return WriterScalingOptions.ENABLED; | ||
| } | ||
|
|
||
| private boolean isS3Tables() | ||
| { | ||
| return catalog instanceof TrinoRestCatalog restCatalog && restCatalog.isS3Tables(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Now that we have this new check, do we still need the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The name "isS3Tables" is not clear to me, the method is using a catalog to check if it "isS3Tables", I don’t have a strong suggestion for an alternative name though |
||
| } | ||
|
|
||
| public Optional<Long> getIncrementalRefreshFromSnapshot() | ||
| { | ||
| return fromSnapshotForRefresh; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,7 +26,6 @@ | |
| import io.trino.spi.connector.SchemaTableName; | ||
| import io.trino.spi.metrics.Metrics; | ||
| import io.trino.spi.security.TrinoPrincipal; | ||
| import jakarta.annotation.Nullable; | ||
| import org.apache.iceberg.BaseTable; | ||
| import org.apache.iceberg.PartitionSpec; | ||
| import org.apache.iceberg.Schema; | ||
|
|
@@ -161,8 +160,7 @@ Transaction newCreateOrReplaceTableTransaction( | |
|
|
||
| void updateViewColumnComment(ConnectorSession session, SchemaTableName schemaViewName, String columnName, Optional<String> comment); | ||
|
|
||
| @Nullable | ||
| String defaultTableLocation(ConnectorSession session, SchemaTableName schemaTableName); | ||
| Optional<String> defaultTableLocation(ConnectorSession session, SchemaTableName schemaTableName); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ❤️ |
||
|
|
||
| void setTablePrincipal(ConnectorSession session, SchemaTableName schemaTableName, TrinoPrincipal principal); | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,6 +45,7 @@ | |
| import io.trino.spi.security.TrinoPrincipal; | ||
| import io.trino.spi.type.TypeManager; | ||
| import org.apache.iceberg.BaseTable; | ||
| import org.apache.iceberg.CatalogProperties; | ||
| import org.apache.iceberg.PartitionSpec; | ||
| import org.apache.iceberg.Schema; | ||
| import org.apache.iceberg.SortOrder; | ||
|
|
@@ -575,7 +576,7 @@ public void updateTableComment(ConnectorSession session, SchemaTableName schemaT | |
| } | ||
|
|
||
| @Override | ||
| public String defaultTableLocation(ConnectorSession session, SchemaTableName schemaTableName) | ||
| public Optional<String> defaultTableLocation(ConnectorSession session, SchemaTableName schemaTableName) | ||
| { | ||
| String tableName = createLocationForTable(schemaTableName.getTableName()); | ||
|
|
||
|
|
@@ -584,10 +585,10 @@ public String defaultTableLocation(ConnectorSession session, SchemaTableName sch | |
| if (databaseLocation == null) { | ||
| // Iceberg REST catalog doesn't require location property. | ||
| // S3 Tables doesn't return the property. | ||
| return null; | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
| return appendPath(databaseLocation, tableName); | ||
| return Optional.of(appendPath(databaseLocation, tableName)); | ||
| } | ||
|
|
||
| private String createLocationForTable(String baseTableName) | ||
|
|
@@ -618,7 +619,7 @@ public void createView(ConnectorSession session, SchemaTableName schemaViewName, | |
| .withDefaultNamespace(toRemoteNamespace(session, toNamespace(schemaViewName.getSchemaName()))) | ||
| .withDefaultCatalog(definition.getCatalog().orElse(null)) | ||
| .withProperties(properties.buildOrThrow()) | ||
| .withLocation(defaultTableLocation(session, schemaViewName)); | ||
| .withLocation(defaultTableLocation(session, schemaViewName).orElse(null)); | ||
| try { | ||
| if (replace) { | ||
| viewBuilder.createOrReplace(); | ||
|
|
@@ -827,6 +828,12 @@ public void updateViewColumnComment(ConnectorSession session, SchemaTableName sc | |
| replaceViewVersion.commit(); | ||
| } | ||
|
|
||
| public boolean isS3Tables() | ||
| { | ||
| String warehouse = restSessionCatalog.properties().get(CatalogProperties.WAREHOUSE_LOCATION); | ||
| return warehouse != null && warehouse.startsWith("s3tablescatalog/") && "sigv4".equals(restSessionCatalog.properties().get("rest.auth.type")); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Worth a code comment explaining where this value came from. |
||
| } | ||
|
|
||
| private SessionCatalog.SessionContext convert(ConnectorSession session) | ||
| { | ||
| return switch (sessionType) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -50,6 +50,7 @@ | |
| import static io.trino.plugin.iceberg.catalog.jdbc.TestingIcebergJdbcServer.PASSWORD; | ||
| import static io.trino.plugin.iceberg.catalog.jdbc.TestingIcebergJdbcServer.USER; | ||
| import static io.trino.plugin.iceberg.catalog.rest.RestCatalogTestUtils.backendCatalog; | ||
| import static io.trino.testing.SystemEnvironmentUtils.requireEnv; | ||
| import static io.trino.testing.TestingProperties.requiredNonEmptySystemProperty; | ||
| import static io.trino.testing.TestingSession.testSessionBuilder; | ||
| import static io.trino.testing.containers.Minio.MINIO_ACCESS_KEY; | ||
|
|
@@ -284,6 +285,34 @@ static void main() | |
| } | ||
| } | ||
|
|
||
| public static final class IcebergS3TablesQueryRunnerMain | ||
| { | ||
| private IcebergS3TablesQueryRunnerMain() {} | ||
|
|
||
| static void main() | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ❤️ |
||
| throws Exception | ||
| { | ||
| @SuppressWarnings("resource") | ||
| QueryRunner queryRunner = IcebergQueryRunner.builder("tpch") | ||
| .addCoordinatorProperty("http-server.http.port", "8080") | ||
| .addIcebergProperty("iceberg.catalog.type", "rest") | ||
| .addIcebergProperty("iceberg.rest-catalog.uri", "https://glue.%s.amazonaws.com/iceberg".formatted(requireEnv("AWS_REGION"))) | ||
| .addIcebergProperty("iceberg.rest-catalog.warehouse", "s3tablescatalog/" + requireEnv("S3_TABLES_BUCKET")) | ||
| .addIcebergProperty("iceberg.rest-catalog.view-endpoints-enabled", "false") | ||
| .addIcebergProperty("iceberg.rest-catalog.security", "sigv4") | ||
| .addIcebergProperty("iceberg.rest-catalog.signing-name", "glue") | ||
| .addIcebergProperty("fs.native-s3.enabled", "true") | ||
| .addIcebergProperty("s3.region", requireEnv("AWS_REGION")) | ||
| .addIcebergProperty("s3.aws-access-key", requireEnv("AWS_ACCESS_KEY_ID")) | ||
| .addIcebergProperty("s3.aws-secret-key", requireEnv("AWS_SECRET_ACCESS_KEY")) | ||
| .build(); | ||
|
|
||
| Logger log = Logger.get(IcebergS3TablesQueryRunnerMain.class); | ||
| log.info("======== SERVER STARTED ========"); | ||
| log.info("\n====\n%s\n====", queryRunner.getCoordinator().getBaseUrl()); | ||
| } | ||
| } | ||
|
|
||
| public static final class IcebergExternalQueryRunnerMain | ||
| { | ||
| private IcebergExternalQueryRunnerMain() {} | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`statistics` is part of the Iceberg spec and has been for a while now; it's sad it's not supported by S3 Tables, because it makes queries inherently less performant on top of these tables. cc @pettyjamesm. Is there a public tracker link for this important missing functionality?
(I mean AWS's issue, not an issue in Trino.)