diff --git a/docs/src/main/sphinx/connector/delta-lake.md b/docs/src/main/sphinx/connector/delta-lake.md index a35c582adf2b..a6d2fa5afb4d 100644 --- a/docs/src/main/sphinx/connector/delta-lake.md +++ b/docs/src/main/sphinx/connector/delta-lake.md @@ -161,6 +161,13 @@ values. Typical usage does not require you to configure them. - Maximum number of metastore data objects per transaction in the Hive metastore cache. - `1000` +* - `delta.metastore.store-table-metadata` + - Store table comments and column definitions in the metastore. The write + permission is required to update the metastore. + - `false` +* - `delta.metastore.store-table-metadata-threads` + - Number of threads used for storing table metadata in the metastore. + - `5` * - `delta.delete-schema-locations-fallback` - Whether schema locations are deleted when Trino can't determine whether they contain external files. diff --git a/plugin/trino-delta-lake/pom.xml b/plugin/trino-delta-lake/pom.xml index 83a1da3d930f..10740fd938e2 100644 --- a/plugin/trino-delta-lake/pom.xml +++ b/plugin/trino-delta-lake/pom.xml @@ -130,6 +130,11 @@ trino-plugin-toolkit + + io.trino.hive + hive-thrift + + jakarta.annotation + jakarta.annotation-api @@ -181,6 +186,21 @@ jmxutils + + software.amazon.awssdk + aws-core + + + + software.amazon.awssdk + glue + + + + software.amazon.awssdk + utils + + com.fasterxml.jackson.core jackson-annotations @@ -275,12 +295,6 @@ runtime - - software.amazon.awssdk - glue - runtime - - com.github.docker-java docker-java-api diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java index aded91e665bd..d82d9749da55 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java @@ -74,6 +74,8 @@ public class DeltaLakeConfig private boolean 
collectExtendedStatisticsOnWrite = true; private HiveCompressionCodec compressionCodec = HiveCompressionCodec.SNAPPY; private long perTransactionMetastoreCacheMaximumSize = 1000; + private boolean storeTableMetadataEnabled; + private int storeTableMetadataThreads = 5; private boolean deleteSchemaLocationsFallback; private String parquetTimeZone = TimeZone.getDefault().getID(); private DataSize targetMaxFileSize = DataSize.of(1, GIGABYTE); @@ -377,6 +379,33 @@ public DeltaLakeConfig setPerTransactionMetastoreCacheMaximumSize(long perTransa return this; } + public boolean isStoreTableMetadataEnabled() + { + return storeTableMetadataEnabled; + } + + @Config("delta.metastore.store-table-metadata") + @ConfigDescription("Store table metadata in metastore") + public DeltaLakeConfig setStoreTableMetadataEnabled(boolean storeTableMetadataEnabled) + { + this.storeTableMetadataEnabled = storeTableMetadataEnabled; + return this; + } + + @Min(0) // Allow 0 to use the same thread for testing purpose + public int getStoreTableMetadataThreads() + { + return storeTableMetadataThreads; + } + + @Config("delta.metastore.store-table-metadata-threads") + @ConfigDescription("Number of threads used for storing table metadata in metastore") + public DeltaLakeConfig setStoreTableMetadataThreads(int storeTableMetadataThreads) + { + this.storeTableMetadataThreads = storeTableMetadataThreads; + return this; + } + public boolean isDeleteSchemaLocationsFallback() { return this.deleteSchemaLocationsFallback; diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java index fe55e251ab69..5284e386be0a 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableSet; import 
com.google.common.collect.ImmutableTable; import com.google.common.collect.Sets; +import com.google.common.collect.Streams; import dev.failsafe.Failsafe; import dev.failsafe.RetryPolicy; import io.airlift.json.JsonCodec; @@ -47,7 +48,10 @@ import io.trino.plugin.deltalake.expression.ParsingException; import io.trino.plugin.deltalake.expression.SparkExpressionParser; import io.trino.plugin.deltalake.metastore.DeltaLakeMetastore; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.TableUpdateInfo; import io.trino.plugin.deltalake.metastore.DeltaMetastoreTable; +import io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore; import io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException; import io.trino.plugin.deltalake.procedure.DeltaLakeTableExecuteHandle; import io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId; @@ -78,6 +82,7 @@ import io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriterFactory; import io.trino.plugin.hive.TrinoViewHiveMetastore; import io.trino.plugin.hive.security.AccessControlMetadata; +import io.trino.spi.ErrorCode; import io.trino.spi.NodeManager; import io.trino.spi.TrinoException; import io.trino.spi.block.Block; @@ -105,6 +110,7 @@ import io.trino.spi.connector.Constraint; import io.trino.spi.connector.ConstraintApplicationResult; import io.trino.spi.connector.ProjectionApplicationResult; +import io.trino.spi.connector.RelationCommentMetadata; import io.trino.spi.connector.RetryMode; import io.trino.spi.connector.RowChangeParadigm; import io.trino.spi.connector.SaveMode; @@ -160,6 +166,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Objects; import java.util.Optional; import java.util.OptionalInt; import java.util.OptionalLong; @@ -168,6 +175,7 @@ import java.util.concurrent.atomic.AtomicInteger; import 
java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; +import java.util.function.UnaryOperator; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -189,6 +197,7 @@ import static io.trino.hive.formats.HiveClassNames.LAZY_SIMPLE_SERDE_CLASS; import static io.trino.hive.formats.HiveClassNames.SEQUENCEFILE_INPUT_FORMAT_CLASS; import static io.trino.metastore.StorageFormat.create; +import static io.trino.metastore.Table.TABLE_COMMENT; import static io.trino.plugin.base.filter.UtcConstraintExtractor.extractTupleDomain; import static io.trino.plugin.base.projection.ApplyProjectionUtil.ProjectedColumnRepresentation; import static io.trino.plugin.base.projection.ApplyProjectionUtil.extractSupportedProjectedColumns; @@ -217,6 +226,7 @@ import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isProjectionPushdownEnabled; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isQueryPartitionFilterRequired; +import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isStoreTableMetadataInMetastoreEnabled; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isTableStatisticsEnabled; import static io.trino.plugin.deltalake.DeltaLakeSplitManager.partitionMatchesPredicate; import static io.trino.plugin.deltalake.DeltaLakeTableProperties.CHANGE_DATA_FEED_ENABLED_PROPERTY; @@ -228,8 +238,14 @@ import static io.trino.plugin.deltalake.DeltaLakeTableProperties.getCheckpointInterval; import static io.trino.plugin.deltalake.DeltaLakeTableProperties.getLocation; import static io.trino.plugin.deltalake.DeltaLakeTableProperties.getPartitionedBy; +import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.containsSchemaString; +import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.getLastTransactionVersion; +import static 
io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.isSameTransactionVersion; +import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters; import static io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_PROPERTY; import static io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_VALUE; +import static io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.convertToDeltaMetastoreTable; +import static io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.verifyDeltaLakeTable; import static io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId.OPTIMIZE; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.APPEND_ONLY_CONFIGURATION_KEY; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.COLUMN_MAPPING_PHYSICAL_NAME_CONFIGURATION_KEY; @@ -275,13 +291,17 @@ import static io.trino.plugin.hive.util.HiveUtil.escapeTableName; import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable; import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema; +import static io.trino.spi.ErrorType.EXTERNAL; import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; import static io.trino.spi.StandardErrorCode.GENERIC_USER_ERROR; import static io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY; import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS; import static io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY; +import static io.trino.spi.StandardErrorCode.NOT_FOUND; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static io.trino.spi.StandardErrorCode.QUERY_REJECTED; +import static io.trino.spi.StandardErrorCode.TABLE_NOT_FOUND; +import static io.trino.spi.StandardErrorCode.UNSUPPORTED_TABLE_TYPE; import static io.trino.spi.connector.RetryMode.NO_RETRIES; import static 
io.trino.spi.connector.RowChangeParadigm.DELETE_ROW_AND_INSERT_ROW; import static io.trino.spi.connector.SchemaTableName.schemaTableName; @@ -319,7 +339,9 @@ import static java.util.UUID.randomUUID; import static java.util.function.Function.identity; import static java.util.function.Predicate.not; +import static java.util.stream.Collectors.collectingAndThen; import static java.util.stream.Collectors.partitioningBy; +import static java.util.stream.Collectors.toUnmodifiableSet; public class DeltaLakeMetadata implements ConnectorMetadata @@ -407,6 +429,8 @@ public class DeltaLakeMetadata private final boolean deleteSchemaLocationsFallback; private final boolean useUniqueTableLocation; private final boolean allowManagedTableRename; + private final DeltaLakeTableMetadataScheduler metadataScheduler; + private final Map tableUpdateInfos = new ConcurrentHashMap<>(); private final Map latestTableVersions = new ConcurrentHashMap<>(); private final Map queriedSnapshots = new ConcurrentHashMap<>(); @@ -437,6 +461,7 @@ public DeltaLakeMetadata( boolean deleteSchemaLocationsFallback, DeltaLakeRedirectionsProvider deltaLakeRedirectionsProvider, CachingExtendedStatisticsAccess statisticsAccess, + DeltaLakeTableMetadataScheduler metadataScheduler, boolean useUniqueTableLocation, boolean allowManagedTableRename) { @@ -459,6 +484,7 @@ public DeltaLakeMetadata( this.deltaLakeRedirectionsProvider = requireNonNull(deltaLakeRedirectionsProvider, "deltaLakeRedirectionsProvider is null"); this.statisticsAccess = requireNonNull(statisticsAccess, "statisticsAccess is null"); this.deleteSchemaLocationsFallback = deleteSchemaLocationsFallback; + this.metadataScheduler = requireNonNull(metadataScheduler, "metadataScheduler is null"); this.useUniqueTableLocation = useUniqueTableLocation; this.allowManagedTableRename = allowManagedTableRename; } @@ -576,13 +602,14 @@ public LocatedTableHandle getTableHandle( // Pretend the table does not exist to produce better error message in case of table 
redirects to Hive return null; } - Optional table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName()); - if (table.isEmpty()) { + Optional metastoreTable = metastore.getRawMetastoreTable(tableName.getSchemaName(), tableName.getTableName()); + if (metastoreTable.isEmpty()) { return null; } - boolean managed = table.get().managed(); + DeltaMetastoreTable table = convertToDeltaMetastoreTable(metastoreTable.get()); + boolean managed = table.managed(); - String tableLocation = table.get().location(); + String tableLocation = table.location(); TrinoFileSystem fileSystem = fileSystemFactory.create(session); TableSnapshot tableSnapshot = getSnapshot(session, tableName, tableLocation, endVersion.map(version -> getVersion(fileSystem, tableLocation, version))); @@ -620,6 +647,11 @@ public LocatedTableHandle getTableHandle( return null; } verifySupportedColumnMapping(getColumnMappingMode(metadataEntry, protocolEntry)); + if (metadataScheduler.canStoreTableMetadata(session, metadataEntry.getSchemaString(), Optional.ofNullable(metadataEntry.getDescription())) && + endVersion.isEmpty() && + !isSameTransactionVersion(metastoreTable.get(), tableSnapshot)) { + tableUpdateInfos.put(tableName, new TableUpdateInfo(session, tableSnapshot.getVersion(), metadataEntry.getSchemaString(), Optional.ofNullable(metadataEntry.getDescription()))); + } return new DeltaLakeTableHandle( tableName.getSchemaName(), tableName.getTableName(), @@ -820,6 +852,93 @@ public Optional getInsertLayout(ConnectorSession session, return Optional.of(new ConnectorTableLayout(partitionColumnNames)); } + @Override + public Iterator streamRelationComments(ConnectorSession session, Optional schemaName, UnaryOperator> relationFilter) + { + Map viewDefinitions = getViews(session, schemaName); + ImmutableList.Builder commentMetadataBuilder = ImmutableList.builderWithExpectedSize(viewDefinitions.size()); + ImmutableSet.Builder viewNamesBuilder = 
ImmutableSet.builderWithExpectedSize(viewDefinitions.size()); + for (Entry viewDefinitionEntry : viewDefinitions.entrySet()) { + RelationCommentMetadata relationCommentMetadata = RelationCommentMetadata.forRelation(viewDefinitionEntry.getKey(), viewDefinitionEntry.getValue().getComment()); + commentMetadataBuilder.add(relationCommentMetadata); + viewNamesBuilder.add(relationCommentMetadata.name()); + } + List views = commentMetadataBuilder.build(); + Set viewNames = viewNamesBuilder.build(); + + TrinoFileSystem fileSystem = fileSystemFactory.create(session); + + Stream tables = listTables(session, schemaName).stream() + .filter(tableName -> !viewNames.contains(tableName)) + .collect(collectingAndThen(toUnmodifiableSet(), relationFilter)).stream() + .map(tableName -> getRelationCommentMetadata(session, fileSystem, tableName)) + .filter(Objects::nonNull); + + Set availableViews = relationFilter.apply(viewNames); + return Streams.concat(views.stream().filter(commentMetadata -> availableViews.contains(commentMetadata.name())), tables) + .iterator(); + } + + private RelationCommentMetadata getRelationCommentMetadata(ConnectorSession session, TrinoFileSystem fileSystem, SchemaTableName tableName) + { + if (redirectTable(session, tableName).isPresent()) { + return RelationCommentMetadata.forRedirectedTable(tableName); + } + + try { + Optional
metastoreTable = metastore.getRawMetastoreTable(tableName.getSchemaName(), tableName.getTableName()); + if (metastoreTable.isEmpty()) { + // this may happen when table is being deleted concurrently + return null; + } + + Table table = metastoreTable.get(); + verifyDeltaLakeTable(table); + + String tableLocation = HiveMetastoreBackedDeltaLakeMetastore.getTableLocation(table); + if (canUseTableParametersFromMetastore(session, fileSystem, table, tableLocation)) { + // Don't check TABLE_COMMENT existence because it's not stored in case of null comment + return RelationCommentMetadata.forRelation(tableName, Optional.ofNullable(table.getParameters().get(TABLE_COMMENT))); + } + + TableSnapshot snapshot = getSnapshot(session, tableName, tableLocation, Optional.empty()); + MetadataEntry metadata = transactionLogAccess.getMetadataEntry(session, snapshot); + return RelationCommentMetadata.forRelation(tableName, Optional.ofNullable(metadata.getDescription())); + } + catch (RuntimeException e) { + boolean suppressed = false; + if (e instanceof TrinoException trinoException) { + ErrorCode errorCode = trinoException.getErrorCode(); + suppressed = errorCode.equals(UNSUPPORTED_TABLE_TYPE.toErrorCode()) || + // e.g. table deleted concurrently + errorCode.equals(TABLE_NOT_FOUND.toErrorCode()) || + errorCode.equals(NOT_FOUND.toErrorCode()) || + // e.g. Delta table being deleted concurrently resulting in failure to load metadata from filesystem + errorCode.getType() == EXTERNAL; + } + if (suppressed) { + LOG.debug("Failed to get metadata for table: %s", tableName); + } + else { + // getTableHandle or getTableMetadata failed call may fail if table disappeared during listing or is unsupported + LOG.warn("Failed to get metadata for table: %s", tableName); + } + // Since the getTableHandle did not return null (i.e. 
succeeded or failed), we assume the table would be returned by listTables + return RelationCommentMetadata.forRelation(tableName, Optional.empty()); + } + } + + private static boolean canUseTableParametersFromMetastore(ConnectorSession session, TrinoFileSystem fileSystem, Table table, String tableLocation) + { + if (!isStoreTableMetadataInMetastoreEnabled(session)) { + return false; + } + + return getLastTransactionVersion(table) + .map(version -> isLatestVersion(fileSystem, tableLocation, version)) + .orElse(false); + } + @Override public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix) { @@ -833,25 +952,37 @@ public Iterator streamTableColumns(ConnectorSession sessio .map(_ -> singletonList(prefix.toSchemaTableName())) .orElseGet(() -> listTables(session, prefix.getSchema())); + TrinoFileSystem fileSystem = fileSystemFactory.create(session); + return tables.stream() - .flatMap(table -> { + .flatMap(tableName -> { try { - if (redirectTable(session, table).isPresent()) { + if (redirectTable(session, tableName).isPresent()) { // put "redirect marker" for current table - return Stream.of(TableColumnsMetadata.forRedirectedTable(table)); + return Stream.of(TableColumnsMetadata.forRedirectedTable(tableName)); } - Optional metastoreTable = metastore.getTable(table.getSchemaName(), table.getTableName()); + Optional
metastoreTable = metastore.getRawMetastoreTable(tableName.getSchemaName(), tableName.getTableName()); if (metastoreTable.isEmpty()) { // this may happen when table is being deleted concurrently, return Stream.of(); } - String tableLocation = metastoreTable.get().location(); - TableSnapshot snapshot = transactionLogAccess.loadSnapshot(session, table, tableLocation, Optional.empty()); + + Table table = metastoreTable.get(); + verifyDeltaLakeTable(table); + + String tableLocation = HiveMetastoreBackedDeltaLakeMetastore.getTableLocation(table); + if (containsSchemaString(table) && canUseTableParametersFromMetastore(session, fileSystem, table, tableLocation)) { + List columnsMetadata = metadataScheduler.getColumnsMetadata(table); + return Stream.of(TableColumnsMetadata.forTable(tableName, columnsMetadata)); + } + // Don't store cache in streamTableColumns method for avoiding too many update calls + + TableSnapshot snapshot = transactionLogAccess.loadSnapshot(session, tableName, tableLocation, Optional.empty()); MetadataEntry metadata = transactionLogAccess.getMetadataEntry(session, snapshot); ProtocolEntry protocol = transactionLogAccess.getProtocolEntry(session, snapshot); List columnMetadata = getTableColumnMetadata(metadata, protocol); - return Stream.of(TableColumnsMetadata.forTable(table, columnMetadata)); + return Stream.of(TableColumnsMetadata.forTable(tableName, columnMetadata)); } catch (NotADeltaLakeTableException | IOException e) { return Stream.empty(); @@ -859,13 +990,28 @@ public Iterator streamTableColumns(ConnectorSession sessio catch (RuntimeException e) { // this may happen when table is being deleted concurrently, it still exists in metastore but TL is no longer present // there can be several different exceptions thrown this is why all RTE are caught and ignored here - LOG.debug(e, "Ignored exception when trying to list columns from %s", table); + LOG.debug(e, "Ignored exception when trying to list columns from %s", tableName); return Stream.empty(); 
} }) .iterator(); } + private static boolean isLatestVersion(TrinoFileSystem fileSystem, String tableLocation, long version) + { + String transactionLogDir = getTransactionLogDir(tableLocation); + Location transactionLogJsonEntryPath = getTransactionLogJsonEntryPath(transactionLogDir, version); + Location nextTransactionLogJsonEntryPath = getTransactionLogJsonEntryPath(transactionLogDir, version + 1); + try { + return !fileSystem.newInputFile(nextTransactionLogJsonEntryPath).exists() && + fileSystem.newInputFile(transactionLogJsonEntryPath).exists(); + } + catch (IOException e) { + LOG.debug(e, "Failed to check table location: %s", tableLocation); + return false; + } + } + private List getColumns(MetadataEntry deltaMetadata, ProtocolEntry protocolEntry) { ImmutableList.Builder columns = ImmutableList.builder(); @@ -999,6 +1145,7 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe checkPathContainsNoFiles(session, Location.of(location)); external = false; } + long commitVersion = 0; Location deltaLogDirectory = Location.of(getTransactionLogDir(location)); Optional checkpointInterval = getCheckpointInterval(tableMetadata.getProperties()); Optional changeDataFeedEnabled = getChangeDataFeedEnabled(tableMetadata.getProperties()); @@ -1025,6 +1172,7 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe maxFieldId = OptionalInt.of(fieldId.get()); } + String schemaString = serializeSchemaAsJson(deltaTable.build()); try { TrinoFileSystem fileSystem = fileSystemFactory.create(session); boolean transactionLogFileExists = fileSystem.listFiles(deltaLogDirectory).hasNext(); @@ -1035,7 +1183,6 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe "Using CREATE [OR REPLACE] TABLE with an existing table content is disallowed, instead use the system.register_table() procedure."); } else { - long commitVersion = 0; TransactionLogWriter transactionLogWriter = 
transactionLogWriterFactory.newWriterWithoutTransactionIsolation(session, location); ProtocolEntry protocolEntry; if (replaceExistingTable) { @@ -1086,7 +1233,7 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Unable to access file system for: " + location, e); } - Table table = buildTable(session, schemaTableName, location, external); + Table table = buildTable(session, schemaTableName, location, external, tableMetadata.getComment(), commitVersion, schemaString); PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow()); // As a precaution, clear the caches @@ -1100,7 +1247,7 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe } } - public static Table buildTable(ConnectorSession session, SchemaTableName schemaTableName, String location, boolean isExternal) + public Table buildTable(ConnectorSession session, SchemaTableName schemaTableName, String location, boolean isExternal, Optional tableComment, long version, String schemaString) { Table.Builder tableBuilder = Table.builder() .setDatabaseName(schemaTableName.getSchemaName()) @@ -1108,13 +1255,13 @@ public static Table buildTable(ConnectorSession session, SchemaTableName schemaT .setOwner(Optional.of(session.getUser())) .setTableType(isExternal ? 
EXTERNAL_TABLE.name() : MANAGED_TABLE.name()) .setDataColumns(DUMMY_DATA_COLUMNS) - .setParameters(deltaTableProperties(session, location, isExternal)); + .setParameters(deltaTableProperties(session, location, isExternal, tableComment, version, schemaString)); setDeltaStorageFormat(tableBuilder, location); return tableBuilder.build(); } - private static Map deltaTableProperties(ConnectorSession session, String location, boolean external) + private Map deltaTableProperties(ConnectorSession session, String location, boolean external, Optional tableComment, long version, String schemaString) { ImmutableMap.Builder properties = ImmutableMap.builder() .put(TRINO_QUERY_ID_NAME, session.getQueryId()) @@ -1130,6 +1277,9 @@ private static Map deltaTableProperties(ConnectorSession session // Mimicking the behavior of the Hive connector which sets both `Table#setTableType` and the "EXTERNAL" table property properties.put("EXTERNAL", "TRUE"); } + if (metadataScheduler.canStoreTableMetadata(session, schemaString, tableComment)) { + properties.putAll(tableMetadataParameters(version, schemaString, tableComment)); + } return properties.buildOrThrow(); } @@ -1393,7 +1543,6 @@ public Optional finishCreateTable( .collect(toImmutableList()); SchemaTableName schemaTableName = schemaTableName(schemaName, tableName); - Table table = buildTable(session, schemaTableName, location, handle.external()); ColumnMappingMode columnMappingMode = handle.columnMappingMode(); String schemaString = handle.schemaString(); @@ -1478,6 +1627,7 @@ public Optional finishCreateTable( true); } + Table table = buildTable(session, schemaTableName, location, handle.external(), handle.comment(), commitVersion, handle.schemaString()); PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow()); // As a precaution, clear the caches @@ -1521,6 +1671,7 @@ public void setTableComment(ConnectorSession session, ConnectorTableHandle table if (columnMappingMode != ID && 
columnMappingMode != NAME && columnMappingMode != NONE) { throw new TrinoException(NOT_SUPPORTED, "Setting a table comment with column mapping %s is not supported".formatted(columnMappingMode)); } + MetadataEntry metadataEntry = handle.getMetadataEntry(); ProtocolEntry protocolEntry = handle.getProtocolEntry(); checkUnsupportedWriterFeatures(protocolEntry); @@ -1537,6 +1688,7 @@ public void setTableComment(ConnectorSession session, ConnectorTableHandle table MetadataEntry.builder(handle.getMetadataEntry()) .setDescription(comment)); transactionLogWriter.flush(); + enqueueUpdateInfo(session, handle.getSchemaName(), handle.getTableName(), commitVersion, metadataEntry.getSchemaString(), comment); } catch (Exception e) { throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to comment on table: %s.%s", handle.getSchemaName(), handle.getTableName()), e); @@ -1563,6 +1715,7 @@ public void setColumnComment(ConnectorSession session, ConnectorTableHandle tabl DeltaLakeTable deltaTable = DeltaLakeTable.builder(deltaLakeTableHandle.getMetadataEntry(), deltaLakeTableHandle.getProtocolEntry()) .setColumnComment(deltaLakeColumnHandle.getBaseColumnName(), comment.orElse(null)) .build(); + String schemaString = serializeSchemaAsJson(deltaTable); TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, deltaLakeTableHandle.getLocation()); appendTableEntries( @@ -1572,8 +1725,15 @@ public void setColumnComment(ConnectorSession session, ConnectorTableHandle tabl session, protocolEntry, MetadataEntry.builder(deltaLakeTableHandle.getMetadataEntry()) - .setSchemaString(serializeSchemaAsJson(deltaTable))); + .setSchemaString(schemaString)); transactionLogWriter.flush(); + enqueueUpdateInfo( + session, + deltaLakeTableHandle.getSchemaName(), + deltaLakeTableHandle.getTableName(), + commitVersion, + schemaString, + Optional.ofNullable(deltaLakeTableHandle.getMetadataEntry().getDescription())); } catch (Exception e) { throw new 
TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to add '%s' column comment for: %s.%s", deltaLakeColumnHandle.getBaseColumnName(), deltaLakeTableHandle.getSchemaName(), deltaLakeTableHandle.getTableName()), e); @@ -1637,7 +1797,7 @@ public void addColumn(ConnectorSession session, ConnectorTableHandle tableHandle newColumnMetadata.getComment(), generateColumnMetadata(columnMappingMode, maxColumnId)) .build(); - + String schemaString = serializeSchemaAsJson(deltaTable); Map configuration = new HashMap<>(handle.getMetadataEntry().getConfiguration()); if (columnMappingMode == ID || columnMappingMode == NAME) { checkArgument(maxColumnId.get() > 0, "maxColumnId must be larger than 0: %s", maxColumnId); @@ -1652,9 +1812,16 @@ public void addColumn(ConnectorSession session, ConnectorTableHandle tableHandle session, buildProtocolEntryForNewColumn(protocolEntry, newColumnMetadata.getType()), MetadataEntry.builder(handle.getMetadataEntry()) - .setSchemaString(serializeSchemaAsJson(deltaTable)) + .setSchemaString(schemaString) .setConfiguration(configuration)); transactionLogWriter.flush(); + enqueueUpdateInfo( + session, + handle.getSchemaName(), + handle.getTableName(), + commitVersion, + schemaString, + Optional.ofNullable(handle.getMetadataEntry().getDescription())); } catch (Exception e) { throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to add '%s' column for: %s.%s %s", newColumnMetadata.getName(), handle.getSchemaName(), handle.getTableName(), firstNonNull(e.getMessage(), e)), e); @@ -1727,6 +1894,7 @@ public void dropColumn(ConnectorSession session, ConnectorTableHandle tableHandl throw new TrinoException(NOT_SUPPORTED, "Dropping the last non-partition column is unsupported"); } + String schemaString = serializeSchemaAsJson(deltaTable); try { TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, table.getLocation()); appendTableEntries( @@ -1736,8 +1904,9 @@ public void dropColumn(ConnectorSession session, 
ConnectorTableHandle tableHandl session, protocolEntry, MetadataEntry.builder(metadataEntry) - .setSchemaString(serializeSchemaAsJson(deltaTable))); + .setSchemaString(schemaString)); transactionLogWriter.flush(); + enqueueUpdateInfo(session, table.getSchemaName(), table.getTableName(), commitVersion, schemaString, Optional.ofNullable(metadataEntry.getDescription())); } catch (Exception e) { throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to drop '%s' column from: %s.%s", dropColumnName, table.getSchemaName(), table.getTableName()), e); @@ -1792,6 +1961,7 @@ public void renameColumn(ConnectorSession session, ConnectorTableHandle tableHan DeltaLakeTable deltaTable = DeltaLakeTable.builder(metadataEntry, protocolEntry) .renameColumn(sourceColumnName, newColumnName) .build(); + String schemaString = serializeSchemaAsJson(deltaTable); try { TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, table.getLocation()); appendTableEntries( @@ -1801,9 +1971,10 @@ public void renameColumn(ConnectorSession session, ConnectorTableHandle tableHan session, protocolEntry, MetadataEntry.builder(metadataEntry) - .setSchemaString(serializeSchemaAsJson(deltaTable)) + .setSchemaString(schemaString) .setPartitionColumns(partitionColumns)); transactionLogWriter.flush(); + enqueueUpdateInfo(session, table.getSchemaName(), table.getTableName(), commitVersion, schemaString, Optional.ofNullable(metadataEntry.getDescription())); // Don't update extended statistics because it uses physical column names internally } catch (Exception e) { @@ -1827,17 +1998,20 @@ public void dropNotNullConstraint(ConnectorSession session, ConnectorTableHandle DeltaLakeTable deltaTable = DeltaLakeTable.builder(metadataEntry, protocolEntry) .dropNotNullConstraint(columnName) .build(); + long commitVersion = table.getReadVersion() + 1; + String schemaString = serializeSchemaAsJson(deltaTable); try { TransactionLogWriter transactionLogWriter = 
transactionLogWriterFactory.newWriter(session, table.getLocation()); appendTableEntries( - table.getReadVersion() + 1, + commitVersion, transactionLogWriter, CHANGE_COLUMN_OPERATION, session, protocolEntry, MetadataEntry.builder(metadataEntry) - .setSchemaString(serializeSchemaAsJson(deltaTable))); + .setSchemaString(schemaString)); transactionLogWriter.flush(); + enqueueUpdateInfo(session, table.getSchemaName(), table.getTableName(), commitVersion, schemaString, Optional.ofNullable(metadataEntry.getDescription())); } catch (Exception e) { throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to drop not null constraint from '%s' column in: %s", columnName, table.getSchemaTableName()), e); @@ -1975,6 +2149,7 @@ public Optional finishInsert( .get(context -> commitInsertOperation(session, handle, sourceTableHandles, isolationLevel, dataFileInfos, readVersion, context.getAttemptCount())); writeCommitted = true; writeCheckpointIfNeeded(session, handle.tableName(), handle.location(), handle.readVersion(), handle.metadataEntry().getCheckpointInterval(), commitVersion); + enqueueUpdateInfo(session, handle.tableName().getSchemaName(), handle.tableName().getTableName(), commitVersion, handle.metadataEntry().getSchemaString(), Optional.ofNullable(handle.metadataEntry().getDescription())); if (isCollectExtendedStatisticsColumnStatisticsOnWrite(session) && !computedStatistics.isEmpty() && !dataFileInfos.isEmpty()) { // TODO (https://github.com/trinodb/trino/issues/16088) Add synchronization when version conflict for INSERT is resolved. 
@@ -2281,6 +2456,13 @@ public void finishMerge( long commitVersion = Failsafe.with(TRANSACTION_CONFLICT_RETRY_POLICY) .get(context -> commitMergeOperation(session, mergeHandle, mergeResults, sourceTableHandles, isolationLevel, allFiles, readVersion, context.getAttemptCount())); writeCommitted = true; + enqueueUpdateInfo( + session, + handle.getSchemaName(), + handle.getTableName(), + commitVersion, + handle.getMetadataEntry().getSchemaString(), + Optional.ofNullable(handle.getMetadataEntry().getDescription())); writeCheckpointIfNeeded(session, handle.getSchemaTableName(), handle.getLocation(), handle.getReadVersion(), checkpointInterval, commitVersion); } @@ -2557,6 +2739,13 @@ private void finishOptimize(ConnectorSession session, DeltaLakeTableExecuteHandl transactionLogWriter.flush(); writeCommitted = true; + enqueueUpdateInfo( + session, + executeHandle.schemaTableName().getSchemaName(), + executeHandle.schemaTableName().getTableName(), + commitVersion, + optimizeHandle.getMetadataEntry().getSchemaString(), + Optional.ofNullable(optimizeHandle.getMetadataEntry().getDescription())); Optional checkpointInterval = Optional.of(1L); // force checkpoint writeCheckpointIfNeeded( session, @@ -2846,6 +3035,13 @@ public void setTableProperties(ConnectorSession session, ConnectorTableHandle ta metadataEntry.ifPresent(transactionLogWriter::appendMetadataEntry); transactionLogWriter.flush(); + enqueueUpdateInfo( + session, + handle.getSchemaName(), + handle.getTableName(), + commitVersion, + metadataEntry.orElseThrow().getSchemaString(), + Optional.ofNullable(metadataEntry.orElseThrow().getDescription())); } catch (IOException e) { throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Failed to write Delta Lake transaction log entry", e); @@ -3868,6 +4064,13 @@ private OptionalLong executeDelete(ConnectorSession session, ConnectorTableHandl tableHandle.getReadVersion(), tableHandle.getMetadataEntry().getCheckpointInterval(), commitDeleteOperationResult.commitVersion()); + 
enqueueUpdateInfo( + session, + tableHandle.getSchemaName(), + tableHandle.getTableName(), + commitDeleteOperationResult.commitVersion, + tableHandle.getMetadataEntry().getSchemaString(), + Optional.ofNullable(tableHandle.getMetadataEntry().getDescription())); return commitDeleteOperationResult.deletedRecords(); } catch (Exception e) { @@ -3921,6 +4124,20 @@ private record CommitDeleteOperationResult(long commitVersion, OptionalLong dele } } + private void enqueueUpdateInfo(ConnectorSession session, String schemaName, String tableName, long version, String schemaString, Optional tableComment) + { + if (!metadataScheduler.canStoreTableMetadata(session, schemaString, tableComment)) { + return; + } + tableUpdateInfos.put(new SchemaTableName(schemaName, tableName), new TableUpdateInfo(session, version, schemaString, tableComment)); + } + + public void commit() + { + metadataScheduler.putAll(tableUpdateInfos); + tableUpdateInfos.clear(); + } + private Stream getAddFileEntriesMatchingEnforcedPartitionConstraint(ConnectorSession session, DeltaLakeTableHandle tableHandle) { TableSnapshot tableSnapshot = getSnapshot(session, tableHandle); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadataFactory.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadataFactory.java index 0bb1efc579ca..485ef534d91f 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadataFactory.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadataFactory.java @@ -16,6 +16,7 @@ import com.google.inject.Inject; import io.airlift.json.JsonCodec; import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler; import io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore; import io.trino.plugin.deltalake.statistics.CachingExtendedStatisticsAccess; import 
io.trino.plugin.deltalake.statistics.FileBasedTableStatisticsProvider; @@ -56,6 +57,7 @@ public class DeltaLakeMetadataFactory private final long perTransactionMetastoreCacheMaximumSize; private final boolean deleteSchemaLocationsFallback; private final boolean useUniqueTableLocation; + private final DeltaLakeTableMetadataScheduler metadataScheduler; private final boolean allowManagedTableRename; private final String trinoVersion; @@ -76,7 +78,8 @@ public DeltaLakeMetadataFactory( DeltaLakeRedirectionsProvider deltaLakeRedirectionsProvider, CachingExtendedStatisticsAccess statisticsAccess, @AllowDeltaLakeManagedTableRename boolean allowManagedTableRename, - NodeVersion nodeVersion) + NodeVersion nodeVersion, + DeltaLakeTableMetadataScheduler metadataScheduler) { this.hiveMetastoreFactory = requireNonNull(hiveMetastoreFactory, "hiveMetastore is null"); this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); @@ -98,6 +101,7 @@ public DeltaLakeMetadataFactory( this.useUniqueTableLocation = deltaLakeConfig.isUniqueTableLocation(); this.allowManagedTableRename = allowManagedTableRename; this.trinoVersion = requireNonNull(nodeVersion, "nodeVersion is null").toString(); + this.metadataScheduler = requireNonNull(metadataScheduler, "metadataScheduler is null"); } public DeltaLakeMetadata create(ConnectorIdentity identity) @@ -135,6 +139,7 @@ public DeltaLakeMetadata create(ConnectorIdentity identity) deleteSchemaLocationsFallback, deltaLakeRedirectionsProvider, statisticsAccess, + metadataScheduler, useUniqueTableLocation, allowManagedTableRename); } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java index 7960612bf0ee..3026b015b3e4 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java +++ 
b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java @@ -27,6 +27,7 @@ import io.trino.plugin.deltalake.cache.DeltaLakeCacheKeyProvider; import io.trino.plugin.deltalake.functions.tablechanges.TableChangesFunctionProvider; import io.trino.plugin.deltalake.functions.tablechanges.TableChangesProcessorProvider; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler; import io.trino.plugin.deltalake.procedure.DropExtendedStatsProcedure; import io.trino.plugin.deltalake.procedure.FlushMetadataCacheProcedure; import io.trino.plugin.deltalake.procedure.OptimizeTableProcedure; @@ -117,6 +118,9 @@ public void setup(Binder binder) binder.bind(TransactionLogAccess.class).in(Scopes.SINGLETON); newExporter(binder).export(TransactionLogAccess.class) .as(generator -> generator.generatedNameOf(TransactionLogAccess.class, catalogName.get().toString())); + binder.bind(DeltaLakeTableMetadataScheduler.class).in(Scopes.SINGLETON); + newExporter(binder).export(DeltaLakeTableMetadataScheduler.class) + .as(generator -> generator.generatedNameOf(DeltaLakeTableMetadataScheduler.class, catalogName.get().toString())); binder.bind(TransactionLogWriterFactory.class).in(Scopes.SINGLETON); binder.bind(TransactionLogSynchronizerManager.class).in(Scopes.SINGLETON); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java index d3f8e8eb398f..01e744dc3047 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java @@ -75,6 +75,7 @@ public final class DeltaLakeSessionProperties private static final String PROJECTION_PUSHDOWN_ENABLED = "projection_pushdown_enabled"; private static final String QUERY_PARTITION_FILTER_REQUIRED = 
"query_partition_filter_required"; private static final String CHECKPOINT_FILTERING_ENABLED = "checkpoint_filtering_enabled"; + private static final String STORE_TABLE_METADATA = "store_table_metadata"; private final List> sessionProperties; @@ -230,7 +231,12 @@ public DeltaLakeSessionProperties( CHECKPOINT_FILTERING_ENABLED, "Use filter in checkpoint reader", deltaLakeConfig.isCheckpointFilteringEnabled(), - false)); + false), + booleanProperty( + STORE_TABLE_METADATA, + "Store table metadata in metastore", + deltaLakeConfig.isStoreTableMetadataEnabled(), + true)); } @Override @@ -348,4 +354,9 @@ public static boolean isCheckpointFilteringEnabled(ConnectorSession session) { return session.getProperty(CHECKPOINT_FILTERING_ENABLED, Boolean.class); } + + public static boolean isStoreTableMetadataInMetastoreEnabled(ConnectorSession session) + { + return session.getProperty(STORE_TABLE_METADATA, Boolean.class); + } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTransactionManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTransactionManager.java index 175f64c699ca..9aa3e56cb1ea 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTransactionManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTransactionManager.java @@ -53,6 +53,11 @@ public void commit(ConnectorTransactionHandle transaction) { MemoizedMetadata deltaLakeMetadata = transactions.remove(transaction); checkArgument(deltaLakeMetadata != null, "no such transaction: %s", transaction); + deltaLakeMetadata.optionalGet().ifPresent(metadata -> { + try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(getClass().getClassLoader())) { + metadata.commit(); + } + }); } public void rollback(ConnectorTransactionHandle transaction) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/MaxTableParameterLength.java 
b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/MaxTableParameterLength.java new file mode 100644 index 000000000000..aeb0e9f7f6a0 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/MaxTableParameterLength.java @@ -0,0 +1,29 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake; + +import com.google.inject.BindingAnnotation; + +import java.lang.annotation.Retention; +import java.lang.annotation.Target; + +import static java.lang.annotation.ElementType.FIELD; +import static java.lang.annotation.ElementType.METHOD; +import static java.lang.annotation.ElementType.PARAMETER; +import static java.lang.annotation.RetentionPolicy.RUNTIME; + +@Retention(RUNTIME) +@Target({FIELD, PARAMETER, METHOD}) +@BindingAnnotation +public @interface MaxTableParameterLength {} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableMetadataScheduler.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableMetadataScheduler.java new file mode 100644 index 000000000000..7136098f0d4a --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableMetadataScheduler.java @@ -0,0 +1,263 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.metastore; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.inject.Inject; +import io.airlift.concurrent.MoreFutures; +import io.airlift.log.Logger; +import io.trino.metastore.Table; +import io.trino.plugin.deltalake.DeltaLakeColumnMetadata; +import io.trino.plugin.deltalake.DeltaLakeConfig; +import io.trino.plugin.deltalake.MaxTableParameterLength; +import io.trino.plugin.deltalake.transactionlog.TableSnapshot; +import io.trino.spi.NodeManager; +import io.trino.spi.connector.ColumnMetadata; +import io.trino.spi.connector.ConnectorSession; +import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.connector.TableNotFoundException; +import io.trino.spi.type.TypeManager; +import jakarta.annotation.PostConstruct; +import jakarta.annotation.PreDestroy; +import org.weakref.jmx.Managed; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.Callable; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.atomic.AtomicInteger; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService; +import static 
io.airlift.concurrent.Threads.daemonThreadsNamed; +import static io.airlift.concurrent.Threads.threadsNamed; +import static io.trino.metastore.Table.TABLE_COMMENT; +import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isStoreTableMetadataInMetastoreEnabled; +import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.ColumnMappingMode.NONE; +import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.getColumnMetadata; +import static java.util.Comparator.comparing; +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.Executors.newFixedThreadPool; +import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.function.BinaryOperator.maxBy; + +public class DeltaLakeTableMetadataScheduler +{ + private static final Logger log = Logger.get(DeltaLakeTableMetadataScheduler.class); + + private static final String TRINO_LAST_TRANSACTION_VERSION = "trino_last_transaction_version"; + private static final String TRINO_METADATA_SCHEMA_STRING = "trino_metadata_schema_string"; + private static final int MAX_FAILED_COUNTS = 10; + + private final DeltaLakeTableOperationsProvider tableOperationsProvider; + private final TypeManager typeManager; + private final int tableParameterLengthLimit; + private final int storeTableMetadataThreads; + private final Map updateInfos = new ConcurrentHashMap<>(); + private final boolean enabled; + + private ExecutorService executor; + private ScheduledExecutorService scheduler; + private final AtomicInteger failedCounts = new AtomicInteger(); + + @Inject + public DeltaLakeTableMetadataScheduler( + NodeManager nodeManager, + TypeManager typeManager, + DeltaLakeTableOperationsProvider tableOperationsProvider, + @MaxTableParameterLength int tableParameterLengthLimit, + DeltaLakeConfig config) + { + this.typeManager = requireNonNull(typeManager, "typeManager is null"); + 
this.tableOperationsProvider = requireNonNull(tableOperationsProvider, "tableOperationsProvider is null"); + this.tableParameterLengthLimit = tableParameterLengthLimit; + this.storeTableMetadataThreads = config.getStoreTableMetadataThreads(); + requireNonNull(nodeManager, "nodeManager is null"); + this.enabled = config.isStoreTableMetadataEnabled() && nodeManager.getCurrentNode().isCoordinator(); + } + + @Managed + public boolean isShutdown() + { + return scheduler.isShutdown(); + } + + public void putAll(Map tableParameters) + { + if (!enabled || scheduler.isShutdown()) { + log.debug("Scheduler is already shutdown, skipping the update: %s", tableParameters); + return; + } + updateInfos.putAll(tableParameters); + } + + @PostConstruct + public void start() + { + if (enabled) { + executor = storeTableMetadataThreads == 0 ? newDirectExecutorService() : newFixedThreadPool(storeTableMetadataThreads, threadsNamed("store-table-metadata-%s")); + scheduler = newSingleThreadScheduledExecutor(daemonThreadsNamed("store-table-metadata")); + + scheduler.scheduleWithFixedDelay(() -> { + try { + process(); + } + catch (Throwable e) { + log.warn(e, "Error storing table metadata"); + } + try { + checkFailedTasks(); + } + catch (Throwable e) { + log.warn(e, "Error canceling metadata update tasks"); + } + }, 200, 1000, MILLISECONDS); + } + } + + @VisibleForTesting + public void process() + { + List> tasks = new ArrayList<>(); + synchronized (this) { + if (updateInfos.isEmpty()) { + return; + } + + Map updateTables = updateInfos.entrySet().stream() + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue, maxBy(comparing(TableUpdateInfo::version)))); + + log.debug("Processing %s table(s): %s", updateTables.size(), updateTables.keySet()); + for (Map.Entry entry : updateTables.entrySet()) { + tasks.add(() -> { + updateTable(entry.getKey(), entry.getValue()); + return null; + }); + } + + updateInfos.clear(); + } + + try { + executor.invokeAll(tasks).forEach(MoreFutures::getDone); 
+ } + catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException(e); + } + } + + private void updateTable(SchemaTableName schemaTableName, TableUpdateInfo info) + { + log.debug("Updating table: '%s'", schemaTableName); + try { + tableOperationsProvider.createTableOperations(info.session) + .commitToExistingTable(schemaTableName, info.version, info.schemaString, info.tableComment); + log.debug("Replaced table: '%s'", schemaTableName); + } + catch (TableNotFoundException e) { + // Don't increment failedCounts. The table might have been dropped concurrently. + log.debug("Table disappeared during metadata updating operation: '%s'", schemaTableName); + } + catch (Exception e) { + log.warn(e, "Failed to store table metadata for '%s'", schemaTableName); + // TODO Consider increment only when the exception is permission issue + failedCounts.incrementAndGet(); + } + } + + private void checkFailedTasks() + { + if (failedCounts.get() > MAX_FAILED_COUNTS) { + log.warn("Too many failed tasks, stopping the scheduler"); + stop(); + } + } + + @VisibleForTesting + public void clear() + { + updateInfos.clear(); + } + + @PreDestroy + public void stop() + { + if (enabled) { + scheduler.shutdownNow(); + executor.shutdownNow(); + } + } + + public static boolean isSameTransactionVersion(Table table, TableSnapshot snapshot) + { + return getLastTransactionVersion(table) + .map(version -> version == snapshot.getVersion()) + .orElse(false); + } + + public static Optional getLastTransactionVersion(Table table) + { + String version = table.getParameters().get(TRINO_LAST_TRANSACTION_VERSION); + return Optional.ofNullable(version).map(Long::parseLong); + } + + public static boolean containsSchemaString(Table table) + { + return table.getParameters().containsKey(TRINO_METADATA_SCHEMA_STRING); + } + + public List getColumnsMetadata(Table table) + { + String schemaString = table.getParameters().get(TRINO_METADATA_SCHEMA_STRING); + // Specify NONE and empty 
partition because they are unused when listing columns + return getColumnMetadata(schemaString, typeManager, NONE, ImmutableList.of()).stream() + .map(DeltaLakeColumnMetadata::columnMetadata) + .collect(toImmutableList()); + } + + public boolean canStoreTableMetadata(ConnectorSession session, String schemaString, Optional tableComment) + { + return isStoreTableMetadataInMetastoreEnabled(session) && + schemaString.length() <= tableParameterLengthLimit && + tableComment.map(String::length).orElse(0) <= tableParameterLengthLimit; + } + + public static Map tableMetadataParameters(long version, String schemaString, Optional tableComment) + { + ImmutableMap.Builder parameters = ImmutableMap.builder(); + tableComment.ifPresent(comment -> parameters.put(TABLE_COMMENT, comment)); + parameters.put(TRINO_LAST_TRANSACTION_VERSION, Long.toString(version)); + parameters.put(TRINO_METADATA_SCHEMA_STRING, schemaString); + return parameters.buildOrThrow(); + } + + public record TableUpdateInfo(ConnectorSession session, long version, String schemaString, Optional tableComment) + { + public TableUpdateInfo + { + requireNonNull(session, "session is null"); + requireNonNull(schemaString, "schemaString is null"); + requireNonNull(tableComment, "tableComment is null"); + } + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperations.java new file mode 100644 index 000000000000..b146833afb65 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperations.java @@ -0,0 +1,28 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.metastore; + +import io.trino.annotation.NotThreadSafe; +import io.trino.spi.connector.SchemaTableName; + +import java.util.Optional; + +@NotThreadSafe +public interface DeltaLakeTableOperations +{ + /** + * @throws io.trino.spi.connector.TableNotFoundException if the table does not exist + */ + void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional tableComment); +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperationsProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperationsProvider.java new file mode 100644 index 000000000000..53a8cc4a3cf8 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperationsProvider.java @@ -0,0 +1,21 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.deltalake.metastore; + +import io.trino.spi.connector.ConnectorSession; + +public interface DeltaLakeTableOperationsProvider +{ + DeltaLakeTableOperations createTableOperations(ConnectorSession session); +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java index 1c1533894392..3ff1ed7622e1 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java @@ -74,13 +74,8 @@ public Optional
getRawMetastoreTable(String databaseName, String tableNam @Override public Optional getTable(String databaseName, String tableName) { - return getRawMetastoreTable(databaseName, tableName).map(table -> { - verifyDeltaLakeTable(table); - return new DeltaMetastoreTable( - new SchemaTableName(databaseName, tableName), - table.getTableType().equals(MANAGED_TABLE.name()), - getTableLocation(table)); - }); + return getRawMetastoreTable(databaseName, tableName) + .map(HiveMetastoreBackedDeltaLakeMetastore::convertToDeltaMetastoreTable); } public static void verifyDeltaLakeTable(Table table) @@ -130,6 +125,15 @@ public void renameTable(SchemaTableName from, SchemaTableName to) delegate.renameTable(from.getSchemaName(), from.getTableName(), to.getSchemaName(), to.getTableName()); } + public static DeltaMetastoreTable convertToDeltaMetastoreTable(Table table) + { + verifyDeltaLakeTable(table); + return new DeltaMetastoreTable( + new SchemaTableName(table.getDatabaseName(), table.getTableName()), + table.getTableType().equals(MANAGED_TABLE.name()), + getTableLocation(table)); + } + public static String getTableLocation(Table table) { Map serdeParameters = table.getStorage().getSerdeParameters(); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreModule.java index 8c540afa471c..bda2f9ef71ad 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreModule.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreModule.java @@ -15,8 +15,11 @@ import com.google.inject.Binder; import com.google.inject.Key; +import com.google.inject.Scopes; import io.airlift.configuration.AbstractConfigurationAwareModule; import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename; +import 
io.trino.plugin.deltalake.MaxTableParameterLength; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider; import io.trino.plugin.hive.metastore.file.FileMetastoreModule; public class DeltaLakeFileMetastoreModule @@ -26,6 +29,8 @@ public class DeltaLakeFileMetastoreModule protected void setup(Binder binder) { install(new FileMetastoreModule()); + binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeFileMetastoreTableOperationsProvider.class).in(Scopes.SINGLETON); binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(true); + binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(Integer.MAX_VALUE); } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperations.java new file mode 100644 index 000000000000..69019cf22133 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperations.java @@ -0,0 +1,52 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.deltalake.metastore.file; + +import com.google.common.collect.ImmutableMap; +import io.trino.metastore.HiveMetastore; +import io.trino.metastore.Table; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations; +import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.connector.TableNotFoundException; + +import java.util.Map; +import java.util.Optional; + +import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters; +import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet; +import static java.util.Objects.requireNonNull; + +public class DeltaLakeFileMetastoreTableOperations + implements DeltaLakeTableOperations +{ + private final HiveMetastore metastore; + + public DeltaLakeFileMetastoreTableOperations(HiveMetastore metastore) + { + this.metastore = requireNonNull(metastore, "metastore is null"); + } + + @Override + public void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional tableComment) + { + Table currentTable = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName()) + .orElseThrow(() -> new TableNotFoundException(schemaTableName)); + Map parameters = ImmutableMap.builder() + .putAll(currentTable.getParameters()) + .putAll(tableMetadataParameters(version, schemaString, tableComment)) + .buildKeepingLast(); + Table updatedTable = currentTable.withParameters(parameters); + metastore.replaceTable(currentTable.getDatabaseName(), currentTable.getTableName(), updatedTable, buildInitialPrivilegeSet(currentTable.getOwner().orElseThrow())); + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperationsProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperationsProvider.java new file mode 100644 index 000000000000..898679cb6497 
--- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperationsProvider.java @@ -0,0 +1,42 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.metastore.file; + +import com.google.inject.Inject; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; +import io.trino.spi.connector.ConnectorSession; + +import java.util.Optional; + +import static java.util.Objects.requireNonNull; + +public class DeltaLakeFileMetastoreTableOperationsProvider + implements DeltaLakeTableOperationsProvider +{ + private final HiveMetastoreFactory hiveMetastoreFactory; + + @Inject + public DeltaLakeFileMetastoreTableOperationsProvider(HiveMetastoreFactory hiveMetastoreFactory) + { + this.hiveMetastoreFactory = requireNonNull(hiveMetastoreFactory, "hiveMetastoreFactory is null"); + } + + @Override + public DeltaLakeTableOperations createTableOperations(ConnectorSession session) + { + return new DeltaLakeFileMetastoreTableOperations(hiveMetastoreFactory.createMetastore(Optional.of(session.getIdentity()))); + } +} diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreModule.java 
b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreModule.java index 179e76017ce0..a1b2ced44dca 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreModule.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreModule.java @@ -15,18 +15,23 @@ import com.google.inject.Binder; import com.google.inject.Key; +import com.google.inject.Scopes; import com.google.inject.Singleton; import com.google.inject.multibindings.ProvidesIntoOptional; import io.airlift.configuration.AbstractConfigurationAwareModule; import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename; +import io.trino.plugin.deltalake.MaxTableParameterLength; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider; import io.trino.plugin.hive.metastore.glue.GlueHiveMetastore; import io.trino.plugin.hive.metastore.glue.GlueMetastoreModule; +import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; import java.util.EnumSet; import java.util.Set; import static com.google.inject.multibindings.ProvidesIntoOptional.Type.ACTUAL; import static io.airlift.configuration.ConfigBinder.configBinder; +import static org.weakref.jmx.guice.ExportBinder.newExporter; public class DeltaLakeGlueMetastoreModule extends AbstractConfigurationAwareModule @@ -37,7 +42,12 @@ protected void setup(Binder binder) configBinder(binder).bindConfig(DeltaLakeGlueMetastoreConfig.class); install(new GlueMetastoreModule()); + binder.bind(GlueMetastoreStats.class).in(Scopes.SINGLETON); + newExporter(binder).export(GlueMetastoreStats.class).withGeneratedName(); + binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeGlueMetastoreTableOperationsProvider.class).in(Scopes.SINGLETON); binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(true); + // Limit per Glue API docs 
(https://docs.aws.amazon.com/glue/latest/webapi/API_TableInput.html#Glue-Type-TableInput-Parameters as of this writing) + binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(512000); } @ProvidesIntoOptional(ACTUAL) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreTableOperations.java new file mode 100644 index 000000000000..ed612e15f613 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreTableOperations.java @@ -0,0 +1,91 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.deltalake.metastore.glue;

import com.google.common.collect.ImmutableMap;
import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
import io.trino.plugin.hive.metastore.glue.GlueCache;
import io.trino.plugin.hive.metastore.glue.GlueContext;
import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.connector.TableNotFoundException;
import software.amazon.awssdk.services.glue.GlueClient;
import software.amazon.awssdk.services.glue.model.EntityNotFoundException;
import software.amazon.awssdk.services.glue.model.GetTableRequest;
import software.amazon.awssdk.services.glue.model.Table;
import software.amazon.awssdk.services.glue.model.TableInput;

import java.util.Map;
import java.util.Optional;

import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters;
import static io.trino.plugin.hive.metastore.glue.GlueHiveMetastore.asTableInputBuilder;
import static java.util.Objects.requireNonNull;

/**
 * Stores Delta table metadata (last transaction version, schema string, table comment)
 * in the Glue catalog entry via the AWS SDK v2 client, so subsequent reads can serve
 * metadata without replaying the transaction log.
 */
public class DeltaLakeGlueMetastoreTableOperations
        implements DeltaLakeTableOperations
{
    private final GlueClient glueClient;
    private final GlueContext glueContext;
    private final GlueCache glueCache;
    private final GlueMetastoreStats stats;

    public DeltaLakeGlueMetastoreTableOperations(
            GlueClient glueClient,
            GlueContext glueContext,
            GlueCache glueCache,
            GlueMetastoreStats stats)
    {
        this.glueClient = requireNonNull(glueClient, "glueClient is null");
        this.glueContext = requireNonNull(glueContext, "glueContext is null");
        this.glueCache = requireNonNull(glueCache, "glueCache is null");
        this.stats = requireNonNull(stats, "stats is null");
    }

    /**
     * Merges the Trino metadata parameters into the current Glue table and writes it back.
     * Uses Glue's optimistic locking: the update carries the table's current {@code versionId},
     * so a concurrent modification makes the update fail rather than clobber it.
     *
     * @throws TableNotFoundException if the table no longer exists in Glue
     */
    @Override
    public void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional<String> tableComment)
    {
        GetTableRequest getTableRequest = GetTableRequest.builder()
                .databaseName(schemaTableName.getSchemaName())
                .name(schemaTableName.getTableName())
                .build();
        Table currentTable;
        try {
            currentTable = glueClient.getTable(getTableRequest).table();
        }
        catch (EntityNotFoundException e) {
            throw new TableNotFoundException(schemaTableName);
        }
        String glueVersionId = currentTable.versionId();

        // NOTE(review): only the update request applies glueContext::configureClient;
        // confirm whether the getTable call above should be configured the same way.
        stats.getUpdateTable().call(() -> glueClient.updateTable(builder -> builder
                .applyMutation(glueContext::configureClient)
                .databaseName(schemaTableName.getSchemaName())
                .tableInput(convertGlueTableToTableInput(currentTable, version, schemaString, tableComment))
                .versionId(glueVersionId)));
        // Drop the cached entry so readers observe the new parameters
        glueCache.invalidateTable(schemaTableName.getSchemaName(), schemaTableName.getTableName(), false);
    }

    /**
     * Builds a {@link TableInput} from the existing Glue table with the Trino metadata
     * parameters merged in. {@code buildKeepingLast()} lets the fresh trino_* values
     * overwrite any already-stored ones from a previous commit.
     */
    private static TableInput convertGlueTableToTableInput(Table glueTable, long version, String schemaString, Optional<String> tableComment)
    {
        // Fixed raw types: parameters map and builder are String -> String
        Map<String, String> parameters = ImmutableMap.<String, String>builder()
                .putAll(glueTable.parameters())
                .putAll(tableMetadataParameters(version, schemaString, tableComment))
                .buildKeepingLast();

        return asTableInputBuilder(glueTable)
                .parameters(parameters)
                .build();
    }
}
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.deltalake.metastore.glue;

import com.google.inject.Inject;
import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
import io.trino.plugin.hive.metastore.glue.GlueCache;
import io.trino.plugin.hive.metastore.glue.GlueContext;
import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
import io.trino.spi.connector.ConnectorSession;
import software.amazon.awssdk.services.glue.GlueClient;

import static java.util.Objects.requireNonNull;

/**
 * Provider of {@link DeltaLakeTableOperations} for the Glue (AWS SDK v2) metastore.
 * All injected collaborators are session-independent, so the same set is reused for
 * every session; the session argument is currently unused.
 */
public class DeltaLakeGlueMetastoreTableOperationsProvider
        implements DeltaLakeTableOperationsProvider
{
    private final GlueClient glueClient;
    private final GlueContext glueContext;
    private final GlueCache glueCache;
    private final GlueMetastoreStats stats;

    @Inject
    public DeltaLakeGlueMetastoreTableOperationsProvider(
            GlueClient glueClient,
            GlueContext glueContext,
            GlueCache glueCache,
            GlueMetastoreStats stats)
    {
        this.glueClient = requireNonNull(glueClient, "glueClient is null");
        this.glueContext = requireNonNull(glueContext, "glueContext is null");
        this.glueCache = requireNonNull(glueCache, "glueCache is null");
        this.stats = requireNonNull(stats, "stats is null");
    }

    @Override
    public DeltaLakeTableOperations createTableOperations(ConnectorSession session)
    {
        return new DeltaLakeGlueMetastoreTableOperations(glueClient, glueContext, glueCache, stats);
    }
}
a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java index f47d806ad7bc..2a68a126d8a9 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java @@ -16,10 +16,14 @@ import com.amazonaws.services.glue.model.Table; import com.google.inject.Binder; import com.google.inject.Key; +import com.google.inject.Scopes; import com.google.inject.TypeLiteral; import io.airlift.configuration.AbstractConfigurationAwareModule; import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename; +import io.trino.plugin.deltalake.MaxTableParameterLength; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider; import io.trino.plugin.deltalake.metastore.glue.DeltaLakeGlueMetastoreConfig; +import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats; import io.trino.plugin.hive.metastore.glue.v1.ForGlueHiveMetastore; import io.trino.plugin.hive.metastore.glue.v1.GlueMetastoreModule; @@ -27,6 +31,7 @@ import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder; import static io.airlift.configuration.ConfigBinder.configBinder; +import static org.weakref.jmx.guice.ExportBinder.newExporter; public class DeltaLakeGlueV1MetastoreModule extends AbstractConfigurationAwareModule @@ -40,6 +45,11 @@ protected void setup(Binder binder) .setBinding().toProvider(DeltaLakeGlueMetastoreTableFilterProvider.class); install(new GlueMetastoreModule()); + binder.bind(GlueMetastoreStats.class).in(Scopes.SINGLETON); + newExporter(binder).export(GlueMetastoreStats.class).withGeneratedName(); + 
binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeGlueV1MetastoreTableOperationsProvider.class).in(Scopes.SINGLETON); binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(true); + // Limit per Glue API docs (https://docs.aws.amazon.com/glue/latest/webapi/API_TableInput.html#Glue-Type-TableInput-Parameters as of this writing) + binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(512000); } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperations.java new file mode 100644 index 000000000000..85dc4a7d5d6d --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperations.java @@ -0,0 +1,74 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.deltalake.metastore.glue.v1;

import com.amazonaws.services.glue.AWSGlueAsync;
import com.amazonaws.services.glue.model.EntityNotFoundException;
import com.amazonaws.services.glue.model.GetTableRequest;
import com.amazonaws.services.glue.model.Table;
import com.amazonaws.services.glue.model.TableInput;
import com.amazonaws.services.glue.model.UpdateTableRequest;
import com.google.common.collect.ImmutableMap;
import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.connector.TableNotFoundException;

import java.util.Map;
import java.util.Optional;

import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueInputConverter.convertGlueTableToTableInput;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters;
import static java.util.Objects.requireNonNull;

/**
 * Stores Delta table metadata (last transaction version, schema string, table comment)
 * in the Glue catalog entry via the legacy AWS SDK v1 client.
 */
public class DeltaLakeGlueV1MetastoreTableOperations
        implements DeltaLakeTableOperations
{
    private final AWSGlueAsync glueClient;
    private final GlueMetastoreStats stats;

    public DeltaLakeGlueV1MetastoreTableOperations(AWSGlueAsync glueClient, GlueMetastoreStats stats)
    {
        this.glueClient = requireNonNull(glueClient, "glueClient is null");
        this.stats = requireNonNull(stats, "stats is null");
    }

    /**
     * Merges the Trino metadata parameters into the current Glue table and writes it back,
     * using Glue's optimistic locking via the table's current {@code versionId}.
     *
     * @throws TableNotFoundException if the table no longer exists in Glue
     */
    @Override
    public void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional<String> tableComment)
    {
        GetTableRequest getTableRequest = new GetTableRequest()
                .withDatabaseName(schemaTableName.getSchemaName())
                .withName(schemaTableName.getTableName());
        Table currentTable;
        try {
            currentTable = glueClient.getTable(getTableRequest).getTable();
        }
        catch (EntityNotFoundException e) {
            throw new TableNotFoundException(schemaTableName);
        }
        String glueVersionId = currentTable.getVersionId();

        TableInput tableInput = convertGlueTableToTableInput(currentTable);
        // Bug fix: was buildOrThrow(), which throws IllegalArgumentException on duplicate keys
        // as soon as the table already carries trino_* parameters from a previous commit.
        // buildKeepingLast() (as the SDK v2 implementation uses) lets the fresh values win.
        Map<String, String> parameters = ImmutableMap.<String, String>builder()
                .putAll(getTableParameters(currentTable))
                .putAll(tableMetadataParameters(version, schemaString, tableComment))
                .buildKeepingLast();
        tableInput.withParameters(parameters);

        UpdateTableRequest updateTableRequest = new UpdateTableRequest()
                .withDatabaseName(schemaTableName.getSchemaName())
                .withTableInput(tableInput)
                .withVersionId(glueVersionId);
        stats.getUpdateTable().call(() -> glueClient.updateTable(updateTableRequest));
    }
}
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.deltalake.metastore.glue.v1;

import com.amazonaws.services.glue.AWSGlueAsync;
import com.google.inject.Inject;
import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
import io.trino.spi.connector.ConnectorSession;

import static java.util.Objects.requireNonNull;

/**
 * Provider of {@link DeltaLakeTableOperations} for the legacy Glue (AWS SDK v1) metastore.
 * The Glue client and stats are session-independent; the session argument is unused.
 */
public class DeltaLakeGlueV1MetastoreTableOperationsProvider
        implements DeltaLakeTableOperationsProvider
{
    private final AWSGlueAsync glueClient;
    private final GlueMetastoreStats stats;

    @Inject
    public DeltaLakeGlueV1MetastoreTableOperationsProvider(AWSGlueAsync glueClient, GlueMetastoreStats stats)
    {
        this.glueClient = requireNonNull(glueClient, "glueClient is null");
        this.stats = requireNonNull(stats, "stats is null");
    }

    @Override
    public DeltaLakeTableOperations createTableOperations(ConnectorSession session)
    {
        return new DeltaLakeGlueV1MetastoreTableOperations(glueClient, stats);
    }
}
io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider; +import io.trino.plugin.deltalake.metastore.file.DeltaLakeFileMetastoreTableOperationsProvider; import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreModule; public class DeltaLakeThriftMetastoreModule @@ -26,6 +30,13 @@ public class DeltaLakeThriftMetastoreModule protected void setup(Binder binder) { install(new ThriftMetastoreModule()); + binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeFileMetastoreTableOperationsProvider.class).in(Scopes.SINGLETON); binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(false); + // Limit per Hive metastore code (https://github.com/apache/hive/tree/7f6367e0c6e21b11ef62da1ea6681a54d547de07/standalone-metastore/metastore-server/src/main/sql as of this writing) + // - MySQL: mediumtext (16777215) + // - SQL Server: nvarchar(max) (2147483647) + // - Oracle: clob (4294967295) + // - PostgreSQL: text (unlimited) + binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(16777215); } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreTableOperations.java new file mode 100644 index 000000000000..055b3c0d7b81 --- /dev/null +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreTableOperations.java @@ -0,0 +1,75 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.deltalake.metastore.thrift;

import com.google.common.collect.ImmutableMap;
import io.trino.metastore.AcidTransactionOwner;
import io.trino.metastore.HiveMetastore;
import io.trino.metastore.Table;
import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
import io.trino.plugin.hive.metastore.thrift.ThriftMetastore;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.connector.TableNotFoundException;

import java.util.Map;
import java.util.Optional;

import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters;
import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet;
import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable;
import static java.util.Objects.requireNonNull;

/**
 * Stores Delta table metadata (last transaction version, schema string, table comment)
 * in a Thrift Hive metastore. The update is performed under an exclusive metastore
 * table lock to avoid lost updates from concurrent writers.
 */
public class DeltaLakeThriftMetastoreTableOperations
        implements DeltaLakeTableOperations
{
    private final ConnectorSession session;
    private final HiveMetastore metastore;
    private final ThriftMetastore thriftMetastore;

    public DeltaLakeThriftMetastoreTableOperations(
            ConnectorSession session,
            HiveMetastore metastore,
            ThriftMetastore thriftMetastore)
    {
        this.session = requireNonNull(session, "session is null");
        this.metastore = requireNonNull(metastore, "metastore is null");
        this.thriftMetastore = requireNonNull(thriftMetastore, "thriftMetastore is null");
    }

    /**
     * Reads the current table under an exclusive lock, merges the Trino metadata parameters
     * (fresh values overwrite any left by a previous commit), and replaces the table.
     * The lock is always released, even when the update fails.
     *
     * @throws TableNotFoundException if the table no longer exists in the metastore
     */
    @Override
    public void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional<String> tableComment)
    {
        long lockId = thriftMetastore.acquireTableExclusiveLock(
                new AcidTransactionOwner(session.getUser()),
                session.getQueryId(),
                schemaTableName.getSchemaName(),
                schemaTableName.getTableName());

        try {
            Table currentTable = fromMetastoreApiTable(thriftMetastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName())
                    .orElseThrow(() -> new TableNotFoundException(schemaTableName)));
            // Fixed raw types: parameters map and builder are String -> String
            Map<String, String> parameters = ImmutableMap.<String, String>builder()
                    .putAll(currentTable.getParameters())
                    .putAll(tableMetadataParameters(version, schemaString, tableComment))
                    .buildKeepingLast();
            Table updatedTable = currentTable.withParameters(parameters);

            metastore.replaceTable(currentTable.getDatabaseName(), currentTable.getTableName(), updatedTable, buildInitialPrivilegeSet(currentTable.getOwner().orElseThrow()));
        }
        finally {
            thriftMetastore.releaseTableLock(lockId);
        }
    }
}
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.deltalake.metastore.thrift;

import com.google.inject.Inject;
import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
import io.trino.plugin.hive.metastore.HiveMetastoreFactory;
import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreFactory;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.security.ConnectorIdentity;

import java.util.Optional;

import static java.util.Objects.requireNonNull;

/**
 * Provider of {@link DeltaLakeTableOperations} for the Thrift Hive metastore. Both the
 * HiveMetastore (used to replace the table) and the ThriftMetastore (used for locking)
 * are created per session with the session's identity.
 */
public class DeltaLakeThriftMetastoreTableOperationsProvider
        implements DeltaLakeTableOperationsProvider
{
    private final HiveMetastoreFactory hiveMetastoreFactory;
    private final ThriftMetastoreFactory thriftMetastoreFactory;

    @Inject
    public DeltaLakeThriftMetastoreTableOperationsProvider(HiveMetastoreFactory hiveMetastoreFactory, ThriftMetastoreFactory thriftMetastoreFactory)
    {
        this.hiveMetastoreFactory = requireNonNull(hiveMetastoreFactory, "hiveMetastoreFactory is null");
        this.thriftMetastoreFactory = requireNonNull(thriftMetastoreFactory, "thriftMetastoreFactory is null");
    }

    @Override
    public DeltaLakeTableOperations createTableOperations(ConnectorSession session)
    {
        // Fixed raw type: Optional<ConnectorIdentity> (the ConnectorIdentity import was already present)
        Optional<ConnectorIdentity> identity = Optional.of(session.getIdentity());
        return new DeltaLakeThriftMetastoreTableOperations(session, hiveMetastoreFactory.createMetastore(identity), thriftMetastoreFactory.createMetastore(identity));
    }
}
b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/procedure/RegisterTableProcedure.java @@ -27,6 +27,7 @@ import io.trino.plugin.deltalake.DeltaLakeMetadataFactory; import io.trino.plugin.deltalake.metastore.DeltaLakeMetastore; import io.trino.plugin.deltalake.statistics.CachingExtendedStatisticsAccess; +import io.trino.plugin.deltalake.transactionlog.MetadataEntry; import io.trino.plugin.deltalake.transactionlog.TableSnapshot; import io.trino.plugin.deltalake.transactionlog.TransactionLogAccess; import io.trino.spi.TrinoException; @@ -44,7 +45,6 @@ import static io.trino.plugin.base.util.Procedures.checkProcedureArgument; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_FILESYSTEM_ERROR; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_TABLE; -import static io.trino.plugin.deltalake.DeltaLakeMetadata.buildTable; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir; import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet; import static io.trino.spi.StandardErrorCode.GENERIC_USER_ERROR; @@ -158,15 +158,14 @@ private void doRegisterTable( throw new TrinoException(DELTA_LAKE_FILESYSTEM_ERROR, format("Failed checking table location %s", tableLocation), e); } - Table table = buildTable(session, schemaTableName, tableLocation, true); - - PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow()); statisticsAccess.invalidateCache(schemaTableName, Optional.of(tableLocation)); transactionLogAccess.invalidateCache(schemaTableName, Optional.of(tableLocation)); // Verify we're registering a location with a valid table + TableSnapshot tableSnapshot; + MetadataEntry metadataEntry; try { - TableSnapshot tableSnapshot = transactionLogAccess.loadSnapshot(session, table.getSchemaTableName(), tableLocation, Optional.empty()); - transactionLogAccess.getMetadataEntry(session, tableSnapshot); // verify metadata 
exists + tableSnapshot = transactionLogAccess.loadSnapshot(session, schemaTableName, tableLocation, Optional.empty()); + metadataEntry = transactionLogAccess.getMetadataEntry(session, tableSnapshot); } catch (TrinoException e) { throw e; @@ -175,6 +174,15 @@ private void doRegisterTable( throw new TrinoException(DELTA_LAKE_INVALID_TABLE, "Failed to access table location: " + tableLocation, e); } + Table table = metadata.buildTable( + session, + schemaTableName, + tableLocation, + true, + Optional.ofNullable(metadataEntry.getDescription()), + tableSnapshot.getVersion(), + metadataEntry.getSchemaString()); + PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow()); metastore.createTable(table, principalPrivileges); } } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java index ba9cef997bc6..f576e89d542d 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java @@ -64,6 +64,8 @@ public void testDefaults() .setDeleteSchemaLocationsFallback(false) .setParquetTimeZone(TimeZone.getDefault().getID()) .setPerTransactionMetastoreCacheMaximumSize(1000) + .setStoreTableMetadataEnabled(false) + .setStoreTableMetadataThreads(5) .setTargetMaxFileSize(DataSize.of(1, GIGABYTE)) .setIdleWriterMinFileSize(DataSize.of(16, MEGABYTE)) .setUniqueTableLocation(true) @@ -99,6 +101,8 @@ public void testExplicitPropertyMappings() .put("delta.compression-codec", "GZIP") .put("delta.per-transaction-metastore-cache-maximum-size", "500") .put("delta.delete-schema-locations-fallback", "true") + .put("delta.metastore.store-table-metadata", "true") + .put("delta.metastore.store-table-metadata-threads", "1") .put("delta.parquet.time-zone", nonDefaultTimeZone().getID()) 
.put("delta.target-max-file-size", "2 GB") .put("delta.idle-writer-min-file-size", "1MB") @@ -133,6 +137,8 @@ public void testExplicitPropertyMappings() .setDeleteSchemaLocationsFallback(true) .setParquetTimeZone(nonDefaultTimeZone().getID()) .setPerTransactionMetastoreCacheMaximumSize(500) + .setStoreTableMetadataEnabled(true) + .setStoreTableMetadataThreads(1) .setTargetMaxFileSize(DataSize.of(2, GIGABYTE)) .setIdleWriterMinFileSize(DataSize.of(1, MEGABYTE)) .setUniqueTableLocation(false) diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java index 5fe1565a51e1..42aa22aea22e 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java @@ -16,16 +16,20 @@ import com.google.common.base.Stopwatch; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.io.Resources; import io.airlift.units.DataSize; import io.trino.Session; import io.trino.execution.QueryInfo; import io.trino.metastore.HiveMetastore; +import io.trino.metastore.Table; import io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.ColumnMappingMode; import io.trino.plugin.hive.HiveCompressionCodec; import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.tpch.TpchPlugin; +import io.trino.spi.connector.ColumnMetadata; import io.trino.sql.planner.plan.FilterNode; import io.trino.sql.planner.plan.TableDeleteNode; import io.trino.sql.planner.plan.TableFinishNode; @@ -40,6 +44,7 @@ import io.trino.testing.containers.Minio; import io.trino.testing.minio.MinioClient; import 
io.trino.testing.sql.TestTable; +import io.trino.testing.sql.TestView; import io.trino.testing.sql.TrinoSqlExecutor; import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; @@ -64,10 +69,13 @@ import static io.trino.plugin.deltalake.DeltaLakeMetadata.CREATE_OR_REPLACE_TABLE_OPERATION; import static io.trino.plugin.deltalake.DeltaLakeMetadata.CREATE_TABLE_OPERATION; import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; +import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.getColumnMetadata; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.TRANSACTION_LOG_DIRECTORY; import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE; import static io.trino.plugin.hive.TableType.MANAGED_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet; import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME; +import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.TimeZoneKey.getTimeZoneKey; import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.sql.planner.optimizations.PlanNodeSearcher.searchFrom; @@ -79,11 +87,14 @@ import static io.trino.testing.TestingConnectorBehavior.SUPPORTS_CREATE_SCHEMA; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.testing.TestingSession.testSessionBuilder; +import static io.trino.testing.assertions.Assert.assertEventually; import static io.trino.testing.containers.Minio.MINIO_ACCESS_KEY; import static io.trino.testing.containers.Minio.MINIO_REGION; import static io.trino.testing.containers.Minio.MINIO_SECRET_KEY; +import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; import static java.lang.String.format; import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Map.entry; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static org.assertj.core.api.Assertions.assertThat; 
import static org.assertj.core.api.Assertions.assertThatThrownBy; @@ -130,9 +141,12 @@ protected QueryRunner createQueryRunner() .put("s3.endpoint", minio.getMinioAddress()) .put("s3.path-style-access", "true") .put("s3.streaming.part-size", "5MB") // minimize memory usage + .put("delta.metastore.store-table-metadata", "true") .put("delta.enable-non-concurrent-writes", "true") .put("delta.register-table-procedure.enabled", "true") .buildOrThrow()); + metastore = TestingDeltaLakeUtils.getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); queryRunner.execute("CREATE SCHEMA " + SCHEMA + " WITH (location = 's3://" + bucketName + "/" + SCHEMA + "')"); queryRunner.execute("CREATE SCHEMA schemawithoutunderscore WITH (location = 's3://" + bucketName + "/schemawithoutunderscore')"); @@ -4757,4 +4771,280 @@ public void testDuplicatedFieldNames() assertQueryFails("ALTER TABLE " + table.getName() + " ALTER COLUMN col SET DATA TYPE row(x int, \"X\" int)", "This connector does not support setting column types"); } } + + @Test + public void testMetastoreAfterCreateTable() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) { + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains( + entry("comment", "test comment"), + entry("trino_last_transaction_version", "0"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}")); + } + } + + @Test + public void testMetastoreAfterCreateOrReplaceTable() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) { + assertUpdate("CREATE OR REPLACE TABLE " + table.getName() + "(new_col varchar) COMMENT 'new comment'"); + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + 
.contains( + entry("comment", "new comment"), + entry("trino_last_transaction_version", "1"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"new_col\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}")); + } + } + + @Test + public void testMetastoreAfterCreateTableAsSelect() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "COMMENT 'test comment' AS SELECT 1 col")) { + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains( + entry("comment", "test comment"), + entry("trino_last_transaction_version", "0"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}")); + } + } + + @Test + public void testMetastoreAfterCreateOrReplaceTableAsSelect() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "COMMENT 'test comment' AS SELECT 1 col")) { + assertUpdate("CREATE OR REPLACE TABLE " + table.getName() + " COMMENT 'new comment' AS SELECT 'test' new_col", 1); + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains( + entry("comment", "new comment"), + entry("trino_last_transaction_version", "1"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"new_col\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}")); + } + } + + @Test + public void testMetastoreAfterCommentTable() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) { + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .doesNotContainKey("comment") + .contains( + entry("trino_last_transaction_version", "0"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}")); + + 
assertUpdate("COMMENT ON TABLE " + table.getName() + " IS 'test comment'"); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains( + entry("comment", "test comment"), + entry("trino_last_transaction_version", "1"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"))); + } + } + + @Test + public void testMetastoreAfterCommentColumn() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int COMMENT 'test comment')")) { + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .doesNotContainKey("comment") + .contains( + entry("trino_last_transaction_version", "0"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"comment\":\"test comment\"}}]}")); + + assertUpdate("COMMENT ON COLUMN " + table.getName() + ".col IS 'new test comment'"); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .doesNotContainKey("comment") + .contains( + entry("trino_last_transaction_version", "1"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"comment\":\"new test comment\"}}]}"))); + } + } + + @Test + public void testMetastoreAfterAlterColumn() + { + // Use 'name' column mapping mode to allow renaming columns + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int NOT NULL) WITH (column_mapping_mode = 'name')")) { + Map initialParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters(); + assertThat(initialParameters) + .doesNotContainKey("comment") + .contains(entry("trino_last_transaction_version", "0")); + List 
initialColumns = getColumnMetadata(initialParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of()); + assertThat(initialColumns).extracting(DeltaLakeColumnMetadata::columnMetadata) + .containsExactly(ColumnMetadata.builder().setName("col").setType(INTEGER).setNullable(false).build()); + + // Drop not null constraints + assertUpdate("ALTER TABLE " + table.getName() + " ALTER COLUMN col DROP NOT NULL"); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .doesNotContainKey("comment") + .contains(entry("trino_last_transaction_version", "1"))); + Map dropNotNullParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters(); + List dropNotNullColumns = getColumnMetadata(dropNotNullParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of()); + assertThat(dropNotNullColumns).extracting(DeltaLakeColumnMetadata::columnMetadata) + .containsExactly(ColumnMetadata.builder().setName("col").setType(INTEGER).build()); + + // Add a new column + assertUpdate("ALTER TABLE " + table.getName() + " ADD COLUMN new_col int COMMENT 'test comment'"); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .doesNotContainKey("comment") + .contains(entry("trino_last_transaction_version", "2"))); + Map addColumnParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters(); + List columnsAfterAddColumn = getColumnMetadata(addColumnParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of()); + assertThat(columnsAfterAddColumn).extracting(DeltaLakeColumnMetadata::columnMetadata) + .containsExactly( + ColumnMetadata.builder().setName("col").setType(INTEGER).build(), + ColumnMetadata.builder().setName("new_col").setType(INTEGER).setComment(Optional.of("test 
comment")).build()); + + // Rename a column + assertUpdate("ALTER TABLE " + table.getName() + " RENAME COLUMN new_col TO renamed_col"); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .doesNotContainKey("comment") + .contains(entry("trino_last_transaction_version", "3"))); + Map renameColumnParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters(); + List columnsAfterRenameColumn = getColumnMetadata(renameColumnParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of()); + assertThat(columnsAfterRenameColumn).extracting(DeltaLakeColumnMetadata::columnMetadata) + .containsExactly( + ColumnMetadata.builder().setName("col").setType(INTEGER).build(), + ColumnMetadata.builder().setName("renamed_col").setType(INTEGER).setComment(Optional.of("test comment")).build()); + + // Drop a column + assertUpdate("ALTER TABLE " + table.getName() + " DROP COLUMN renamed_col"); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .doesNotContainKey("comment") + .contains(entry("trino_last_transaction_version", "4"))); + Map dropColumnParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters(); + List columnsAfterDropColumn = getColumnMetadata(dropColumnParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of()); + assertThat(columnsAfterDropColumn).extracting(DeltaLakeColumnMetadata::columnMetadata) + .containsExactly(ColumnMetadata.builder().setName("col").setType(INTEGER).build()); + + // Update the following test once the connector supports changing column types + assertQueryFails("ALTER TABLE " + table.getName() + " ALTER COLUMN col SET DATA TYPE bigint", "This connector does not support setting column types"); + } + } + + @Test + public void testMetastoreAfterSetTableProperties() + { + try 
(TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) { + assertUpdate("ALTER TABLE " + table.getName() + " SET PROPERTIES change_data_feed_enabled = true"); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains( + entry("trino_last_transaction_version", "1"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"))); + } + } + + @Test + public void testMetastoreAfterOptimize() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) { + assertUpdate("ALTER TABLE " + table.getName() + " EXECUTE optimize"); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains( + entry("trino_last_transaction_version", "1"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"))); + } + } + + @Test + public void testMetastoreAfterRegisterTable() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) { + assertUpdate("INSERT INTO " + table.getName() + " VALUES 1", 1); + String tableLocation = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getStorage().getLocation(); + metastore.dropTable(SCHEMA, table.getName(), false); + + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(SCHEMA, table.getName(), tableLocation)); + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains( + entry("comment", "test comment"), + entry("trino_last_transaction_version", "1"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}")); + } + } + + 
@Test + public void testMetastoreAfterCreateTableRemotely() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) { + Table metastoreTable = metastore.getTable(SCHEMA, table.getName()).orElseThrow(); + metastore.dropTable(SCHEMA, table.getName(), false); + + // Create a table on metastore directly to avoid cache during the creation + Set filterKeys = ImmutableSet.of("comment", "trino_last_transaction_version", "trino_metadata_schema_string"); + Table newMetastoreTable = Table.builder(metastoreTable) + .setParameters(Maps.filterKeys(metastoreTable.getParameters(), key -> !filterKeys.contains(key))) + .build(); + metastore.createTable(newMetastoreTable, buildInitialPrivilegeSet(metastoreTable.getOwner().orElseThrow())); + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .doesNotContainKeys("comment", "trino_last_transaction_version", "trino_metadata_schema_string"); + + // The parameters should contain the cache after the 1st access + assertQueryReturnsEmptyResult("SELECT * FROM " + table.getName()); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains( + entry("comment", "test comment"), + entry("trino_last_transaction_version", "0"), + entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"))); + } + } + + @Test + public void testMetastoreAfterDataManipulation() + { + String schemaString = "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"; + + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) { + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains(entry("trino_last_transaction_version", "0"), entry("trino_metadata_schema_string", 
schemaString)); + + assertUpdate("INSERT INTO " + table.getName() + " VALUES 1", 1); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains(entry("trino_last_transaction_version", "1"), entry("trino_metadata_schema_string", schemaString))); + + assertUpdate("UPDATE " + table.getName() + " SET col = 2", 1); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains(entry("trino_last_transaction_version", "2"), entry("trino_metadata_schema_string", schemaString))); + + assertUpdate("MERGE INTO " + table.getName() + " t " + + "USING (SELECT * FROM (VALUES 2)) AS s(col) " + + "ON (t.col = s.col) " + + "WHEN MATCHED THEN UPDATE SET col = 3", 1); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains(entry("trino_last_transaction_version", "3"), entry("trino_metadata_schema_string", schemaString))); + + assertUpdate("DELETE FROM " + table.getName() + " WHERE col = 3", 1); // row level delete + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains(entry("trino_last_transaction_version", "4"), entry("trino_metadata_schema_string", schemaString))); + + assertUpdate("DELETE FROM " + table.getName(), 0); // metadata delete + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains(entry("trino_last_transaction_version", "5"), entry("trino_metadata_schema_string", schemaString))); + } + } + + @Test + public void testMetastoreAfterTruncateTable() + { + String schemaString = "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"; + + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "AS SELECT 1 col")) { + assertThat(metastore.getTable(SCHEMA, 
table.getName()).orElseThrow().getParameters()) + .contains(entry("trino_last_transaction_version", "0"), entry("trino_metadata_schema_string", schemaString)); + + assertUpdate("TRUNCATE TABLE " + table.getName()); + assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains(entry("trino_last_transaction_version", "1"), entry("trino_metadata_schema_string", schemaString))); + } + } + + @Test + public void testMetastoreAfterCreateView() + { + try (TestView table = new TestView(getQueryRunner()::execute, "test_cache_metastore", "SELECT 1 col")) { + assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters()) + .doesNotContainKeys("trino_last_transaction_version", "trino_metadata_schema_string") + .contains(entry("comment", "Presto View")); + } + } } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java index 957d752a58cc..8cf9717f124c 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java @@ -20,7 +20,9 @@ import io.opentelemetry.sdk.trace.data.SpanData; import io.trino.Session; import io.trino.SystemSessionProperties; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler; import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; import io.trino.testing.QueryRunner; import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; @@ -61,6 +63,9 @@ public class TestDeltaLakeFileOperations { private static final int MAX_PREFIXES_COUNT = 10; + // TODO: Consider waiting for scheduled task completion instead of manual triggering + private DeltaLakeTableMetadataScheduler metadataScheduler; + 
@Override protected QueryRunner createQueryRunner() throws Exception @@ -68,12 +73,16 @@ protected QueryRunner createQueryRunner() Path catalogDir = Files.createTempDirectory("catalog-dir"); closeAfterClass(() -> deleteRecursively(catalogDir, ALLOW_INSECURE)); - return DeltaLakeQueryRunner.builder() + DistributedQueryRunner queryRunner = DeltaLakeQueryRunner.builder() .addCoordinatorProperty("optimizer.experimental-max-prefetched-information-schema-prefixes", Integer.toString(MAX_PREFIXES_COUNT)) .addDeltaProperty("hive.metastore.catalog.dir", catalogDir.toUri().toString()) .addDeltaProperty("delta.enable-non-concurrent-writes", "true") .addDeltaProperty("delta.register-table-procedure.enabled", "true") + .addDeltaProperty("delta.metastore.store-table-metadata", "true") + .addDeltaProperty("delta.metastore.store-table-metadata-threads", "0") // Use the same thread to make the test deterministic .build(); + metadataScheduler = TestingDeltaLakeUtils.getConnectorService(queryRunner, DeltaLakeTableMetadataScheduler.class); + return queryRunner; } @Test @@ -697,14 +706,24 @@ public void testInformationSchemaColumns() assertUpdate(session, "CREATE TABLE test_other_select_i_s_columns" + i + "(id varchar, age integer)"); // won't match the filter } + // Store table metadata in metastore for making the file access counts deterministic + metadataScheduler.process(); + // Bulk retrieval + assertFileSystemAccesses(session, "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA", + ImmutableMultiset.builder() + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables) + 
.build()); + assertFileSystemAccesses(session, "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA AND table_name LIKE 'test_select_i_s_columns%'", ImmutableMultiset.builder() - .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream"), tables) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables) .build()); // Pointed lookup @@ -720,11 +739,10 @@ public void testInformationSchemaColumns() // Pointed lookup with LIKE predicate (as if unintentional) assertFileSystemAccesses(session, "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA AND table_name LIKE 'test_select_i_s_columns0'", ImmutableMultiset.builder() - .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", 
"InputFile.newStream"), tables) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables) .build()); // Pointed lookup via DESCRIBE (which does some additional things before delegating to information_schema.columns) @@ -763,24 +781,33 @@ public void testSystemMetadataTableComments() assertUpdate(session, "CREATE TABLE test_other_select_s_m_t_comments" + i + "(id varchar, age integer)"); // won't match the filter } + // Store table metadata in metastore for making the file access counts deterministic + metadataScheduler.process(); + // Bulk retrieval + assertFileSystemAccesses(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA", + ImmutableMultiset.builder() + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables) + .build()); + assertFileSystemAccesses(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA AND table_name LIKE 'test_select_s_m_t_comments%'", ImmutableMultiset.builder() - .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2) - .addCopies(new 
FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream"), tables) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables) .build()); // Bulk retrieval for two schemas assertFileSystemAccesses(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name IN (CURRENT_SCHEMA, 'non_existent') AND table_name LIKE 'test_select_s_m_t_comments%'", ImmutableMultiset.builder() - .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream"), tables) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, 
"00000000000000000002.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables) .build()); // Pointed lookup @@ -796,11 +823,10 @@ public void testSystemMetadataTableComments() // Pointed lookup with LIKE predicate (as if unintentional) assertFileSystemAccesses(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA AND table_name LIKE 'test_select_s_m_t_comments0'", ImmutableMultiset.builder() - .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream"), tables) - .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables) + .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables) .build()); for (int i = 0; i < tables; i++) { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java index 75af796f42cd..425db8259745 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java +++ 
b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java @@ -23,6 +23,8 @@ import io.trino.filesystem.cache.DefaultCachingHostAddressProvider; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.filesystem.memory.MemoryFileSystemFactory; +import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler; +import io.trino.plugin.deltalake.metastore.file.DeltaLakeFileMetastoreTableOperationsProvider; import io.trino.plugin.deltalake.statistics.CachingExtendedStatisticsAccess; import io.trino.plugin.deltalake.statistics.ExtendedStatistics; import io.trino.plugin.deltalake.statistics.MetaDirStatisticsAccess; @@ -67,6 +69,7 @@ import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; import static java.lang.Math.clamp; import static org.assertj.core.api.Assertions.assertThat; @@ -213,8 +216,9 @@ public Stream getActiveFiles( new FileFormatDataSourceStats(), JsonCodec.jsonCodec(LastCheckpoint.class)); + HiveMetastoreFactory hiveMetastoreFactory = HiveMetastoreFactory.ofInstance(createTestingFileHiveMetastore(new MemoryFileSystemFactory(), Location.of("memory:///"))); DeltaLakeMetadataFactory metadataFactory = new DeltaLakeMetadataFactory( - HiveMetastoreFactory.ofInstance(createTestingFileHiveMetastore(new MemoryFileSystemFactory(), Location.of("memory:///"))), + hiveMetastoreFactory, hdfsFileSystemFactory, transactionLogAccess, typeManager, @@ -229,7 +233,8 @@ public Stream getActiveFiles( DeltaLakeRedirectionsProvider.NOOP, new CachingExtendedStatisticsAccess(new MetaDirStatisticsAccess(HDFS_FILE_SYSTEM_FACTORY, new JsonCodecFactory().jsonCodec(ExtendedStatistics.class))), true, - new NodeVersion("test_version")); + new 
NodeVersion("test_version"), + new DeltaLakeTableMetadataScheduler(new TestingNodeManager(), TESTING_TYPE_MANAGER, new DeltaLakeFileMetastoreTableOperationsProvider(hiveMetastoreFactory), Integer.MAX_VALUE, new DeltaLakeConfig())); ConnectorSession session = testingConnectorSessionWithConfig(deltaLakeConfig); DeltaLakeTransactionManager deltaLakeTransactionManager = new DeltaLakeTransactionManager(metadataFactory); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java index 9e5af371fb43..a6785c3780c0 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java @@ -14,16 +14,32 @@ package io.trino.plugin.deltalake.metastore; import com.google.common.collect.ImmutableMultiset; +import com.google.common.collect.Maps; import com.google.common.collect.Multiset; +import com.google.common.collect.Sets; +import io.opentelemetry.sdk.trace.data.SpanData; +import io.trino.Session; +import io.trino.metastore.HiveMetastore; +import io.trino.metastore.Table; import io.trino.plugin.deltalake.DeltaLakeQueryRunner; +import io.trino.plugin.deltalake.TestingDeltaLakeUtils; +import io.trino.plugin.hive.metastore.HiveMetastoreFactory; import io.trino.plugin.hive.metastore.MetastoreMethod; import io.trino.testing.AbstractTestQueryFramework; import io.trino.testing.QueryRunner; +import io.trino.testing.sql.TestTable; import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.parallel.Execution; -import static io.trino.plugin.hive.metastore.MetastoreInvocations.assertMetastoreInvocationsForQuery; +import java.util.HashSet; +import java.util.List; +import 
java.util.Optional; +import java.util.Set; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.TPCH_SCHEMA; +import static io.trino.plugin.hive.metastore.MetastoreInvocations.filterInvocations; import static io.trino.plugin.hive.metastore.MetastoreMethod.CREATE_TABLE; import static io.trino.plugin.hive.metastore.MetastoreMethod.DROP_TABLE; import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_ALL_DATABASES; @@ -31,17 +47,34 @@ import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE; import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLES; import static io.trino.plugin.hive.metastore.MetastoreMethod.REPLACE_TABLE; +import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet; +import static io.trino.testing.MultisetAssertions.assertMultisetsEqual; +import static java.util.Map.entry; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD; @Execution(SAME_THREAD) // metastore invocation counters shares mutable state so can't be run from many threads simultaneously public class TestDeltaLakeMetastoreAccessOperations extends AbstractTestQueryFramework { + private HiveMetastore metastore; + private DeltaLakeTableMetadataScheduler metadataScheduler; + @Override protected QueryRunner createQueryRunner() throws Exception { - return DeltaLakeQueryRunner.builder().build(); + QueryRunner queryRunner = DeltaLakeQueryRunner.builder() + .addDeltaProperty("delta.register-table-procedure.enabled", "true") + .addDeltaProperty("delta.metastore.store-table-metadata", "true") + .addDeltaProperty("delta.metastore.store-table-metadata-threads", "0") // Use the same thread to make the test deterministic + .build(); + + metastore = TestingDeltaLakeUtils.getConnectorService(queryRunner, HiveMetastoreFactory.class) + .createMetastore(Optional.empty()); + metadataScheduler 
= TestingDeltaLakeUtils.getConnectorService(queryRunner, DeltaLakeTableMetadataScheduler.class); + + return queryRunner; } @Test @@ -254,10 +287,343 @@ public void testShowTables() .build()); } + @Test + public void testSelectWithoutMetadataInMetastore() + { + assertUpdate("CREATE TABLE test_select_without_cache (id VARCHAR, age INT)"); + + removeMetadataCachingPropertiesFromMetastore("test_select_without_cache"); + assertMetastoreInvocations( + getSession(), + "SELECT * FROM test_select_without_cache", + ImmutableMultiset.builder() + .add(GET_TABLE) + .build(), + asyncInvocations(true)); // async invocations happen because the table metadata is not stored + assertMetastoreInvocations("SELECT * FROM test_select_without_cache", + ImmutableMultiset.builder() + .add(GET_TABLE) + .build()); + } + + @Test + public void testUnionWithoutMetadataInMetastore() + { + assertUpdate("CREATE TABLE test_union_without_cache (id VARCHAR, age INT)"); + assertMetastoreInvocations("SELECT * FROM test_union_without_cache UNION ALL SELECT * FROM test_union_without_cache", + ImmutableMultiset.builder() + .add(GET_TABLE) + .build()); + + removeMetadataCachingPropertiesFromMetastore("test_union_without_cache"); + assertMetastoreInvocations( + getSession(), + "SELECT * FROM test_union_without_cache UNION ALL SELECT * FROM test_union_without_cache", + ImmutableMultiset.builder() + ..add(GET_TABLE) + .build(), + asyncInvocations(true)); // async invocations happen because the table metadata is not stored + assertMetastoreInvocations("SELECT * FROM test_union_without_cache UNION ALL SELECT * FROM test_union_without_cache", + ImmutableMultiset.builder() + .add(GET_TABLE) + .build()); + } + + @Test + public void testSelectVersionedWithoutMetadataInMetastore() + { + assertUpdate("CREATE TABLE test_select_versioned_without_cache AS SELECT 2 as age", 1); + + // Time travel query should not cache the metadata because the definition might be different from the latest version + 
removeMetadataCachingPropertiesFromMetastore("test_select_versioned_without_cache"); + assertMetastoreInvocations("SELECT * FROM test_select_versioned_without_cache FOR VERSION AS OF 0", + ImmutableMultiset.builder() + .add(GET_TABLE) + .build()); + } + + @Test + public void testStoreMetastoreCreateOrReplaceTable() + { + testStoreMetastoreCreateOrReplaceTable(true); + testStoreMetastoreCreateOrReplaceTable(false); + } + + private void testStoreMetastoreCreateOrReplaceTable(boolean storeTableMetadata) + { + Session session = sessionWithStoreTableMetadata(storeTableMetadata); + + assertMetastoreInvocations(session, "CREATE OR REPLACE TABLE test_create_or_replace_without_cache (id VARCHAR, age INT)", + ImmutableMultiset.builder() + .add(GET_DATABASE) + .add(GET_TABLE) + .add(storeTableMetadata ? CREATE_TABLE : REPLACE_TABLE) + .build()); + removeMetadataCachingPropertiesFromMetastore("test_create_or_replace_without_cache"); + assertMetastoreInvocations( + session, + "CREATE OR REPLACE TABLE test_create_or_replace_without_cache (id VARCHAR, age INT)", + ImmutableMultiset.builder() + .add(GET_DATABASE) + .add(GET_TABLE) + .add(REPLACE_TABLE) + .build(), + asyncInvocations(storeTableMetadata)); + } + + @Test + public void testStoreMetastoreCreateTableOrReplaceTableAsSelect() + { + testStoreMetastoreCreateTableOrReplaceTableAsSelect(true); + testStoreMetastoreCreateTableOrReplaceTableAsSelect(false); + } + + private void testStoreMetastoreCreateTableOrReplaceTableAsSelect(boolean storeTableMetadata) + { + Session session = sessionWithStoreTableMetadata(storeTableMetadata); + + assertMetastoreInvocations(session, "CREATE OR REPLACE TABLE test_ctas_without_cache AS SELECT 1 AS age", + ImmutableMultiset.builder() + .add(GET_DATABASE) + .add(storeTableMetadata ? 
CREATE_TABLE : REPLACE_TABLE) + .add(GET_TABLE) + .build()); + removeMetadataCachingPropertiesFromMetastore("test_ctas_without_cache"); + assertMetastoreInvocations(session, "CREATE OR REPLACE TABLE test_ctas_without_cache AS SELECT 1 AS age", ImmutableMultiset.builder() + .add(GET_DATABASE) + .add(GET_TABLE) + .add(REPLACE_TABLE) + .build(), + asyncInvocations(storeTableMetadata)); + } + + @Test + public void testStoreMetastoreCommentTable() + { + testStoreMetastoreCommentTable(true); + testStoreMetastoreCommentTable(false); + } + + private void testStoreMetastoreCommentTable(boolean storeTableMetadata) + { + Session session = sessionWithStoreTableMetadata(storeTableMetadata); + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) { + assertMetastoreInvocations(session, "COMMENT ON TABLE " + table.getName() + " IS 'test comment'", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + } + } + + @Test + public void testStoreMetastoreCommentColumn() + { + testStoreMetastoreCommentColumn(true); + testStoreMetastoreCommentColumn(false); + } + + private void testStoreMetastoreCommentColumn(boolean storeTableMetadata) + { + Session session = sessionWithStoreTableMetadata(storeTableMetadata); + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int COMMENT 'test comment')")) { + assertMetastoreInvocations(session, "COMMENT ON COLUMN " + table.getName() + ".col IS 'new test comment'", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + } + } + + @Test + public void testStoreMetastoreAlterColumn() + { + testStoreMetastoreAlterColumn(true); + testStoreMetastoreAlterColumn(false); + } + + private void testStoreMetastoreAlterColumn(boolean storeTableMetadata) + { + Session session = sessionWithStoreTableMetadata(storeTableMetadata); + + // Use 'name' column mapping mode to allow renaming columns + try (TestTable table = new 
TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int NOT NULL) WITH (column_mapping_mode = 'name')")) { + assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " ALTER COLUMN col DROP NOT NULL", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " ADD COLUMN new_col int COMMENT 'test comment'", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " RENAME COLUMN new_col TO renamed_col", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " DROP COLUMN renamed_col", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + // Update the following test once the connector supports changing column types + assertQueryFails(session, "ALTER TABLE " + table.getName() + " ALTER COLUMN col SET DATA TYPE bigint", "This connector does not support setting column types"); + } + } + + @Test + public void testStoreMetastoreSetTableProperties() + { + testStoreMetastoreSetTableProperties(true); + testStoreMetastoreSetTableProperties(false); + } + + private void testStoreMetastoreSetTableProperties(boolean storeTableMetadata) + { + Session session = sessionWithStoreTableMetadata(storeTableMetadata); + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) { + assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " SET PROPERTIES change_data_feed_enabled = true", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + } + } + + @Test + public void testStoreMetastoreOptimize() + { + testStoreMetastoreOptimize(true); + testStoreMetastoreOptimize(false); + } + + private void testStoreMetastoreOptimize(boolean storeTableMetadata) + { + Session session = 
sessionWithStoreTableMetadata(storeTableMetadata); + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) { + assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " EXECUTE optimize", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + } + } + + @Test + public void testStoreMetastoreVacuum() + { + testStoreMetastoreVacuum(true); + testStoreMetastoreVacuum(false); + } + + private void testStoreMetastoreVacuum(boolean storeTableMetadata) + { + Session session = Session.builder(getSession()) + .setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "store_table_metadata", Boolean.toString(storeTableMetadata)) + .setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "vacuum_min_retention", "0s") + .build(); + + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "AS SELECT 1 a")) { + assertUpdate("UPDATE " + table.getName() + " SET a = 2", 1); + assertMetastoreInvocations( + session, + "CALL system.vacuum(schema_name => CURRENT_SCHEMA, table_name => '" + table.getName() + "', retention => '0s')", + ImmutableMultiset.of(GET_TABLE)); + } + } + + @Test + public void testStoreMetastoreRegisterTable() + { + testStoreMetastoreRegisterTable(true); + testStoreMetastoreRegisterTable(false); + } + + private void testStoreMetastoreRegisterTable(boolean storeTableMetadata) + { + Session session = sessionWithStoreTableMetadata(storeTableMetadata); + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) { + assertUpdate("INSERT INTO " + table.getName() + " VALUES 1", 1); + String tableLocation = metastore.getTable(TPCH_SCHEMA, table.getName()).orElseThrow().getStorage().getLocation(); + metastore.dropTable(TPCH_SCHEMA, table.getName(), false); + + assertMetastoreInvocations( + session, + "CALL system.register_table('%s', '%s', '%s')".formatted(TPCH_SCHEMA, 
table.getName(), tableLocation), + ImmutableMultiset.of(GET_DATABASE, CREATE_TABLE)); + } + } + + @Test + public void testStoreMetastoreDataManipulation() + { + testStoreMetastoreDataManipulation(true); + testStoreMetastoreDataManipulation(false); + } + + private void testStoreMetastoreDataManipulation(boolean storeTableMetadata) + { + Session session = sessionWithStoreTableMetadata(storeTableMetadata); + String schemaString = "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"; + + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) { + assertThat(metastore.getTable(TPCH_SCHEMA, table.getName()).orElseThrow().getParameters()) + .contains(entry("trino_last_transaction_version", "0"), entry("trino_metadata_schema_string", schemaString)); + + assertMetastoreInvocations(session, "INSERT INTO " + table.getName() + " VALUES 1", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + assertMetastoreInvocations(session, "UPDATE " + table.getName() + " SET col = 2", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + assertMetastoreInvocations(session, "MERGE INTO " + table.getName() + " t " + + "USING (SELECT * FROM (VALUES 2)) AS s(col) " + + "ON (t.col = s.col) " + + "WHEN MATCHED THEN UPDATE SET col = 3", + ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + assertMetastoreInvocations(session, "DELETE FROM " + table.getName() + " WHERE col = 3", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); // row level delete + assertMetastoreInvocations(session, "DELETE FROM " + table.getName(), ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); // metadata delete + } + } + + @Test + public void testStoreMetastoreTruncateTable() + { + testStoreMetastoreTruncateTable(true); + testStoreMetastoreTruncateTable(false); + } + + private void 
testStoreMetastoreTruncateTable(boolean storeTableMetadata) + { + Session session = sessionWithStoreTableMetadata(storeTableMetadata); + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "AS SELECT 1 col")) { + assertMetastoreInvocations(session, "TRUNCATE TABLE " + table.getName(), ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); + } + } + + private void removeMetadataCachingPropertiesFromMetastore(String tableName) + { + Table table = metastore.getTable(getSession().getSchema().orElseThrow(), tableName).orElseThrow(); + Table newMetastoreTable = Table.builder(table) + .setParameters(Maps.filterKeys(table.getParameters(), key -> !key.equals("trino_last_transaction_version"))) + .build(); + metastore.replaceTable(table.getDatabaseName(), table.getTableName(), newMetastoreTable, buildInitialPrivilegeSet(table.getOwner().orElseThrow())); + } + + private Session sessionWithStoreTableMetadata(boolean storeTableMetadata) + { + return Session.builder(getSession()) + .setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "store_table_metadata", Boolean.toString(storeTableMetadata)) + .build(); + } + private void assertMetastoreInvocations(@Language("SQL") String query, Multiset expectedInvocations) + { + assertMetastoreInvocations(getSession(), query, expectedInvocations, ImmutableMultiset.of()); + } + + private void assertMetastoreInvocations(Session session, @Language("SQL") String query, Multiset expectedInvocations) + { + assertMetastoreInvocations(session, query, expectedInvocations, ImmutableMultiset.of()); + } + + private void assertMetastoreInvocations(Session session, @Language("SQL") String query, Multiset expectedInvocations, Multiset asyncInvocations) { assertUpdate("CALL system.flush_metadata_cache()"); - assertMetastoreInvocationsForQuery(getDistributedQueryRunner(), getSession(), query, expectedInvocations); + metadataScheduler.clear(); + 
assertMetastoreInvocationsForQuery(getDistributedQueryRunner(), session, query, expectedInvocations, () -> metadataScheduler.process(), asyncInvocations); + } + + private static Multiset asyncInvocations(boolean storeTableParameter) + { + return storeTableParameter ? ImmutableMultiset.of(GET_TABLE, REPLACE_TABLE) : ImmutableMultiset.of(); + } + + private static void assertMetastoreInvocationsForQuery( + QueryRunner queryRunner, + Session session, + @Language("SQL") String query, + Multiset expectedInvocations, + Runnable asyncOperation, + Multiset expectedInvocationsAfterAsync) + { + queryRunner.execute(session, query); + List spansBeforeAsync = queryRunner.getSpans(); + + asyncOperation.run(); + Set spansAfterAsync = Sets.difference(new HashSet<>(queryRunner.getSpans()), new HashSet<>(spansBeforeAsync)); + + Multiset invocations = filterInvocations(spansBeforeAsync); + assertMultisetsEqual(invocations, expectedInvocations); + + Multiset asyncInvocations = filterInvocations(spansAfterAsync.stream().collect(toImmutableList())); + assertMultisetsEqual(asyncInvocations, expectedInvocationsAfterAsync); } } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeTableMetadataScheduler.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeTableMetadataScheduler.java new file mode 100644 index 000000000000..1e07f7402ba5 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeTableMetadataScheduler.java @@ -0,0 +1,133 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.deltalake.metastore; + +import io.trino.Session; +import io.trino.metastore.HiveMetastore; +import io.trino.plugin.deltalake.TestingDeltaLakePlugin; +import io.trino.plugin.jmx.JmxPlugin; +import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; +import io.trino.testing.QueryRunner; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; + +import java.lang.reflect.InvocationTargetException; +import java.nio.file.Path; +import java.time.Duration; +import java.util.Optional; +import java.util.stream.IntStream; + +import static com.google.common.reflect.Reflection.newProxy; +import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly; +import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.testing.TestingSession.testSessionBuilder; +import static java.time.temporal.ChronoUnit.SECONDS; + +final class TestDeltaLakeTableMetadataScheduler + extends AbstractTestQueryFramework +{ + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + Session session = testSessionBuilder() + .setCatalog("delta") + .setSchema("default") + .build(); + + QueryRunner queryRunner = DistributedQueryRunner.builder(session).build(); + Path dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("delta"); + HiveMetastore metastore = 
createTestingFileHiveMetastore(dataDirectory.toFile()); + + HiveMetastore proxiedMetastore = newProxy(HiveMetastore.class, (_, method, args) -> { + try { + if (method.getName().equals("replaceTable")) { + throw new UnsupportedOperationException(); + } + return method.invoke(metastore, args); + } + catch (InvocationTargetException e) { + throw e.getCause(); + } + }); + + queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.of(new TestingDeltaLakeMetastoreModule(proxiedMetastore)))); + queryRunner.createCatalog("delta", "delta_lake", ImmutableMap.of("delta.metastore.store-table-metadata", "true")); + + queryRunner.installPlugin(new JmxPlugin()); + queryRunner.createCatalog("jmx", "jmx"); + + queryRunner.execute("CREATE SCHEMA delta.default"); + + return queryRunner; + } + + @Test + @Disabled // TODO Enable after fixing the flaky assertion with JMX + void testFailureStopScheduler() + { + String coordinatorId = (String) computeScalar("SELECT node_id FROM system.runtime.nodes WHERE coordinator = true"); + + IntStream.range(0, 11).forEach(i -> assertUpdate("CREATE TABLE test_" + i + "(x int) WITH (column_mapping_mode = 'name')")); + + assertQuery( + "SELECT shutdown FROM jmx.current.\"trino.plugin.deltalake.metastore:catalog=delta,name=delta,type=deltalaketablemetadatascheduler\" " + + "WHERE node = '" + coordinatorId + "'", + "VALUES false"); + + // The max failure count is 10, so the scheduler should be stopped after 11 operations + IntStream.range(0, 11).forEach(i -> { + assertUpdate("ALTER TABLE test_" + i + " RENAME COLUMN x to y"); + assertUpdate("COMMENT ON TABLE test_" + i + " IS 'test comment'"); + }); + sleepUninterruptibly(Duration.of(1, SECONDS)); + + assertQuery( + "SELECT shutdown FROM jmx.current.\"trino.plugin.deltalake.metastore:catalog=delta,name=delta,type=deltalaketablemetadatascheduler\" " + + "WHERE node = '" + coordinatorId + "'", + "VALUES true"); + + // Metadata should return the correct values regardless of the scheduler 
status + assertQuery( + "SELECT table_name, column_name FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA", + "VALUES " + + "('test_0', 'y'), " + + "('test_1', 'y'), " + + "('test_2', 'y'), " + + "('test_3', 'y'), " + + "('test_4', 'y'), " + + "('test_5', 'y'), " + + "('test_6', 'y'), " + + "('test_7', 'y'), " + + "('test_8', 'y'), " + + "('test_9', 'y'), " + + "('test_10', 'y')"); + assertQuery( + "SELECT table_name, comment FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA", + "VALUES " + + "('test_0', 'test comment'), " + + "('test_1', 'test comment'), " + + "('test_2', 'test comment'), " + + "('test_3', 'test comment'), " + + "('test_4', 'test comment'), " + + "('test_5', 'test comment'), " + + "('test_6', 'test comment'), " + + "('test_7', 'test comment'), " + + "('test_8', 'test comment'), " + + "('test_9', 'test comment'), " + + "('test_10', 'test comment')"); + } +} diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestingDeltaLakeMetastoreModule.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestingDeltaLakeMetastoreModule.java index a6bf3d3b138e..adea03218e84 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestingDeltaLakeMetastoreModule.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestingDeltaLakeMetastoreModule.java @@ -15,9 +15,12 @@ import com.google.inject.Binder; import com.google.inject.Key; +import com.google.inject.Scopes; import io.airlift.configuration.AbstractConfigurationAwareModule; import io.trino.metastore.HiveMetastore; import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename; +import io.trino.plugin.deltalake.MaxTableParameterLength; +import io.trino.plugin.deltalake.metastore.file.DeltaLakeFileMetastoreTableOperationsProvider; import io.trino.plugin.hive.HideDeltaLakeTables; import io.trino.plugin.hive.metastore.CachingHiveMetastoreModule; 
import io.trino.plugin.hive.metastore.HiveMetastoreFactory; @@ -40,8 +43,10 @@ public void setup(Binder binder) { binder.bind(HiveMetastoreFactory.class).annotatedWith(RawHiveMetastoreFactory.class).toInstance(HiveMetastoreFactory.ofInstance(metastore)); install(new CachingHiveMetastoreModule(false)); + binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeFileMetastoreTableOperationsProvider.class).in(Scopes.SINGLETON); binder.bind(Key.get(boolean.class, HideDeltaLakeTables.class)).toInstance(false); binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(true); + binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(Integer.MAX_VALUE); } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index 65ba98816c06..65f89b24c407 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -667,7 +667,7 @@ public void renameTable(String databaseName, String tableName, String newDatabas } } - private static TableInput.Builder asTableInputBuilder(software.amazon.awssdk.services.glue.model.Table table) + public static TableInput.Builder asTableInputBuilder(software.amazon.awssdk.services.glue.model.Table table) { return TableInput.builder() .name(table.name()) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java index eb7de260bbbe..9bb7a00ebf7e 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java @@ -155,10 +155,10 @@ import static 
io.trino.plugin.hive.metastore.MetastoreUtil.verifyCanDropColumn; import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueInputConverter.convertFunction; +import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueInputConverter.convertGlueTableToTableInput; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueInputConverter.convertPartition; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableType; -import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.mappedCopy; import static io.trino.plugin.hive.util.HiveUtil.escapeSchemaName; import static io.trino.spi.StandardErrorCode.ALREADY_EXISTS; @@ -510,7 +510,7 @@ public void renameTable(String databaseName, String tableName, String newDatabas GetTableRequest getTableRequest = new GetTableRequest().withDatabaseName(databaseName) .withName(tableName); GetTableResult glueTable = glueClient.getTable(getTableRequest); - TableInput tableInput = convertGlueTableToTableInput(glueTable.getTable(), newTableName); + TableInput tableInput = convertGlueTableToTableInput(glueTable.getTable()).withName(newTableName); CreateTableRequest createTableRequest = new CreateTableRequest() .withDatabaseName(newDatabaseName) .withTableInput(tableInput); @@ -533,24 +533,6 @@ public void renameTable(String databaseName, String tableName, String newDatabas } } - private static TableInput convertGlueTableToTableInput(com.amazonaws.services.glue.model.Table glueTable, String newTableName) - { - return new TableInput() - .withName(newTableName) - .withDescription(glueTable.getDescription()) - .withOwner(glueTable.getOwner()) - 
.withLastAccessTime(glueTable.getLastAccessTime()) - .withLastAnalyzedTime(glueTable.getLastAnalyzedTime()) - .withRetention(glueTable.getRetention()) - .withStorageDescriptor(glueTable.getStorageDescriptor()) - .withPartitionKeys(glueTable.getPartitionKeys()) - .withViewOriginalText(glueTable.getViewOriginalText()) - .withViewExpandedText(glueTable.getViewExpandedText()) - .withTableType(getTableTypeNullable(glueTable)) - .withTargetTable(glueTable.getTargetTable()) - .withParameters(getTableParameters(glueTable)); - } - @Override public void commentTable(String databaseName, String tableName, Optional comment) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java index 3b7e0d7948b0..5ed0a2266b31 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java @@ -48,6 +48,8 @@ import static io.trino.plugin.hive.metastore.MetastoreUtil.metastoreFunctionName; import static io.trino.plugin.hive.metastore.MetastoreUtil.toResourceUris; import static io.trino.plugin.hive.metastore.MetastoreUtil.updateStatisticsParameters; +import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters; +import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable; public final class GlueInputConverter { @@ -89,6 +91,24 @@ public static TableInput convertTable(Table table) return input; } + public static TableInput convertGlueTableToTableInput(com.amazonaws.services.glue.model.Table glueTable) + { + return new TableInput() + .withName(glueTable.getName()) + .withDescription(glueTable.getDescription()) + .withOwner(glueTable.getOwner()) + 
.withLastAccessTime(glueTable.getLastAccessTime()) + .withLastAnalyzedTime(glueTable.getLastAnalyzedTime()) + .withRetention(glueTable.getRetention()) + .withStorageDescriptor(glueTable.getStorageDescriptor()) + .withPartitionKeys(glueTable.getPartitionKeys()) + .withViewOriginalText(glueTable.getViewOriginalText()) + .withViewExpandedText(glueTable.getViewExpandedText()) + .withTableType(getTableTypeNullable(glueTable)) + .withTargetTable(glueTable.getTargetTable()) + .withParameters(getTableParameters(glueTable)); + } + public static PartitionInput convertPartition(PartitionWithStatistics partitionWithStatistics) { PartitionInput input = convertPartition(partitionWithStatistics.getPartition()); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java index c15757c9269d..62deb99c088f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java @@ -19,6 +19,8 @@ import io.trino.testing.QueryRunner; import org.intellij.lang.annotations.Language; +import java.util.List; + import static com.google.common.collect.ImmutableMultiset.toImmutableMultiset; import static io.trino.testing.MultisetAssertions.assertMultisetsEqual; @@ -36,7 +38,12 @@ public static void assertMetastoreInvocationsForQuery( { queryRunner.execute(session, query); - Multiset invocations = queryRunner.getSpans().stream() + assertMultisetsEqual(filterInvocations(queryRunner.getSpans()), expectedInvocations); + } + + public static Multiset filterInvocations(List spans) + { + return spans.stream() .map(SpanData::getName) .filter(name -> name.startsWith(TRACE_PREFIX)) .map(name -> name.substring(TRACE_PREFIX.length())) @@ -44,7 +51,5 @@ public static void assertMetastoreInvocationsForQuery( .filter(name -> 
!name.equals("listTablePrivileges")) .map(MetastoreMethod::fromMethodName) .collect(toImmutableMultiset()); - - assertMultisetsEqual(invocations, expectedInvocations); } } diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java index 628bc23a1018..d2b2286ebe55 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java @@ -36,7 +36,9 @@ public void testJmxTablesExposedByDeltaLakeConnectorBackedByGlueMetastore() assertThat(onTrino().executeQuery("SHOW TABLES IN jmx.current LIKE '%name=delta%'")).containsOnly( row("io.trino.plugin.hive.metastore.cache:name=delta,type=cachinghivemetastore"), row("io.trino.plugin.hive.metastore.glue:name=delta,type=gluehivemetastore"), + row("io.trino.plugin.hive.metastore.glue:name=delta,type=gluemetastorestats"), row("io.trino.plugin.hive:catalog=delta,name=delta,type=fileformatdatasourcestats"), + row("trino.plugin.deltalake.metastore:catalog=delta,name=delta,type=deltalaketablemetadatascheduler"), row("trino.plugin.deltalake.transactionlog:catalog=delta,name=delta,type=transactionlogaccess")); } @@ -47,6 +49,7 @@ public void testJmxTablesExposedByDeltaLakeConnectorBackedByThriftMetastore() row("io.trino.plugin.hive.metastore.cache:name=delta,type=cachinghivemetastore"), row("io.trino.plugin.hive.metastore.thrift:name=delta,type=thrifthivemetastore"), row("io.trino.plugin.hive:catalog=delta,name=delta,type=fileformatdatasourcestats"), + row("trino.plugin.deltalake.metastore:catalog=delta,name=delta,type=deltalaketablemetadatascheduler"), row("trino.plugin.deltalake.transactionlog:catalog=delta,name=delta,type=transactionlogaccess")); } }