diff --git a/docs/src/main/sphinx/connector/delta-lake.md b/docs/src/main/sphinx/connector/delta-lake.md
index a35c582adf2b..a6d2fa5afb4d 100644
--- a/docs/src/main/sphinx/connector/delta-lake.md
+++ b/docs/src/main/sphinx/connector/delta-lake.md
@@ -161,6 +161,13 @@ values. Typical usage does not require you to configure them.
- Maximum number of metastore data objects per transaction in the Hive
metastore cache.
- `1000`
+* - `delta.metastore.store-table-metadata`
+ - Store table comments and column definitions in the metastore. Write
+ permission is required to update the metastore.
+ - `false`
+* - `delta.metastore.store-table-metadata-threads`
+ - Number of threads used for storing table metadata in metastore.
+ - `5`
* - `delta.delete-schema-locations-fallback`
- Whether schema locations are deleted when Trino can't determine whether they
contain external files.
diff --git a/plugin/trino-delta-lake/pom.xml b/plugin/trino-delta-lake/pom.xml
index 83a1da3d930f..10740fd938e2 100644
--- a/plugin/trino-delta-lake/pom.xml
+++ b/plugin/trino-delta-lake/pom.xml
@@ -130,6 +130,11 @@
trino-plugin-toolkit
+
+ io.trino.hive
+ hive-thrift
+
+
jakarta.annotation
jakarta.annotation-api
@@ -181,6 +186,21 @@
jmxutils
+
+ software.amazon.awssdk
+ aws-core
+
+
+
+ software.amazon.awssdk
+ glue
+
+
+
+ software.amazon.awssdk
+ utils
+
+
com.fasterxml.jackson.core
jackson-annotations
@@ -275,12 +295,6 @@
runtime
-
- software.amazon.awssdk
- glue
- runtime
-
-
com.github.docker-java
docker-java-api
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java
index aded91e665bd..d82d9749da55 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java
@@ -74,6 +74,8 @@ public class DeltaLakeConfig
private boolean collectExtendedStatisticsOnWrite = true;
private HiveCompressionCodec compressionCodec = HiveCompressionCodec.SNAPPY;
private long perTransactionMetastoreCacheMaximumSize = 1000;
+ private boolean storeTableMetadataEnabled;
+ private int storeTableMetadataThreads = 5;
private boolean deleteSchemaLocationsFallback;
private String parquetTimeZone = TimeZone.getDefault().getID();
private DataSize targetMaxFileSize = DataSize.of(1, GIGABYTE);
@@ -377,6 +379,33 @@ public DeltaLakeConfig setPerTransactionMetastoreCacheMaximumSize(long perTransa
return this;
}
+ public boolean isStoreTableMetadataEnabled()
+ {
+ return storeTableMetadataEnabled;
+ }
+
+ @Config("delta.metastore.store-table-metadata")
+ @ConfigDescription("Store table metadata in metastore")
+ public DeltaLakeConfig setStoreTableMetadataEnabled(boolean storeTableMetadataEnabled)
+ {
+ this.storeTableMetadataEnabled = storeTableMetadataEnabled;
+ return this;
+ }
+
+ @Min(0) // Allow 0 to use the same thread for testing purposes
+ public int getStoreTableMetadataThreads()
+ {
+ return storeTableMetadataThreads;
+ }
+
+ @Config("delta.metastore.store-table-metadata-threads")
+ @ConfigDescription("Number of threads used for storing table metadata in metastore")
+ public DeltaLakeConfig setStoreTableMetadataThreads(int storeTableMetadataThreads)
+ {
+ this.storeTableMetadataThreads = storeTableMetadataThreads;
+ return this;
+ }
+
public boolean isDeleteSchemaLocationsFallback()
{
return this.deleteSchemaLocationsFallback;
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java
index fe55e251ab69..5284e386be0a 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java
@@ -22,6 +22,7 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableTable;
import com.google.common.collect.Sets;
+import com.google.common.collect.Streams;
import dev.failsafe.Failsafe;
import dev.failsafe.RetryPolicy;
import io.airlift.json.JsonCodec;
@@ -47,7 +48,10 @@
import io.trino.plugin.deltalake.expression.ParsingException;
import io.trino.plugin.deltalake.expression.SparkExpressionParser;
import io.trino.plugin.deltalake.metastore.DeltaLakeMetastore;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.TableUpdateInfo;
import io.trino.plugin.deltalake.metastore.DeltaMetastoreTable;
+import io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore;
import io.trino.plugin.deltalake.metastore.NotADeltaLakeTableException;
import io.trino.plugin.deltalake.procedure.DeltaLakeTableExecuteHandle;
import io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId;
@@ -78,6 +82,7 @@
import io.trino.plugin.deltalake.transactionlog.writer.TransactionLogWriterFactory;
import io.trino.plugin.hive.TrinoViewHiveMetastore;
import io.trino.plugin.hive.security.AccessControlMetadata;
+import io.trino.spi.ErrorCode;
import io.trino.spi.NodeManager;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
@@ -105,6 +110,7 @@
import io.trino.spi.connector.Constraint;
import io.trino.spi.connector.ConstraintApplicationResult;
import io.trino.spi.connector.ProjectionApplicationResult;
+import io.trino.spi.connector.RelationCommentMetadata;
import io.trino.spi.connector.RetryMode;
import io.trino.spi.connector.RowChangeParadigm;
import io.trino.spi.connector.SaveMode;
@@ -160,6 +166,7 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Objects;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
@@ -168,6 +175,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
+import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@@ -189,6 +197,7 @@
import static io.trino.hive.formats.HiveClassNames.LAZY_SIMPLE_SERDE_CLASS;
import static io.trino.hive.formats.HiveClassNames.SEQUENCEFILE_INPUT_FORMAT_CLASS;
import static io.trino.metastore.StorageFormat.create;
+import static io.trino.metastore.Table.TABLE_COMMENT;
import static io.trino.plugin.base.filter.UtcConstraintExtractor.extractTupleDomain;
import static io.trino.plugin.base.projection.ApplyProjectionUtil.ProjectedColumnRepresentation;
import static io.trino.plugin.base.projection.ApplyProjectionUtil.extractSupportedProjectedColumns;
@@ -217,6 +226,7 @@
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isProjectionPushdownEnabled;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isQueryPartitionFilterRequired;
+import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isStoreTableMetadataInMetastoreEnabled;
import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isTableStatisticsEnabled;
import static io.trino.plugin.deltalake.DeltaLakeSplitManager.partitionMatchesPredicate;
import static io.trino.plugin.deltalake.DeltaLakeTableProperties.CHANGE_DATA_FEED_ENABLED_PROPERTY;
@@ -228,8 +238,14 @@
import static io.trino.plugin.deltalake.DeltaLakeTableProperties.getCheckpointInterval;
import static io.trino.plugin.deltalake.DeltaLakeTableProperties.getLocation;
import static io.trino.plugin.deltalake.DeltaLakeTableProperties.getPartitionedBy;
+import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.containsSchemaString;
+import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.getLastTransactionVersion;
+import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.isSameTransactionVersion;
+import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters;
import static io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_PROPERTY;
import static io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.TABLE_PROVIDER_VALUE;
+import static io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.convertToDeltaMetastoreTable;
+import static io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore.verifyDeltaLakeTable;
import static io.trino.plugin.deltalake.procedure.DeltaLakeTableProcedureId.OPTIMIZE;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.APPEND_ONLY_CONFIGURATION_KEY;
import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.COLUMN_MAPPING_PHYSICAL_NAME_CONFIGURATION_KEY;
@@ -275,13 +291,17 @@
import static io.trino.plugin.hive.util.HiveUtil.escapeTableName;
import static io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable;
import static io.trino.plugin.hive.util.HiveUtil.isHiveSystemSchema;
+import static io.trino.spi.ErrorType.EXTERNAL;
import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
import static io.trino.spi.StandardErrorCode.GENERIC_USER_ERROR;
import static io.trino.spi.StandardErrorCode.INVALID_ANALYZE_PROPERTY;
import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS;
import static io.trino.spi.StandardErrorCode.INVALID_SCHEMA_PROPERTY;
+import static io.trino.spi.StandardErrorCode.NOT_FOUND;
import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED;
import static io.trino.spi.StandardErrorCode.QUERY_REJECTED;
+import static io.trino.spi.StandardErrorCode.TABLE_NOT_FOUND;
+import static io.trino.spi.StandardErrorCode.UNSUPPORTED_TABLE_TYPE;
import static io.trino.spi.connector.RetryMode.NO_RETRIES;
import static io.trino.spi.connector.RowChangeParadigm.DELETE_ROW_AND_INSERT_ROW;
import static io.trino.spi.connector.SchemaTableName.schemaTableName;
@@ -319,7 +339,9 @@
import static java.util.UUID.randomUUID;
import static java.util.function.Function.identity;
import static java.util.function.Predicate.not;
+import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.partitioningBy;
+import static java.util.stream.Collectors.toUnmodifiableSet;
public class DeltaLakeMetadata
implements ConnectorMetadata
@@ -407,6 +429,8 @@ public class DeltaLakeMetadata
private final boolean deleteSchemaLocationsFallback;
private final boolean useUniqueTableLocation;
private final boolean allowManagedTableRename;
+ private final DeltaLakeTableMetadataScheduler metadataScheduler;
+ private final Map tableUpdateInfos = new ConcurrentHashMap<>();
private final Map latestTableVersions = new ConcurrentHashMap<>();
private final Map queriedSnapshots = new ConcurrentHashMap<>();
@@ -437,6 +461,7 @@ public DeltaLakeMetadata(
boolean deleteSchemaLocationsFallback,
DeltaLakeRedirectionsProvider deltaLakeRedirectionsProvider,
CachingExtendedStatisticsAccess statisticsAccess,
+ DeltaLakeTableMetadataScheduler metadataScheduler,
boolean useUniqueTableLocation,
boolean allowManagedTableRename)
{
@@ -459,6 +484,7 @@ public DeltaLakeMetadata(
this.deltaLakeRedirectionsProvider = requireNonNull(deltaLakeRedirectionsProvider, "deltaLakeRedirectionsProvider is null");
this.statisticsAccess = requireNonNull(statisticsAccess, "statisticsAccess is null");
this.deleteSchemaLocationsFallback = deleteSchemaLocationsFallback;
+ this.metadataScheduler = requireNonNull(metadataScheduler, "metadataScheduler is null");
this.useUniqueTableLocation = useUniqueTableLocation;
this.allowManagedTableRename = allowManagedTableRename;
}
@@ -576,13 +602,14 @@ public LocatedTableHandle getTableHandle(
// Pretend the table does not exist to produce better error message in case of table redirects to Hive
return null;
}
- Optional table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName());
- if (table.isEmpty()) {
+ Optional metastoreTable = metastore.getRawMetastoreTable(tableName.getSchemaName(), tableName.getTableName());
+ if (metastoreTable.isEmpty()) {
return null;
}
- boolean managed = table.get().managed();
+ DeltaMetastoreTable table = convertToDeltaMetastoreTable(metastoreTable.get());
+ boolean managed = table.managed();
- String tableLocation = table.get().location();
+ String tableLocation = table.location();
TrinoFileSystem fileSystem = fileSystemFactory.create(session);
TableSnapshot tableSnapshot = getSnapshot(session, tableName, tableLocation, endVersion.map(version -> getVersion(fileSystem, tableLocation, version)));
@@ -620,6 +647,11 @@ public LocatedTableHandle getTableHandle(
return null;
}
verifySupportedColumnMapping(getColumnMappingMode(metadataEntry, protocolEntry));
+ if (metadataScheduler.canStoreTableMetadata(session, metadataEntry.getSchemaString(), Optional.ofNullable(metadataEntry.getDescription())) &&
+ endVersion.isEmpty() &&
+ !isSameTransactionVersion(metastoreTable.get(), tableSnapshot)) {
+ tableUpdateInfos.put(tableName, new TableUpdateInfo(session, tableSnapshot.getVersion(), metadataEntry.getSchemaString(), Optional.ofNullable(metadataEntry.getDescription())));
+ }
return new DeltaLakeTableHandle(
tableName.getSchemaName(),
tableName.getTableName(),
@@ -820,6 +852,93 @@ public Optional getInsertLayout(ConnectorSession session,
return Optional.of(new ConnectorTableLayout(partitionColumnNames));
}
+ @Override
+ public Iterator streamRelationComments(ConnectorSession session, Optional schemaName, UnaryOperator> relationFilter)
+ {
+ Map viewDefinitions = getViews(session, schemaName);
+ ImmutableList.Builder commentMetadataBuilder = ImmutableList.builderWithExpectedSize(viewDefinitions.size());
+ ImmutableSet.Builder viewNamesBuilder = ImmutableSet.builderWithExpectedSize(viewDefinitions.size());
+ for (Entry viewDefinitionEntry : viewDefinitions.entrySet()) {
+ RelationCommentMetadata relationCommentMetadata = RelationCommentMetadata.forRelation(viewDefinitionEntry.getKey(), viewDefinitionEntry.getValue().getComment());
+ commentMetadataBuilder.add(relationCommentMetadata);
+ viewNamesBuilder.add(relationCommentMetadata.name());
+ }
+ List views = commentMetadataBuilder.build();
+ Set viewNames = viewNamesBuilder.build();
+
+ TrinoFileSystem fileSystem = fileSystemFactory.create(session);
+
+ Stream tables = listTables(session, schemaName).stream()
+ .filter(tableName -> !viewNames.contains(tableName))
+ .collect(collectingAndThen(toUnmodifiableSet(), relationFilter)).stream()
+ .map(tableName -> getRelationCommentMetadata(session, fileSystem, tableName))
+ .filter(Objects::nonNull);
+
+ Set availableViews = relationFilter.apply(viewNames);
+ return Streams.concat(views.stream().filter(commentMetadata -> availableViews.contains(commentMetadata.name())), tables)
+ .iterator();
+ }
+
+ private RelationCommentMetadata getRelationCommentMetadata(ConnectorSession session, TrinoFileSystem fileSystem, SchemaTableName tableName)
+ {
+ if (redirectTable(session, tableName).isPresent()) {
+ return RelationCommentMetadata.forRedirectedTable(tableName);
+ }
+
+ try {
+ Optional metastoreTable = metastore.getRawMetastoreTable(tableName.getSchemaName(), tableName.getTableName());
+ if (metastoreTable.isEmpty()) {
+ // this may happen when table is being deleted concurrently
+ return null;
+ }
+
+ Table table = metastoreTable.get();
+ verifyDeltaLakeTable(table);
+
+ String tableLocation = HiveMetastoreBackedDeltaLakeMetastore.getTableLocation(table);
+ if (canUseTableParametersFromMetastore(session, fileSystem, table, tableLocation)) {
+ // Don't check TABLE_COMMENT existence because it's not stored in case of null comment
+ return RelationCommentMetadata.forRelation(tableName, Optional.ofNullable(table.getParameters().get(TABLE_COMMENT)));
+ }
+
+ TableSnapshot snapshot = getSnapshot(session, tableName, tableLocation, Optional.empty());
+ MetadataEntry metadata = transactionLogAccess.getMetadataEntry(session, snapshot);
+ return RelationCommentMetadata.forRelation(tableName, Optional.ofNullable(metadata.getDescription()));
+ }
+ catch (RuntimeException e) {
+ boolean suppressed = false;
+ if (e instanceof TrinoException trinoException) {
+ ErrorCode errorCode = trinoException.getErrorCode();
+ suppressed = errorCode.equals(UNSUPPORTED_TABLE_TYPE.toErrorCode()) ||
+ // e.g. table deleted concurrently
+ errorCode.equals(TABLE_NOT_FOUND.toErrorCode()) ||
+ errorCode.equals(NOT_FOUND.toErrorCode()) ||
+ // e.g. Delta table being deleted concurrently resulting in failure to load metadata from filesystem
+ errorCode.getType() == EXTERNAL;
+ }
+ if (suppressed) {
+ LOG.debug("Failed to get metadata for table: %s", tableName);
+ }
+ else {
+ // A getTableHandle or getTableMetadata call may fail if the table disappeared during listing or is unsupported
+ LOG.warn("Failed to get metadata for table: %s", tableName);
+ }
+ // Since the getTableHandle did not return null (i.e. succeeded or failed), we assume the table would be returned by listTables
+ return RelationCommentMetadata.forRelation(tableName, Optional.empty());
+ }
+ }
+
+ private static boolean canUseTableParametersFromMetastore(ConnectorSession session, TrinoFileSystem fileSystem, Table table, String tableLocation)
+ {
+ if (!isStoreTableMetadataInMetastoreEnabled(session)) {
+ return false;
+ }
+
+ return getLastTransactionVersion(table)
+ .map(version -> isLatestVersion(fileSystem, tableLocation, version))
+ .orElse(false);
+ }
+
@Override
public Map> listTableColumns(ConnectorSession session, SchemaTablePrefix prefix)
{
@@ -833,25 +952,37 @@ public Iterator streamTableColumns(ConnectorSession sessio
.map(_ -> singletonList(prefix.toSchemaTableName()))
.orElseGet(() -> listTables(session, prefix.getSchema()));
+ TrinoFileSystem fileSystem = fileSystemFactory.create(session);
+
return tables.stream()
- .flatMap(table -> {
+ .flatMap(tableName -> {
try {
- if (redirectTable(session, table).isPresent()) {
+ if (redirectTable(session, tableName).isPresent()) {
// put "redirect marker" for current table
- return Stream.of(TableColumnsMetadata.forRedirectedTable(table));
+ return Stream.of(TableColumnsMetadata.forRedirectedTable(tableName));
}
- Optional metastoreTable = metastore.getTable(table.getSchemaName(), table.getTableName());
+ Optional metastoreTable = metastore.getRawMetastoreTable(tableName.getSchemaName(), tableName.getTableName());
if (metastoreTable.isEmpty()) {
// this may happen when table is being deleted concurrently,
return Stream.of();
}
- String tableLocation = metastoreTable.get().location();
- TableSnapshot snapshot = transactionLogAccess.loadSnapshot(session, table, tableLocation, Optional.empty());
+
+ Table table = metastoreTable.get();
+ verifyDeltaLakeTable(table);
+
+ String tableLocation = HiveMetastoreBackedDeltaLakeMetastore.getTableLocation(table);
+ if (containsSchemaString(table) && canUseTableParametersFromMetastore(session, fileSystem, table, tableLocation)) {
+ List columnsMetadata = metadataScheduler.getColumnsMetadata(table);
+ return Stream.of(TableColumnsMetadata.forTable(tableName, columnsMetadata));
+ }
+ // Don't store cache in streamTableColumns method for avoiding too many update calls
+
+ TableSnapshot snapshot = transactionLogAccess.loadSnapshot(session, tableName, tableLocation, Optional.empty());
MetadataEntry metadata = transactionLogAccess.getMetadataEntry(session, snapshot);
ProtocolEntry protocol = transactionLogAccess.getProtocolEntry(session, snapshot);
List columnMetadata = getTableColumnMetadata(metadata, protocol);
- return Stream.of(TableColumnsMetadata.forTable(table, columnMetadata));
+ return Stream.of(TableColumnsMetadata.forTable(tableName, columnMetadata));
}
catch (NotADeltaLakeTableException | IOException e) {
return Stream.empty();
@@ -859,13 +990,28 @@ public Iterator streamTableColumns(ConnectorSession sessio
catch (RuntimeException e) {
// this may happen when table is being deleted concurrently, it still exists in metastore but TL is no longer present
// there can be several different exceptions thrown this is why all RTE are caught and ignored here
- LOG.debug(e, "Ignored exception when trying to list columns from %s", table);
+ LOG.debug(e, "Ignored exception when trying to list columns from %s", tableName);
return Stream.empty();
}
})
.iterator();
}
+ private static boolean isLatestVersion(TrinoFileSystem fileSystem, String tableLocation, long version)
+ {
+ String transactionLogDir = getTransactionLogDir(tableLocation);
+ Location transactionLogJsonEntryPath = getTransactionLogJsonEntryPath(transactionLogDir, version);
+ Location nextTransactionLogJsonEntryPath = getTransactionLogJsonEntryPath(transactionLogDir, version + 1);
+ try {
+ return !fileSystem.newInputFile(nextTransactionLogJsonEntryPath).exists() &&
+ fileSystem.newInputFile(transactionLogJsonEntryPath).exists();
+ }
+ catch (IOException e) {
+ LOG.debug(e, "Failed to check table location: %s", tableLocation);
+ return false;
+ }
+ }
+
private List getColumns(MetadataEntry deltaMetadata, ProtocolEntry protocolEntry)
{
ImmutableList.Builder columns = ImmutableList.builder();
@@ -999,6 +1145,7 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe
checkPathContainsNoFiles(session, Location.of(location));
external = false;
}
+ long commitVersion = 0;
Location deltaLogDirectory = Location.of(getTransactionLogDir(location));
Optional checkpointInterval = getCheckpointInterval(tableMetadata.getProperties());
Optional changeDataFeedEnabled = getChangeDataFeedEnabled(tableMetadata.getProperties());
@@ -1025,6 +1172,7 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe
maxFieldId = OptionalInt.of(fieldId.get());
}
+ String schemaString = serializeSchemaAsJson(deltaTable.build());
try {
TrinoFileSystem fileSystem = fileSystemFactory.create(session);
boolean transactionLogFileExists = fileSystem.listFiles(deltaLogDirectory).hasNext();
@@ -1035,7 +1183,6 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe
"Using CREATE [OR REPLACE] TABLE with an existing table content is disallowed, instead use the system.register_table() procedure.");
}
else {
- long commitVersion = 0;
TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriterWithoutTransactionIsolation(session, location);
ProtocolEntry protocolEntry;
if (replaceExistingTable) {
@@ -1086,7 +1233,7 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe
throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Unable to access file system for: " + location, e);
}
- Table table = buildTable(session, schemaTableName, location, external);
+ Table table = buildTable(session, schemaTableName, location, external, tableMetadata.getComment(), commitVersion, schemaString);
PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow());
// As a precaution, clear the caches
@@ -1100,7 +1247,7 @@ public void createTable(ConnectorSession session, ConnectorTableMetadata tableMe
}
}
- public static Table buildTable(ConnectorSession session, SchemaTableName schemaTableName, String location, boolean isExternal)
+ public Table buildTable(ConnectorSession session, SchemaTableName schemaTableName, String location, boolean isExternal, Optional tableComment, long version, String schemaString)
{
Table.Builder tableBuilder = Table.builder()
.setDatabaseName(schemaTableName.getSchemaName())
@@ -1108,13 +1255,13 @@ public static Table buildTable(ConnectorSession session, SchemaTableName schemaT
.setOwner(Optional.of(session.getUser()))
.setTableType(isExternal ? EXTERNAL_TABLE.name() : MANAGED_TABLE.name())
.setDataColumns(DUMMY_DATA_COLUMNS)
- .setParameters(deltaTableProperties(session, location, isExternal));
+ .setParameters(deltaTableProperties(session, location, isExternal, tableComment, version, schemaString));
setDeltaStorageFormat(tableBuilder, location);
return tableBuilder.build();
}
- private static Map deltaTableProperties(ConnectorSession session, String location, boolean external)
+ private Map deltaTableProperties(ConnectorSession session, String location, boolean external, Optional tableComment, long version, String schemaString)
{
ImmutableMap.Builder properties = ImmutableMap.builder()
.put(TRINO_QUERY_ID_NAME, session.getQueryId())
@@ -1130,6 +1277,9 @@ private static Map deltaTableProperties(ConnectorSession session
// Mimicking the behavior of the Hive connector which sets both `Table#setTableType` and the "EXTERNAL" table property
properties.put("EXTERNAL", "TRUE");
}
+ if (metadataScheduler.canStoreTableMetadata(session, schemaString, tableComment)) {
+ properties.putAll(tableMetadataParameters(version, schemaString, tableComment));
+ }
return properties.buildOrThrow();
}
@@ -1393,7 +1543,6 @@ public Optional finishCreateTable(
.collect(toImmutableList());
SchemaTableName schemaTableName = schemaTableName(schemaName, tableName);
- Table table = buildTable(session, schemaTableName, location, handle.external());
ColumnMappingMode columnMappingMode = handle.columnMappingMode();
String schemaString = handle.schemaString();
@@ -1478,6 +1627,7 @@ public Optional finishCreateTable(
true);
}
+ Table table = buildTable(session, schemaTableName, location, handle.external(), handle.comment(), commitVersion, handle.schemaString());
PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow());
// As a precaution, clear the caches
@@ -1521,6 +1671,7 @@ public void setTableComment(ConnectorSession session, ConnectorTableHandle table
if (columnMappingMode != ID && columnMappingMode != NAME && columnMappingMode != NONE) {
throw new TrinoException(NOT_SUPPORTED, "Setting a table comment with column mapping %s is not supported".formatted(columnMappingMode));
}
+ MetadataEntry metadataEntry = handle.getMetadataEntry();
ProtocolEntry protocolEntry = handle.getProtocolEntry();
checkUnsupportedWriterFeatures(protocolEntry);
@@ -1537,6 +1688,7 @@ public void setTableComment(ConnectorSession session, ConnectorTableHandle table
MetadataEntry.builder(handle.getMetadataEntry())
.setDescription(comment));
transactionLogWriter.flush();
+ enqueueUpdateInfo(session, handle.getSchemaName(), handle.getTableName(), commitVersion, metadataEntry.getSchemaString(), comment);
}
catch (Exception e) {
throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to comment on table: %s.%s", handle.getSchemaName(), handle.getTableName()), e);
@@ -1563,6 +1715,7 @@ public void setColumnComment(ConnectorSession session, ConnectorTableHandle tabl
DeltaLakeTable deltaTable = DeltaLakeTable.builder(deltaLakeTableHandle.getMetadataEntry(), deltaLakeTableHandle.getProtocolEntry())
.setColumnComment(deltaLakeColumnHandle.getBaseColumnName(), comment.orElse(null))
.build();
+ String schemaString = serializeSchemaAsJson(deltaTable);
TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, deltaLakeTableHandle.getLocation());
appendTableEntries(
@@ -1572,8 +1725,15 @@ public void setColumnComment(ConnectorSession session, ConnectorTableHandle tabl
session,
protocolEntry,
MetadataEntry.builder(deltaLakeTableHandle.getMetadataEntry())
- .setSchemaString(serializeSchemaAsJson(deltaTable)));
+ .setSchemaString(schemaString));
transactionLogWriter.flush();
+ enqueueUpdateInfo(
+ session,
+ deltaLakeTableHandle.getSchemaName(),
+ deltaLakeTableHandle.getTableName(),
+ commitVersion,
+ schemaString,
+ Optional.ofNullable(deltaLakeTableHandle.getMetadataEntry().getDescription()));
}
catch (Exception e) {
throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to add '%s' column comment for: %s.%s", deltaLakeColumnHandle.getBaseColumnName(), deltaLakeTableHandle.getSchemaName(), deltaLakeTableHandle.getTableName()), e);
@@ -1637,7 +1797,7 @@ public void addColumn(ConnectorSession session, ConnectorTableHandle tableHandle
newColumnMetadata.getComment(),
generateColumnMetadata(columnMappingMode, maxColumnId))
.build();
-
+ String schemaString = serializeSchemaAsJson(deltaTable);
Map configuration = new HashMap<>(handle.getMetadataEntry().getConfiguration());
if (columnMappingMode == ID || columnMappingMode == NAME) {
checkArgument(maxColumnId.get() > 0, "maxColumnId must be larger than 0: %s", maxColumnId);
@@ -1652,9 +1812,16 @@ public void addColumn(ConnectorSession session, ConnectorTableHandle tableHandle
session,
buildProtocolEntryForNewColumn(protocolEntry, newColumnMetadata.getType()),
MetadataEntry.builder(handle.getMetadataEntry())
- .setSchemaString(serializeSchemaAsJson(deltaTable))
+ .setSchemaString(schemaString)
.setConfiguration(configuration));
transactionLogWriter.flush();
+ enqueueUpdateInfo(
+ session,
+ handle.getSchemaName(),
+ handle.getTableName(),
+ commitVersion,
+ schemaString,
+ Optional.ofNullable(handle.getMetadataEntry().getDescription()));
}
catch (Exception e) {
throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to add '%s' column for: %s.%s %s", newColumnMetadata.getName(), handle.getSchemaName(), handle.getTableName(), firstNonNull(e.getMessage(), e)), e);
@@ -1727,6 +1894,7 @@ public void dropColumn(ConnectorSession session, ConnectorTableHandle tableHandl
throw new TrinoException(NOT_SUPPORTED, "Dropping the last non-partition column is unsupported");
}
+ String schemaString = serializeSchemaAsJson(deltaTable);
try {
TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, table.getLocation());
appendTableEntries(
@@ -1736,8 +1904,9 @@ public void dropColumn(ConnectorSession session, ConnectorTableHandle tableHandl
session,
protocolEntry,
MetadataEntry.builder(metadataEntry)
- .setSchemaString(serializeSchemaAsJson(deltaTable)));
+ .setSchemaString(schemaString));
transactionLogWriter.flush();
+ enqueueUpdateInfo(session, table.getSchemaName(), table.getTableName(), commitVersion, schemaString, Optional.ofNullable(metadataEntry.getDescription()));
}
catch (Exception e) {
throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to drop '%s' column from: %s.%s", dropColumnName, table.getSchemaName(), table.getTableName()), e);
@@ -1792,6 +1961,7 @@ public void renameColumn(ConnectorSession session, ConnectorTableHandle tableHan
DeltaLakeTable deltaTable = DeltaLakeTable.builder(metadataEntry, protocolEntry)
.renameColumn(sourceColumnName, newColumnName)
.build();
+ String schemaString = serializeSchemaAsJson(deltaTable);
try {
TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, table.getLocation());
appendTableEntries(
@@ -1801,9 +1971,10 @@ public void renameColumn(ConnectorSession session, ConnectorTableHandle tableHan
session,
protocolEntry,
MetadataEntry.builder(metadataEntry)
- .setSchemaString(serializeSchemaAsJson(deltaTable))
+ .setSchemaString(schemaString)
.setPartitionColumns(partitionColumns));
transactionLogWriter.flush();
+ enqueueUpdateInfo(session, table.getSchemaName(), table.getTableName(), commitVersion, schemaString, Optional.ofNullable(metadataEntry.getDescription()));
// Don't update extended statistics because it uses physical column names internally
}
catch (Exception e) {
@@ -1827,17 +1998,20 @@ public void dropNotNullConstraint(ConnectorSession session, ConnectorTableHandle
DeltaLakeTable deltaTable = DeltaLakeTable.builder(metadataEntry, protocolEntry)
.dropNotNullConstraint(columnName)
.build();
+ long commitVersion = table.getReadVersion() + 1;
+ String schemaString = serializeSchemaAsJson(deltaTable);
try {
TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, table.getLocation());
appendTableEntries(
- table.getReadVersion() + 1,
+ commitVersion,
transactionLogWriter,
CHANGE_COLUMN_OPERATION,
session,
protocolEntry,
MetadataEntry.builder(metadataEntry)
- .setSchemaString(serializeSchemaAsJson(deltaTable)));
+ .setSchemaString(schemaString));
transactionLogWriter.flush();
+ enqueueUpdateInfo(session, table.getSchemaName(), table.getTableName(), commitVersion, schemaString, Optional.ofNullable(metadataEntry.getDescription()));
}
catch (Exception e) {
throw new TrinoException(DELTA_LAKE_BAD_WRITE, format("Unable to drop not null constraint from '%s' column in: %s", columnName, table.getSchemaTableName()), e);
@@ -1975,6 +2149,7 @@ public Optional finishInsert(
.get(context -> commitInsertOperation(session, handle, sourceTableHandles, isolationLevel, dataFileInfos, readVersion, context.getAttemptCount()));
writeCommitted = true;
writeCheckpointIfNeeded(session, handle.tableName(), handle.location(), handle.readVersion(), handle.metadataEntry().getCheckpointInterval(), commitVersion);
+ enqueueUpdateInfo(session, handle.tableName().getSchemaName(), handle.tableName().getTableName(), commitVersion, handle.metadataEntry().getSchemaString(), Optional.ofNullable(handle.metadataEntry().getDescription()));
if (isCollectExtendedStatisticsColumnStatisticsOnWrite(session) && !computedStatistics.isEmpty() && !dataFileInfos.isEmpty()) {
// TODO (https://github.com/trinodb/trino/issues/16088) Add synchronization when version conflict for INSERT is resolved.
@@ -2281,6 +2456,13 @@ public void finishMerge(
long commitVersion = Failsafe.with(TRANSACTION_CONFLICT_RETRY_POLICY)
.get(context -> commitMergeOperation(session, mergeHandle, mergeResults, sourceTableHandles, isolationLevel, allFiles, readVersion, context.getAttemptCount()));
writeCommitted = true;
+ enqueueUpdateInfo(
+ session,
+ handle.getSchemaName(),
+ handle.getTableName(),
+ commitVersion,
+ handle.getMetadataEntry().getSchemaString(),
+ Optional.ofNullable(handle.getMetadataEntry().getDescription()));
writeCheckpointIfNeeded(session, handle.getSchemaTableName(), handle.getLocation(), handle.getReadVersion(), checkpointInterval, commitVersion);
}
@@ -2557,6 +2739,13 @@ private void finishOptimize(ConnectorSession session, DeltaLakeTableExecuteHandl
transactionLogWriter.flush();
writeCommitted = true;
+ enqueueUpdateInfo(
+ session,
+ executeHandle.schemaTableName().getSchemaName(),
+ executeHandle.schemaTableName().getTableName(),
+ commitVersion,
+ optimizeHandle.getMetadataEntry().getSchemaString(),
+ Optional.ofNullable(optimizeHandle.getMetadataEntry().getDescription()));
Optional checkpointInterval = Optional.of(1L); // force checkpoint
writeCheckpointIfNeeded(
session,
@@ -2846,6 +3035,13 @@ public void setTableProperties(ConnectorSession session, ConnectorTableHandle ta
metadataEntry.ifPresent(transactionLogWriter::appendMetadataEntry);
transactionLogWriter.flush();
+ enqueueUpdateInfo(
+ session,
+ handle.getSchemaName(),
+ handle.getTableName(),
+ commitVersion,
+ metadataEntry.orElseThrow().getSchemaString(),
+ Optional.ofNullable(metadataEntry.orElseThrow().getDescription()));
}
catch (IOException e) {
throw new TrinoException(DELTA_LAKE_BAD_WRITE, "Failed to write Delta Lake transaction log entry", e);
@@ -3868,6 +4064,13 @@ private OptionalLong executeDelete(ConnectorSession session, ConnectorTableHandl
tableHandle.getReadVersion(),
tableHandle.getMetadataEntry().getCheckpointInterval(),
commitDeleteOperationResult.commitVersion());
+ enqueueUpdateInfo(
+ session,
+ tableHandle.getSchemaName(),
+ tableHandle.getTableName(),
+ commitDeleteOperationResult.commitVersion(),
+ tableHandle.getMetadataEntry().getSchemaString(),
+ Optional.ofNullable(tableHandle.getMetadataEntry().getDescription()));
return commitDeleteOperationResult.deletedRecords();
}
catch (Exception e) {
@@ -3921,6 +4124,20 @@ private record CommitDeleteOperationResult(long commitVersion, OptionalLong dele
}
}
+ private void enqueueUpdateInfo(ConnectorSession session, String schemaName, String tableName, long version, String schemaString, Optional<String> tableComment)
+ {
+ if (!metadataScheduler.canStoreTableMetadata(session, schemaString, tableComment)) {
+ return;
+ }
+ tableUpdateInfos.put(new SchemaTableName(schemaName, tableName), new TableUpdateInfo(session, version, schemaString, tableComment));
+ }
+
+ public void commit()
+ {
+ metadataScheduler.putAll(tableUpdateInfos);
+ tableUpdateInfos.clear();
+ }
+
private Stream<AddFileEntry> getAddFileEntriesMatchingEnforcedPartitionConstraint(ConnectorSession session, DeltaLakeTableHandle tableHandle)
{
TableSnapshot tableSnapshot = getSnapshot(session, tableHandle);
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadataFactory.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadataFactory.java
index 0bb1efc579ca..485ef534d91f 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadataFactory.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadataFactory.java
@@ -16,6 +16,7 @@
import com.google.inject.Inject;
import io.airlift.json.JsonCodec;
import io.trino.filesystem.TrinoFileSystemFactory;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler;
import io.trino.plugin.deltalake.metastore.HiveMetastoreBackedDeltaLakeMetastore;
import io.trino.plugin.deltalake.statistics.CachingExtendedStatisticsAccess;
import io.trino.plugin.deltalake.statistics.FileBasedTableStatisticsProvider;
@@ -56,6 +57,7 @@ public class DeltaLakeMetadataFactory
private final long perTransactionMetastoreCacheMaximumSize;
private final boolean deleteSchemaLocationsFallback;
private final boolean useUniqueTableLocation;
+ private final DeltaLakeTableMetadataScheduler metadataScheduler;
private final boolean allowManagedTableRename;
private final String trinoVersion;
@@ -76,7 +78,8 @@ public DeltaLakeMetadataFactory(
DeltaLakeRedirectionsProvider deltaLakeRedirectionsProvider,
CachingExtendedStatisticsAccess statisticsAccess,
@AllowDeltaLakeManagedTableRename boolean allowManagedTableRename,
- NodeVersion nodeVersion)
+ NodeVersion nodeVersion,
+ DeltaLakeTableMetadataScheduler metadataScheduler)
{
this.hiveMetastoreFactory = requireNonNull(hiveMetastoreFactory, "hiveMetastore is null");
this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
@@ -98,6 +101,7 @@ public DeltaLakeMetadataFactory(
this.useUniqueTableLocation = deltaLakeConfig.isUniqueTableLocation();
this.allowManagedTableRename = allowManagedTableRename;
this.trinoVersion = requireNonNull(nodeVersion, "nodeVersion is null").toString();
+ this.metadataScheduler = requireNonNull(metadataScheduler, "metadataScheduler is null");
}
public DeltaLakeMetadata create(ConnectorIdentity identity)
@@ -135,6 +139,7 @@ public DeltaLakeMetadata create(ConnectorIdentity identity)
deleteSchemaLocationsFallback,
deltaLakeRedirectionsProvider,
statisticsAccess,
+ metadataScheduler,
useUniqueTableLocation,
allowManagedTableRename);
}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java
index 7960612bf0ee..3026b015b3e4 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeModule.java
@@ -27,6 +27,7 @@
import io.trino.plugin.deltalake.cache.DeltaLakeCacheKeyProvider;
import io.trino.plugin.deltalake.functions.tablechanges.TableChangesFunctionProvider;
import io.trino.plugin.deltalake.functions.tablechanges.TableChangesProcessorProvider;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler;
import io.trino.plugin.deltalake.procedure.DropExtendedStatsProcedure;
import io.trino.plugin.deltalake.procedure.FlushMetadataCacheProcedure;
import io.trino.plugin.deltalake.procedure.OptimizeTableProcedure;
@@ -117,6 +118,9 @@ public void setup(Binder binder)
binder.bind(TransactionLogAccess.class).in(Scopes.SINGLETON);
newExporter(binder).export(TransactionLogAccess.class)
.as(generator -> generator.generatedNameOf(TransactionLogAccess.class, catalogName.get().toString()));
+ binder.bind(DeltaLakeTableMetadataScheduler.class).in(Scopes.SINGLETON);
+ newExporter(binder).export(DeltaLakeTableMetadataScheduler.class)
+ .as(generator -> generator.generatedNameOf(DeltaLakeTableMetadataScheduler.class, catalogName.get().toString()));
binder.bind(TransactionLogWriterFactory.class).in(Scopes.SINGLETON);
binder.bind(TransactionLogSynchronizerManager.class).in(Scopes.SINGLETON);
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java
index d3f8e8eb398f..01e744dc3047 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java
@@ -75,6 +75,7 @@ public final class DeltaLakeSessionProperties
private static final String PROJECTION_PUSHDOWN_ENABLED = "projection_pushdown_enabled";
private static final String QUERY_PARTITION_FILTER_REQUIRED = "query_partition_filter_required";
private static final String CHECKPOINT_FILTERING_ENABLED = "checkpoint_filtering_enabled";
+ private static final String STORE_TABLE_METADATA = "store_table_metadata";
private final List<PropertyMetadata<?>> sessionProperties;
@@ -230,7 +231,12 @@ public DeltaLakeSessionProperties(
CHECKPOINT_FILTERING_ENABLED,
"Use filter in checkpoint reader",
deltaLakeConfig.isCheckpointFilteringEnabled(),
- false));
+ false),
+ booleanProperty(
+ STORE_TABLE_METADATA,
+ "Store table metadata in metastore",
+ deltaLakeConfig.isStoreTableMetadataEnabled(),
+ true));
}
@Override
@@ -348,4 +354,9 @@ public static boolean isCheckpointFilteringEnabled(ConnectorSession session)
{
return session.getProperty(CHECKPOINT_FILTERING_ENABLED, Boolean.class);
}
+
+ public static boolean isStoreTableMetadataInMetastoreEnabled(ConnectorSession session)
+ {
+ return session.getProperty(STORE_TABLE_METADATA, Boolean.class);
+ }
}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTransactionManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTransactionManager.java
index 175f64c699ca..9aa3e56cb1ea 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTransactionManager.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeTransactionManager.java
@@ -53,6 +53,11 @@ public void commit(ConnectorTransactionHandle transaction)
{
MemoizedMetadata deltaLakeMetadata = transactions.remove(transaction);
checkArgument(deltaLakeMetadata != null, "no such transaction: %s", transaction);
+ deltaLakeMetadata.optionalGet().ifPresent(metadata -> {
+ try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(getClass().getClassLoader())) {
+ metadata.commit();
+ }
+ });
}
public void rollback(ConnectorTransactionHandle transaction)
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/MaxTableParameterLength.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/MaxTableParameterLength.java
new file mode 100644
index 000000000000..aeb0e9f7f6a0
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/MaxTableParameterLength.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake;
+
+import com.google.inject.BindingAnnotation;
+
+import java.lang.annotation.Retention;
+import java.lang.annotation.Target;
+
+import static java.lang.annotation.ElementType.FIELD;
+import static java.lang.annotation.ElementType.METHOD;
+import static java.lang.annotation.ElementType.PARAMETER;
+import static java.lang.annotation.RetentionPolicy.RUNTIME;
+
+@Retention(RUNTIME)
+@Target({FIELD, PARAMETER, METHOD})
+@BindingAnnotation
+public @interface MaxTableParameterLength {}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableMetadataScheduler.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableMetadataScheduler.java
new file mode 100644
index 000000000000..7136098f0d4a
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableMetadataScheduler.java
@@ -0,0 +1,263 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.inject.Inject;
+import io.airlift.concurrent.MoreFutures;
+import io.airlift.log.Logger;
+import io.trino.metastore.Table;
+import io.trino.plugin.deltalake.DeltaLakeColumnMetadata;
+import io.trino.plugin.deltalake.DeltaLakeConfig;
+import io.trino.plugin.deltalake.MaxTableParameterLength;
+import io.trino.plugin.deltalake.transactionlog.TableSnapshot;
+import io.trino.spi.NodeManager;
+import io.trino.spi.connector.ColumnMetadata;
+import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.connector.SchemaTableName;
+import io.trino.spi.connector.TableNotFoundException;
+import io.trino.spi.type.TypeManager;
+import jakarta.annotation.PostConstruct;
+import jakarta.annotation.PreDestroy;
+import org.weakref.jmx.Managed;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static com.google.common.collect.ImmutableMap.toImmutableMap;
+import static com.google.common.util.concurrent.MoreExecutors.newDirectExecutorService;
+import static io.airlift.concurrent.Threads.daemonThreadsNamed;
+import static io.airlift.concurrent.Threads.threadsNamed;
+import static io.trino.metastore.Table.TABLE_COMMENT;
+import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isStoreTableMetadataInMetastoreEnabled;
+import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.ColumnMappingMode.NONE;
+import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.getColumnMetadata;
+import static java.util.Comparator.comparing;
+import static java.util.Objects.requireNonNull;
+import static java.util.concurrent.Executors.newFixedThreadPool;
+import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static java.util.function.BinaryOperator.maxBy;
+
+public class DeltaLakeTableMetadataScheduler
+{
+ private static final Logger log = Logger.get(DeltaLakeTableMetadataScheduler.class);
+
+ private static final String TRINO_LAST_TRANSACTION_VERSION = "trino_last_transaction_version";
+ private static final String TRINO_METADATA_SCHEMA_STRING = "trino_metadata_schema_string";
+ private static final int MAX_FAILED_COUNTS = 10;
+
+ private final DeltaLakeTableOperationsProvider tableOperationsProvider;
+ private final TypeManager typeManager;
+ private final int tableParameterLengthLimit;
+ private final int storeTableMetadataThreads;
+ private final Map<SchemaTableName, TableUpdateInfo> updateInfos = new ConcurrentHashMap<>();
+ private final boolean enabled;
+
+ private ExecutorService executor;
+ private ScheduledExecutorService scheduler;
+ private final AtomicInteger failedCounts = new AtomicInteger();
+
+ @Inject
+ public DeltaLakeTableMetadataScheduler(
+ NodeManager nodeManager,
+ TypeManager typeManager,
+ DeltaLakeTableOperationsProvider tableOperationsProvider,
+ @MaxTableParameterLength int tableParameterLengthLimit,
+ DeltaLakeConfig config)
+ {
+ this.typeManager = requireNonNull(typeManager, "typeManager is null");
+ this.tableOperationsProvider = requireNonNull(tableOperationsProvider, "tableOperationsProvider is null");
+ this.tableParameterLengthLimit = tableParameterLengthLimit;
+ this.storeTableMetadataThreads = config.getStoreTableMetadataThreads();
+ requireNonNull(nodeManager, "nodeManager is null");
+ this.enabled = config.isStoreTableMetadataEnabled() && nodeManager.getCurrentNode().isCoordinator();
+ }
+
+ @Managed
+ public boolean isShutdown()
+ {
+ return scheduler.isShutdown();
+ }
+
+ public void putAll(Map<SchemaTableName, TableUpdateInfo> tableParameters)
+ {
+ if (!enabled || scheduler.isShutdown()) {
+ log.debug("Scheduler is already shutdown, skipping the update: %s", tableParameters);
+ return;
+ }
+ updateInfos.putAll(tableParameters);
+ }
+
+ @PostConstruct
+ public void start()
+ {
+ if (enabled) {
+ executor = storeTableMetadataThreads == 0 ? newDirectExecutorService() : newFixedThreadPool(storeTableMetadataThreads, threadsNamed("store-table-metadata-%s"));
+ scheduler = newSingleThreadScheduledExecutor(daemonThreadsNamed("store-table-metadata"));
+
+ scheduler.scheduleWithFixedDelay(() -> {
+ try {
+ process();
+ }
+ catch (Throwable e) {
+ log.warn(e, "Error storing table metadata");
+ }
+ try {
+ checkFailedTasks();
+ }
+ catch (Throwable e) {
+ log.warn(e, "Error canceling metadata update tasks");
+ }
+ }, 200, 1000, MILLISECONDS);
+ }
+ }
+
+ @VisibleForTesting
+ public void process()
+ {
+ List<Callable<Void>> tasks = new ArrayList<>();
+ synchronized (this) {
+ if (updateInfos.isEmpty()) {
+ return;
+ }
+
+ Map<SchemaTableName, TableUpdateInfo> updateTables = updateInfos.entrySet().stream()
+ .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue, maxBy(comparing(TableUpdateInfo::version))));
+
+ log.debug("Processing %s table(s): %s", updateTables.size(), updateTables.keySet());
+ for (Map.Entry<SchemaTableName, TableUpdateInfo> entry : updateTables.entrySet()) {
+ tasks.add(() -> {
+ updateTable(entry.getKey(), entry.getValue());
+ return null;
+ });
+ }
+
+ updateInfos.clear();
+ }
+
+ try {
+ executor.invokeAll(tasks).forEach(MoreFutures::getDone);
+ }
+ catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void updateTable(SchemaTableName schemaTableName, TableUpdateInfo info)
+ {
+ log.debug("Updating table: '%s'", schemaTableName);
+ try {
+ tableOperationsProvider.createTableOperations(info.session)
+ .commitToExistingTable(schemaTableName, info.version, info.schemaString, info.tableComment);
+ log.debug("Replaced table: '%s'", schemaTableName);
+ }
+ catch (TableNotFoundException e) {
+ // Don't increment failedCounts. The table might have been dropped concurrently.
+ log.debug("Table disappeared during metadata updating operation: '%s'", schemaTableName);
+ }
+ catch (Exception e) {
+ log.warn(e, "Failed to store table metadata for '%s'", schemaTableName);
+ // TODO Consider incrementing only when the exception is a permission issue
+ failedCounts.incrementAndGet();
+ }
+ }
+
+ private void checkFailedTasks()
+ {
+ if (failedCounts.get() > MAX_FAILED_COUNTS) {
+ log.warn("Too many failed tasks, stopping the scheduler");
+ stop();
+ }
+ }
+
+ @VisibleForTesting
+ public void clear()
+ {
+ updateInfos.clear();
+ }
+
+ @PreDestroy
+ public void stop()
+ {
+ if (enabled) {
+ scheduler.shutdownNow();
+ executor.shutdownNow();
+ }
+ }
+
+ public static boolean isSameTransactionVersion(Table table, TableSnapshot snapshot)
+ {
+ return getLastTransactionVersion(table)
+ .map(version -> version == snapshot.getVersion())
+ .orElse(false);
+ }
+
+ public static Optional<Long> getLastTransactionVersion(Table table)
+ {
+ String version = table.getParameters().get(TRINO_LAST_TRANSACTION_VERSION);
+ return Optional.ofNullable(version).map(Long::parseLong);
+ }
+
+ public static boolean containsSchemaString(Table table)
+ {
+ return table.getParameters().containsKey(TRINO_METADATA_SCHEMA_STRING);
+ }
+
+ public List<ColumnMetadata> getColumnsMetadata(Table table)
+ {
+ String schemaString = table.getParameters().get(TRINO_METADATA_SCHEMA_STRING);
+ // Specify NONE and empty partition because they are unused when listing columns
+ return getColumnMetadata(schemaString, typeManager, NONE, ImmutableList.of()).stream()
+ .map(DeltaLakeColumnMetadata::columnMetadata)
+ .collect(toImmutableList());
+ }
+
+ public boolean canStoreTableMetadata(ConnectorSession session, String schemaString, Optional<String> tableComment)
+ {
+ return isStoreTableMetadataInMetastoreEnabled(session) &&
+ schemaString.length() <= tableParameterLengthLimit &&
+ tableComment.map(String::length).orElse(0) <= tableParameterLengthLimit;
+ }
+
+ public static Map<String, String> tableMetadataParameters(long version, String schemaString, Optional<String> tableComment)
+ {
+ ImmutableMap.Builder<String, String> parameters = ImmutableMap.builder();
+ tableComment.ifPresent(comment -> parameters.put(TABLE_COMMENT, comment));
+ parameters.put(TRINO_LAST_TRANSACTION_VERSION, Long.toString(version));
+ parameters.put(TRINO_METADATA_SCHEMA_STRING, schemaString);
+ return parameters.buildOrThrow();
+ }
+
+ public record TableUpdateInfo(ConnectorSession session, long version, String schemaString, Optional<String> tableComment)
+ {
+ public TableUpdateInfo
+ {
+ requireNonNull(session, "session is null");
+ requireNonNull(schemaString, "schemaString is null");
+ requireNonNull(tableComment, "tableComment is null");
+ }
+ }
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperations.java
new file mode 100644
index 000000000000..b146833afb65
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperations.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore;
+
+import io.trino.annotation.NotThreadSafe;
+import io.trino.spi.connector.SchemaTableName;
+
+import java.util.Optional;
+
+@NotThreadSafe
+public interface DeltaLakeTableOperations
+{
+ /**
+ * @throws io.trino.spi.connector.TableNotFoundException if the table does not exist
+ */
+ void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional<String> tableComment);
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperationsProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperationsProvider.java
new file mode 100644
index 000000000000..53a8cc4a3cf8
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/DeltaLakeTableOperationsProvider.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore;
+
+import io.trino.spi.connector.ConnectorSession;
+
+public interface DeltaLakeTableOperationsProvider
+{
+ DeltaLakeTableOperations createTableOperations(ConnectorSession session);
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java
index 1c1533894392..3ff1ed7622e1 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/HiveMetastoreBackedDeltaLakeMetastore.java
@@ -74,13 +74,8 @@ public Optional getRawMetastoreTable(String databaseName, String tableNam
@Override
public Optional<DeltaMetastoreTable> getTable(String databaseName, String tableName)
{
- return getRawMetastoreTable(databaseName, tableName).map(table -> {
- verifyDeltaLakeTable(table);
- return new DeltaMetastoreTable(
- new SchemaTableName(databaseName, tableName),
- table.getTableType().equals(MANAGED_TABLE.name()),
- getTableLocation(table));
- });
+ return getRawMetastoreTable(databaseName, tableName)
+ .map(HiveMetastoreBackedDeltaLakeMetastore::convertToDeltaMetastoreTable);
}
public static void verifyDeltaLakeTable(Table table)
@@ -130,6 +125,15 @@ public void renameTable(SchemaTableName from, SchemaTableName to)
delegate.renameTable(from.getSchemaName(), from.getTableName(), to.getSchemaName(), to.getTableName());
}
+ public static DeltaMetastoreTable convertToDeltaMetastoreTable(Table table)
+ {
+ verifyDeltaLakeTable(table);
+ return new DeltaMetastoreTable(
+ new SchemaTableName(table.getDatabaseName(), table.getTableName()),
+ table.getTableType().equals(MANAGED_TABLE.name()),
+ getTableLocation(table));
+ }
+
public static String getTableLocation(Table table)
{
Map serdeParameters = table.getStorage().getSerdeParameters();
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreModule.java
index 8c540afa471c..bda2f9ef71ad 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreModule.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreModule.java
@@ -15,8 +15,11 @@
import com.google.inject.Binder;
import com.google.inject.Key;
+import com.google.inject.Scopes;
import io.airlift.configuration.AbstractConfigurationAwareModule;
import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename;
+import io.trino.plugin.deltalake.MaxTableParameterLength;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
import io.trino.plugin.hive.metastore.file.FileMetastoreModule;
public class DeltaLakeFileMetastoreModule
@@ -26,6 +29,8 @@ public class DeltaLakeFileMetastoreModule
protected void setup(Binder binder)
{
install(new FileMetastoreModule());
+ binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeFileMetastoreTableOperationsProvider.class).in(Scopes.SINGLETON);
binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(true);
+ binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(Integer.MAX_VALUE);
}
}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperations.java
new file mode 100644
index 000000000000..69019cf22133
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperations.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore.file;
+
+import com.google.common.collect.ImmutableMap;
+import io.trino.metastore.HiveMetastore;
+import io.trino.metastore.Table;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
+import io.trino.spi.connector.SchemaTableName;
+import io.trino.spi.connector.TableNotFoundException;
+
+import java.util.Map;
+import java.util.Optional;
+
+import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters;
+import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet;
+import static java.util.Objects.requireNonNull;
+
+public class DeltaLakeFileMetastoreTableOperations
+ implements DeltaLakeTableOperations
+{
+ private final HiveMetastore metastore;
+
+ public DeltaLakeFileMetastoreTableOperations(HiveMetastore metastore)
+ {
+ this.metastore = requireNonNull(metastore, "metastore is null");
+ }
+
+ @Override
+ public void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional<String> tableComment)
+ {
+ Table currentTable = metastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName())
+ .orElseThrow(() -> new TableNotFoundException(schemaTableName));
+ Map<String, String> parameters = ImmutableMap.<String, String>builder()
+ .putAll(currentTable.getParameters())
+ .putAll(tableMetadataParameters(version, schemaString, tableComment))
+ .buildKeepingLast();
+ Table updatedTable = currentTable.withParameters(parameters);
+ metastore.replaceTable(currentTable.getDatabaseName(), currentTable.getTableName(), updatedTable, buildInitialPrivilegeSet(currentTable.getOwner().orElseThrow()));
+ }
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperationsProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperationsProvider.java
new file mode 100644
index 000000000000..898679cb6497
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/file/DeltaLakeFileMetastoreTableOperationsProvider.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore.file;
+
+import com.google.inject.Inject;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
+import io.trino.plugin.hive.metastore.HiveMetastoreFactory;
+import io.trino.spi.connector.ConnectorSession;
+
+import java.util.Optional;
+
+import static java.util.Objects.requireNonNull;
+
+public class DeltaLakeFileMetastoreTableOperationsProvider
+ implements DeltaLakeTableOperationsProvider
+{
+ private final HiveMetastoreFactory hiveMetastoreFactory;
+
+ @Inject
+ public DeltaLakeFileMetastoreTableOperationsProvider(HiveMetastoreFactory hiveMetastoreFactory)
+ {
+ this.hiveMetastoreFactory = requireNonNull(hiveMetastoreFactory, "hiveMetastoreFactory is null");
+ }
+
+ @Override
+ public DeltaLakeTableOperations createTableOperations(ConnectorSession session)
+ {
+ return new DeltaLakeFileMetastoreTableOperations(hiveMetastoreFactory.createMetastore(Optional.of(session.getIdentity())));
+ }
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreModule.java
index 179e76017ce0..a1b2ced44dca 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreModule.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreModule.java
@@ -15,18 +15,23 @@
import com.google.inject.Binder;
import com.google.inject.Key;
+import com.google.inject.Scopes;
import com.google.inject.Singleton;
import com.google.inject.multibindings.ProvidesIntoOptional;
import io.airlift.configuration.AbstractConfigurationAwareModule;
import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename;
+import io.trino.plugin.deltalake.MaxTableParameterLength;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
import io.trino.plugin.hive.metastore.glue.GlueHiveMetastore;
import io.trino.plugin.hive.metastore.glue.GlueMetastoreModule;
+import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
import java.util.EnumSet;
import java.util.Set;
import static com.google.inject.multibindings.ProvidesIntoOptional.Type.ACTUAL;
import static io.airlift.configuration.ConfigBinder.configBinder;
+import static org.weakref.jmx.guice.ExportBinder.newExporter;
public class DeltaLakeGlueMetastoreModule
extends AbstractConfigurationAwareModule
@@ -37,7 +42,12 @@ protected void setup(Binder binder)
configBinder(binder).bindConfig(DeltaLakeGlueMetastoreConfig.class);
install(new GlueMetastoreModule());
+ binder.bind(GlueMetastoreStats.class).in(Scopes.SINGLETON);
+ newExporter(binder).export(GlueMetastoreStats.class).withGeneratedName();
+ binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeGlueMetastoreTableOperationsProvider.class).in(Scopes.SINGLETON);
binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(true);
+ // Limit per Glue API docs (https://docs.aws.amazon.com/glue/latest/webapi/API_TableInput.html#Glue-Type-TableInput-Parameters as of this writing)
+ binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(512000);
}
@ProvidesIntoOptional(ACTUAL)
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreTableOperations.java
new file mode 100644
index 000000000000..ed612e15f613
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreTableOperations.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore.glue;
+
+import com.google.common.collect.ImmutableMap;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
+import io.trino.plugin.hive.metastore.glue.GlueCache;
+import io.trino.plugin.hive.metastore.glue.GlueContext;
+import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
+import io.trino.spi.connector.SchemaTableName;
+import io.trino.spi.connector.TableNotFoundException;
+import software.amazon.awssdk.services.glue.GlueClient;
+import software.amazon.awssdk.services.glue.model.EntityNotFoundException;
+import software.amazon.awssdk.services.glue.model.GetTableRequest;
+import software.amazon.awssdk.services.glue.model.Table;
+import software.amazon.awssdk.services.glue.model.TableInput;
+
+import java.util.Map;
+import java.util.Optional;
+
+import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters;
+import static io.trino.plugin.hive.metastore.glue.GlueHiveMetastore.asTableInputBuilder;
+import static java.util.Objects.requireNonNull;
+
+public class DeltaLakeGlueMetastoreTableOperations
+ implements DeltaLakeTableOperations
+{
+ private final GlueClient glueClient;
+ private final GlueContext glueContext;
+ private final GlueCache glueCache;
+ private final GlueMetastoreStats stats;
+
+ public DeltaLakeGlueMetastoreTableOperations(
+ GlueClient glueClient,
+ GlueContext glueContext,
+ GlueCache glueCache,
+ GlueMetastoreStats stats)
+ {
+ this.glueClient = requireNonNull(glueClient, "glueClient is null");
+ this.glueContext = requireNonNull(glueContext, "glueContext is null");
+ this.glueCache = requireNonNull(glueCache, "glueCache is null");
+ this.stats = requireNonNull(stats, "stats is null");
+ }
+
+ @Override
+ public void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional<String> tableComment)
+ {
+ GetTableRequest getTableRequest = GetTableRequest.builder()
+ .databaseName(schemaTableName.getSchemaName())
+ .name(schemaTableName.getTableName())
+ .build();
+ Table currentTable;
+ try {
+ currentTable = glueClient.getTable(getTableRequest).table();
+ }
+ catch (EntityNotFoundException e) {
+ throw new TableNotFoundException(schemaTableName);
+ }
+ String glueVersionId = currentTable.versionId();
+
+ stats.getUpdateTable().call(() -> glueClient.updateTable(builder -> builder
+ .applyMutation(glueContext::configureClient)
+ .databaseName(schemaTableName.getSchemaName())
+ .tableInput(convertGlueTableToTableInput(currentTable, version, schemaString, tableComment))
+ .versionId(glueVersionId)));
+ glueCache.invalidateTable(schemaTableName.getSchemaName(), schemaTableName.getTableName(), false);
+ }
+
+ private static TableInput convertGlueTableToTableInput(Table glueTable, long version, String schemaString, Optional<String> tableComment)
+ {
+ Map<String, String> parameters = ImmutableMap.<String, String>builder()
+ .putAll(glueTable.parameters())
+ .putAll(tableMetadataParameters(version, schemaString, tableComment))
+ .buildKeepingLast();
+
+ return asTableInputBuilder(glueTable)
+ .parameters(parameters)
+ .build();
+ }
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreTableOperationsProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreTableOperationsProvider.java
new file mode 100644
index 000000000000..7b945a5b44cc
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/DeltaLakeGlueMetastoreTableOperationsProvider.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore.glue;
+
+import com.google.inject.Inject;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
+import io.trino.plugin.hive.metastore.glue.GlueCache;
+import io.trino.plugin.hive.metastore.glue.GlueContext;
+import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
+import io.trino.spi.connector.ConnectorSession;
+import software.amazon.awssdk.services.glue.GlueClient;
+
+import static java.util.Objects.requireNonNull;
+
+public class DeltaLakeGlueMetastoreTableOperationsProvider
+ implements DeltaLakeTableOperationsProvider
+{
+ private final GlueClient glueClient;
+ private final GlueContext glueContext;
+ private final GlueCache glueCache;
+ private final GlueMetastoreStats stats;
+
+ @Inject
+ public DeltaLakeGlueMetastoreTableOperationsProvider(
+ GlueClient glueClient,
+ GlueContext glueContext,
+ GlueCache glueCache,
+ GlueMetastoreStats stats)
+ {
+ this.glueClient = requireNonNull(glueClient, "glueClient is null");
+ this.glueContext = requireNonNull(glueContext, "glueContext is null");
+ this.glueCache = requireNonNull(glueCache, "glueCache is null");
+ this.stats = requireNonNull(stats, "stats is null");
+ }
+
+ @Override
+ public DeltaLakeTableOperations createTableOperations(ConnectorSession session)
+ {
+ return new DeltaLakeGlueMetastoreTableOperations(glueClient, glueContext, glueCache, stats);
+ }
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java
index f47d806ad7bc..2a68a126d8a9 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreModule.java
@@ -16,10 +16,14 @@
import com.amazonaws.services.glue.model.Table;
import com.google.inject.Binder;
import com.google.inject.Key;
+import com.google.inject.Scopes;
import com.google.inject.TypeLiteral;
import io.airlift.configuration.AbstractConfigurationAwareModule;
import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename;
+import io.trino.plugin.deltalake.MaxTableParameterLength;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
import io.trino.plugin.deltalake.metastore.glue.DeltaLakeGlueMetastoreConfig;
+import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
import io.trino.plugin.hive.metastore.glue.v1.ForGlueHiveMetastore;
import io.trino.plugin.hive.metastore.glue.v1.GlueMetastoreModule;
@@ -27,6 +31,7 @@
import static com.google.inject.multibindings.OptionalBinder.newOptionalBinder;
import static io.airlift.configuration.ConfigBinder.configBinder;
+import static org.weakref.jmx.guice.ExportBinder.newExporter;
public class DeltaLakeGlueV1MetastoreModule
extends AbstractConfigurationAwareModule
@@ -40,6 +45,11 @@ protected void setup(Binder binder)
.setBinding().toProvider(DeltaLakeGlueMetastoreTableFilterProvider.class);
install(new GlueMetastoreModule());
+ binder.bind(GlueMetastoreStats.class).in(Scopes.SINGLETON);
+ newExporter(binder).export(GlueMetastoreStats.class).withGeneratedName();
+ binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeGlueV1MetastoreTableOperationsProvider.class).in(Scopes.SINGLETON);
binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(true);
+ // Limit per Glue API docs (https://docs.aws.amazon.com/glue/latest/webapi/API_TableInput.html#Glue-Type-TableInput-Parameters as of this writing)
+ binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(512000);
}
}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperations.java
new file mode 100644
index 000000000000..85dc4a7d5d6d
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperations.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore.glue.v1;
+
+import com.amazonaws.services.glue.AWSGlueAsync;
+import com.amazonaws.services.glue.model.EntityNotFoundException;
+import com.amazonaws.services.glue.model.GetTableRequest;
+import com.amazonaws.services.glue.model.Table;
+import com.amazonaws.services.glue.model.TableInput;
+import com.amazonaws.services.glue.model.UpdateTableRequest;
+import com.google.common.collect.ImmutableMap;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
+import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
+import io.trino.spi.connector.SchemaTableName;
+import io.trino.spi.connector.TableNotFoundException;
+
+import java.util.Optional;
+
+import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters;
+import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueInputConverter.convertGlueTableToTableInput;
+import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters;
+import static java.util.Objects.requireNonNull;
+
+public class DeltaLakeGlueV1MetastoreTableOperations
+ implements DeltaLakeTableOperations
+{
+ private final AWSGlueAsync glueClient;
+ private final GlueMetastoreStats stats;
+
+ public DeltaLakeGlueV1MetastoreTableOperations(AWSGlueAsync glueClient, GlueMetastoreStats stats)
+ {
+ this.glueClient = requireNonNull(glueClient, "glueClient is null");
+ this.stats = requireNonNull(stats, "stats is null");
+ }
+
+ @Override
+ public void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional<String> tableComment)
+ {
+ GetTableRequest getTableRequest = new GetTableRequest()
+ .withDatabaseName(schemaTableName.getSchemaName())
+ .withName(schemaTableName.getTableName());
+ Table currentTable;
+ try {
+ currentTable = glueClient.getTable(getTableRequest).getTable();
+ }
+ catch (EntityNotFoundException e) {
+ throw new TableNotFoundException(schemaTableName);
+ }
+ String glueVersionId = currentTable.getVersionId();
+
+ TableInput tableInput = convertGlueTableToTableInput(currentTable);
+ ImmutableMap.Builder<String, String> parameters = ImmutableMap.builder();
+ parameters.putAll(getTableParameters(currentTable));
+ parameters.putAll(tableMetadataParameters(version, schemaString, tableComment));
+ tableInput.withParameters(parameters.buildOrThrow());
+
+ UpdateTableRequest updateTableRequest = new UpdateTableRequest()
+ .withDatabaseName(schemaTableName.getSchemaName())
+ .withTableInput(tableInput)
+ .withVersionId(glueVersionId);
+ stats.getUpdateTable().call(() -> glueClient.updateTable(updateTableRequest));
+ }
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperationsProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperationsProvider.java
new file mode 100644
index 000000000000..1c81a7f7e04b
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/glue/v1/DeltaLakeGlueV1MetastoreTableOperationsProvider.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore.glue.v1;
+
+import com.amazonaws.services.glue.AWSGlueAsync;
+import com.google.inject.Inject;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
+import io.trino.plugin.hive.metastore.glue.GlueMetastoreStats;
+import io.trino.spi.connector.ConnectorSession;
+
+import static java.util.Objects.requireNonNull;
+
+public class DeltaLakeGlueV1MetastoreTableOperationsProvider
+ implements DeltaLakeTableOperationsProvider
+{
+ private final AWSGlueAsync glueClient;
+ private final GlueMetastoreStats stats;
+
+ @Inject
+ public DeltaLakeGlueV1MetastoreTableOperationsProvider(AWSGlueAsync glueClient, GlueMetastoreStats stats)
+ {
+ this.glueClient = requireNonNull(glueClient, "glueClient is null");
+ this.stats = requireNonNull(stats, "stats is null");
+ }
+
+ @Override
+ public DeltaLakeTableOperations createTableOperations(ConnectorSession session)
+ {
+ return new DeltaLakeGlueV1MetastoreTableOperations(glueClient, stats);
+ }
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreModule.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreModule.java
index e9954d6d7c8d..8946e5ec9c58 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreModule.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreModule.java
@@ -15,8 +15,12 @@
import com.google.inject.Binder;
import com.google.inject.Key;
+import com.google.inject.Scopes;
import io.airlift.configuration.AbstractConfigurationAwareModule;
import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename;
+import io.trino.plugin.deltalake.MaxTableParameterLength;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
+import io.trino.plugin.deltalake.metastore.file.DeltaLakeFileMetastoreTableOperationsProvider;
import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreModule;
public class DeltaLakeThriftMetastoreModule
@@ -26,6 +30,13 @@ public class DeltaLakeThriftMetastoreModule
protected void setup(Binder binder)
{
install(new ThriftMetastoreModule());
+ binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeFileMetastoreTableOperationsProvider.class).in(Scopes.SINGLETON);
binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(false);
+ // Limit per Hive metastore code (https://github.com/apache/hive/tree/7f6367e0c6e21b11ef62da1ea6681a54d547de07/standalone-metastore/metastore-server/src/main/sql as of this writing)
+ // - MySQL: mediumtext (16777215)
+ // - SQL Server: nvarchar(max) (2147483647)
+ // - Oracle: clob (4294967295)
+ // - PostgreSQL: text (unlimited)
+ binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(16777215);
}
}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreTableOperations.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreTableOperations.java
new file mode 100644
index 000000000000..055b3c0d7b81
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreTableOperations.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore.thrift;
+
+import com.google.common.collect.ImmutableMap;
+import io.trino.metastore.AcidTransactionOwner;
+import io.trino.metastore.HiveMetastore;
+import io.trino.metastore.Table;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
+import io.trino.plugin.hive.metastore.thrift.ThriftMetastore;
+import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.connector.SchemaTableName;
+import io.trino.spi.connector.TableNotFoundException;
+
+import java.util.Map;
+import java.util.Optional;
+
+import static io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler.tableMetadataParameters;
+import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet;
+import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.fromMetastoreApiTable;
+import static java.util.Objects.requireNonNull;
+
+public class DeltaLakeThriftMetastoreTableOperations
+ implements DeltaLakeTableOperations
+{
+ private final ConnectorSession session;
+ private final HiveMetastore metastore;
+ private final ThriftMetastore thriftMetastore;
+
+ public DeltaLakeThriftMetastoreTableOperations(
+ ConnectorSession session,
+ HiveMetastore metastore,
+ ThriftMetastore thriftMetastore)
+ {
+ this.session = requireNonNull(session, "session is null");
+ this.metastore = requireNonNull(metastore, "metastore is null");
+ this.thriftMetastore = requireNonNull(thriftMetastore, "thriftMetastore is null");
+ }
+
+ @Override
+ public void commitToExistingTable(SchemaTableName schemaTableName, long version, String schemaString, Optional<String> tableComment)
+ {
+ long lockId = thriftMetastore.acquireTableExclusiveLock(
+ new AcidTransactionOwner(session.getUser()),
+ session.getQueryId(),
+ schemaTableName.getSchemaName(),
+ schemaTableName.getTableName());
+
+ try {
+ Table currentTable = fromMetastoreApiTable(thriftMetastore.getTable(schemaTableName.getSchemaName(), schemaTableName.getTableName())
+ .orElseThrow(() -> new TableNotFoundException(schemaTableName)));
+ Map<String, String> parameters = ImmutableMap.<String, String>builder()
+ .putAll(currentTable.getParameters())
+ .putAll(tableMetadataParameters(version, schemaString, tableComment))
+ .buildKeepingLast();
+ Table updatedTable = currentTable.withParameters(parameters);
+
+ metastore.replaceTable(currentTable.getDatabaseName(), currentTable.getTableName(), updatedTable, buildInitialPrivilegeSet(currentTable.getOwner().orElseThrow()));
+ }
+ finally {
+ thriftMetastore.releaseTableLock(lockId);
+ }
+ }
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreTableOperationsProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreTableOperationsProvider.java
new file mode 100644
index 000000000000..9ff99a10cc55
--- /dev/null
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/metastore/thrift/DeltaLakeThriftMetastoreTableOperationsProvider.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore.thrift;
+
+import com.google.inject.Inject;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperations;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableOperationsProvider;
+import io.trino.plugin.hive.metastore.HiveMetastoreFactory;
+import io.trino.plugin.hive.metastore.thrift.ThriftMetastoreFactory;
+import io.trino.spi.connector.ConnectorSession;
+import io.trino.spi.security.ConnectorIdentity;
+
+import java.util.Optional;
+
+import static java.util.Objects.requireNonNull;
+
+public class DeltaLakeThriftMetastoreTableOperationsProvider
+ implements DeltaLakeTableOperationsProvider
+{
+ private final HiveMetastoreFactory hiveMetastoreFactory;
+ private final ThriftMetastoreFactory thriftMetastoreFactory;
+
+ @Inject
+ public DeltaLakeThriftMetastoreTableOperationsProvider(HiveMetastoreFactory hiveMetastoreFactory, ThriftMetastoreFactory thriftMetastoreFactory)
+ {
+ this.hiveMetastoreFactory = requireNonNull(hiveMetastoreFactory, "hiveMetastoreFactory is null");
+ this.thriftMetastoreFactory = requireNonNull(thriftMetastoreFactory, "thriftMetastoreFactory is null");
+ }
+
+ @Override
+ public DeltaLakeTableOperations createTableOperations(ConnectorSession session)
+ {
+ Optional<ConnectorIdentity> identity = Optional.of(session.getIdentity());
+ return new DeltaLakeThriftMetastoreTableOperations(session, hiveMetastoreFactory.createMetastore(identity), thriftMetastoreFactory.createMetastore(identity));
+ }
+}
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/procedure/RegisterTableProcedure.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/procedure/RegisterTableProcedure.java
index 52c4dd645151..a36c7edb2aac 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/procedure/RegisterTableProcedure.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/procedure/RegisterTableProcedure.java
@@ -27,6 +27,7 @@
import io.trino.plugin.deltalake.DeltaLakeMetadataFactory;
import io.trino.plugin.deltalake.metastore.DeltaLakeMetastore;
import io.trino.plugin.deltalake.statistics.CachingExtendedStatisticsAccess;
+import io.trino.plugin.deltalake.transactionlog.MetadataEntry;
import io.trino.plugin.deltalake.transactionlog.TableSnapshot;
import io.trino.plugin.deltalake.transactionlog.TransactionLogAccess;
import io.trino.spi.TrinoException;
@@ -44,7 +45,6 @@
import static io.trino.plugin.base.util.Procedures.checkProcedureArgument;
import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_FILESYSTEM_ERROR;
import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_TABLE;
-import static io.trino.plugin.deltalake.DeltaLakeMetadata.buildTable;
import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogDir;
import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet;
import static io.trino.spi.StandardErrorCode.GENERIC_USER_ERROR;
@@ -158,15 +158,14 @@ private void doRegisterTable(
throw new TrinoException(DELTA_LAKE_FILESYSTEM_ERROR, format("Failed checking table location %s", tableLocation), e);
}
- Table table = buildTable(session, schemaTableName, tableLocation, true);
-
- PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow());
statisticsAccess.invalidateCache(schemaTableName, Optional.of(tableLocation));
transactionLogAccess.invalidateCache(schemaTableName, Optional.of(tableLocation));
// Verify we're registering a location with a valid table
+ TableSnapshot tableSnapshot;
+ MetadataEntry metadataEntry;
try {
- TableSnapshot tableSnapshot = transactionLogAccess.loadSnapshot(session, table.getSchemaTableName(), tableLocation, Optional.empty());
- transactionLogAccess.getMetadataEntry(session, tableSnapshot); // verify metadata exists
+ tableSnapshot = transactionLogAccess.loadSnapshot(session, schemaTableName, tableLocation, Optional.empty());
+ metadataEntry = transactionLogAccess.getMetadataEntry(session, tableSnapshot);
}
catch (TrinoException e) {
throw e;
@@ -175,6 +174,15 @@ private void doRegisterTable(
throw new TrinoException(DELTA_LAKE_INVALID_TABLE, "Failed to access table location: " + tableLocation, e);
}
+ Table table = metadata.buildTable(
+ session,
+ schemaTableName,
+ tableLocation,
+ true,
+ Optional.ofNullable(metadataEntry.getDescription()),
+ tableSnapshot.getVersion(),
+ metadataEntry.getSchemaString());
+ PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow());
metastore.createTable(table, principalPrivileges);
}
}
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java
index ba9cef997bc6..f576e89d542d 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java
@@ -64,6 +64,8 @@ public void testDefaults()
.setDeleteSchemaLocationsFallback(false)
.setParquetTimeZone(TimeZone.getDefault().getID())
.setPerTransactionMetastoreCacheMaximumSize(1000)
+ .setStoreTableMetadataEnabled(false)
+ .setStoreTableMetadataThreads(5)
.setTargetMaxFileSize(DataSize.of(1, GIGABYTE))
.setIdleWriterMinFileSize(DataSize.of(16, MEGABYTE))
.setUniqueTableLocation(true)
@@ -99,6 +101,8 @@ public void testExplicitPropertyMappings()
.put("delta.compression-codec", "GZIP")
.put("delta.per-transaction-metastore-cache-maximum-size", "500")
.put("delta.delete-schema-locations-fallback", "true")
+ .put("delta.metastore.store-table-metadata", "true")
+ .put("delta.metastore.store-table-metadata-threads", "1")
.put("delta.parquet.time-zone", nonDefaultTimeZone().getID())
.put("delta.target-max-file-size", "2 GB")
.put("delta.idle-writer-min-file-size", "1MB")
@@ -133,6 +137,8 @@ public void testExplicitPropertyMappings()
.setDeleteSchemaLocationsFallback(true)
.setParquetTimeZone(nonDefaultTimeZone().getID())
.setPerTransactionMetastoreCacheMaximumSize(500)
+ .setStoreTableMetadataEnabled(true)
+ .setStoreTableMetadataThreads(1)
.setTargetMaxFileSize(DataSize.of(2, GIGABYTE))
.setIdleWriterMinFileSize(DataSize.of(1, MEGABYTE))
.setUniqueTableLocation(false)
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java
index 5fe1565a51e1..42aa22aea22e 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConnectorTest.java
@@ -16,16 +16,20 @@
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Resources;
import io.airlift.units.DataSize;
import io.trino.Session;
import io.trino.execution.QueryInfo;
import io.trino.metastore.HiveMetastore;
+import io.trino.metastore.Table;
import io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.ColumnMappingMode;
import io.trino.plugin.hive.HiveCompressionCodec;
import io.trino.plugin.hive.metastore.HiveMetastoreFactory;
import io.trino.plugin.tpch.TpchPlugin;
+import io.trino.spi.connector.ColumnMetadata;
import io.trino.sql.planner.plan.FilterNode;
import io.trino.sql.planner.plan.TableDeleteNode;
import io.trino.sql.planner.plan.TableFinishNode;
@@ -40,6 +44,7 @@
import io.trino.testing.containers.Minio;
import io.trino.testing.minio.MinioClient;
import io.trino.testing.sql.TestTable;
+import io.trino.testing.sql.TestView;
import io.trino.testing.sql.TrinoSqlExecutor;
import org.intellij.lang.annotations.Language;
import org.junit.jupiter.api.Test;
@@ -64,10 +69,13 @@
import static io.trino.plugin.deltalake.DeltaLakeMetadata.CREATE_OR_REPLACE_TABLE_OPERATION;
import static io.trino.plugin.deltalake.DeltaLakeMetadata.CREATE_TABLE_OPERATION;
import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG;
+import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.getColumnMetadata;
import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.TRANSACTION_LOG_DIRECTORY;
import static io.trino.plugin.hive.TableType.EXTERNAL_TABLE;
import static io.trino.plugin.hive.TableType.MANAGED_TABLE;
+import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet;
import static io.trino.plugin.tpch.TpchMetadata.TINY_SCHEMA_NAME;
+import static io.trino.spi.type.IntegerType.INTEGER;
import static io.trino.spi.type.TimeZoneKey.getTimeZoneKey;
import static io.trino.spi.type.VarcharType.VARCHAR;
import static io.trino.sql.planner.optimizations.PlanNodeSearcher.searchFrom;
@@ -79,11 +87,14 @@
import static io.trino.testing.TestingConnectorBehavior.SUPPORTS_CREATE_SCHEMA;
import static io.trino.testing.TestingNames.randomNameSuffix;
import static io.trino.testing.TestingSession.testSessionBuilder;
+import static io.trino.testing.assertions.Assert.assertEventually;
import static io.trino.testing.containers.Minio.MINIO_ACCESS_KEY;
import static io.trino.testing.containers.Minio.MINIO_REGION;
import static io.trino.testing.containers.Minio.MINIO_SECRET_KEY;
+import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER;
import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.util.Map.entry;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -130,9 +141,12 @@ protected QueryRunner createQueryRunner()
.put("s3.endpoint", minio.getMinioAddress())
.put("s3.path-style-access", "true")
.put("s3.streaming.part-size", "5MB") // minimize memory usage
+ .put("delta.metastore.store-table-metadata", "true")
.put("delta.enable-non-concurrent-writes", "true")
.put("delta.register-table-procedure.enabled", "true")
.buildOrThrow());
+ metastore = TestingDeltaLakeUtils.getConnectorService(queryRunner, HiveMetastoreFactory.class)
+ .createMetastore(Optional.empty());
queryRunner.execute("CREATE SCHEMA " + SCHEMA + " WITH (location = 's3://" + bucketName + "/" + SCHEMA + "')");
queryRunner.execute("CREATE SCHEMA schemawithoutunderscore WITH (location = 's3://" + bucketName + "/schemawithoutunderscore')");
@@ -4757,4 +4771,280 @@ public void testDuplicatedFieldNames()
assertQueryFails("ALTER TABLE " + table.getName() + " ALTER COLUMN col SET DATA TYPE row(x int, \"X\" int)", "This connector does not support setting column types");
}
}
+
+ @Test
+ public void testMetastoreAfterCreateTable()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) {
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(
+ entry("comment", "test comment"),
+ entry("trino_last_transaction_version", "0"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterCreateOrReplaceTable()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) {
+ assertUpdate("CREATE OR REPLACE TABLE " + table.getName() + "(new_col varchar) COMMENT 'new comment'");
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(
+ entry("comment", "new comment"),
+ entry("trino_last_transaction_version", "1"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"new_col\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}"));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterCreateTableAsSelect()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "COMMENT 'test comment' AS SELECT 1 col")) {
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(
+ entry("comment", "test comment"),
+ entry("trino_last_transaction_version", "0"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterCreateOrReplaceTableAsSelect()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "COMMENT 'test comment' AS SELECT 1 col")) {
+ assertUpdate("CREATE OR REPLACE TABLE " + table.getName() + " COMMENT 'new comment' AS SELECT 'test' new_col", 1);
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(
+ entry("comment", "new comment"),
+ entry("trino_last_transaction_version", "1"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"new_col\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}"));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterCommentTable()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) {
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .doesNotContainKey("comment")
+ .contains(
+ entry("trino_last_transaction_version", "0"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"));
+
+ assertUpdate("COMMENT ON TABLE " + table.getName() + " IS 'test comment'");
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(
+ entry("comment", "test comment"),
+ entry("trino_last_transaction_version", "1"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}")));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterCommentColumn()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int COMMENT 'test comment')")) {
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .doesNotContainKey("comment")
+ .contains(
+ entry("trino_last_transaction_version", "0"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"comment\":\"test comment\"}}]}"));
+
+ assertUpdate("COMMENT ON COLUMN " + table.getName() + ".col IS 'new test comment'");
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .doesNotContainKey("comment")
+ .contains(
+ entry("trino_last_transaction_version", "1"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{\"comment\":\"new test comment\"}}]}")));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterAlterColumn()
+ {
+ // Use 'name' column mapping mode to allow renaming columns
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int NOT NULL) WITH (column_mapping_mode = 'name')")) {
+ Map<String, String> initialParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters();
+ assertThat(initialParameters)
+ .doesNotContainKey("comment")
+ .contains(entry("trino_last_transaction_version", "0"));
+ List<DeltaLakeColumnMetadata> initialColumns = getColumnMetadata(initialParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of());
+ assertThat(initialColumns).extracting(DeltaLakeColumnMetadata::columnMetadata)
+ .containsExactly(ColumnMetadata.builder().setName("col").setType(INTEGER).setNullable(false).build());
+
+ // Drop not null constraints
+ assertUpdate("ALTER TABLE " + table.getName() + " ALTER COLUMN col DROP NOT NULL");
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .doesNotContainKey("comment")
+ .contains(entry("trino_last_transaction_version", "1")));
+ Map<String, String> dropNotNullParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters();
+ List<DeltaLakeColumnMetadata> dropNotNullColumns = getColumnMetadata(dropNotNullParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of());
+ assertThat(dropNotNullColumns).extracting(DeltaLakeColumnMetadata::columnMetadata)
+ .containsExactly(ColumnMetadata.builder().setName("col").setType(INTEGER).build());
+
+ // Add a new column
+ assertUpdate("ALTER TABLE " + table.getName() + " ADD COLUMN new_col int COMMENT 'test comment'");
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .doesNotContainKey("comment")
+ .contains(entry("trino_last_transaction_version", "2")));
+ Map<String, String> addColumnParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters();
+ List<DeltaLakeColumnMetadata> columnsAfterAddColumn = getColumnMetadata(addColumnParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of());
+ assertThat(columnsAfterAddColumn).extracting(DeltaLakeColumnMetadata::columnMetadata)
+ .containsExactly(
+ ColumnMetadata.builder().setName("col").setType(INTEGER).build(),
+ ColumnMetadata.builder().setName("new_col").setType(INTEGER).setComment(Optional.of("test comment")).build());
+
+ // Rename a column
+ assertUpdate("ALTER TABLE " + table.getName() + " RENAME COLUMN new_col TO renamed_col");
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .doesNotContainKey("comment")
+ .contains(entry("trino_last_transaction_version", "3")));
+ Map<String, String> renameColumnParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters();
+ List<DeltaLakeColumnMetadata> columnsAfterRenameColumn = getColumnMetadata(renameColumnParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of());
+ assertThat(columnsAfterRenameColumn).extracting(DeltaLakeColumnMetadata::columnMetadata)
+ .containsExactly(
+ ColumnMetadata.builder().setName("col").setType(INTEGER).build(),
+ ColumnMetadata.builder().setName("renamed_col").setType(INTEGER).setComment(Optional.of("test comment")).build());
+
+ // Drop a column
+ assertUpdate("ALTER TABLE " + table.getName() + " DROP COLUMN renamed_col");
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .doesNotContainKey("comment")
+ .contains(entry("trino_last_transaction_version", "4")));
+ Map<String, String> dropColumnParameters = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters();
+ List<DeltaLakeColumnMetadata> columnsAfterDropColumn = getColumnMetadata(dropColumnParameters.get("trino_metadata_schema_string"), TESTING_TYPE_MANAGER, ColumnMappingMode.NAME, ImmutableList.of());
+ assertThat(columnsAfterDropColumn).extracting(DeltaLakeColumnMetadata::columnMetadata)
+ .containsExactly(ColumnMetadata.builder().setName("col").setType(INTEGER).build());
+
+ // Update the following test once the connector supports changing column types
+ assertQueryFails("ALTER TABLE " + table.getName() + " ALTER COLUMN col SET DATA TYPE bigint", "This connector does not support setting column types");
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterSetTableProperties()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) {
+ assertUpdate("ALTER TABLE " + table.getName() + " SET PROPERTIES change_data_feed_enabled = true");
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(
+ entry("trino_last_transaction_version", "1"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}")));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterOptimize()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) {
+ assertUpdate("ALTER TABLE " + table.getName() + " EXECUTE optimize");
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(
+ entry("trino_last_transaction_version", "1"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}")));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterRegisterTable()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) {
+ assertUpdate("INSERT INTO " + table.getName() + " VALUES 1", 1);
+ String tableLocation = metastore.getTable(SCHEMA, table.getName()).orElseThrow().getStorage().getLocation();
+ metastore.dropTable(SCHEMA, table.getName(), false);
+
+ assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(SCHEMA, table.getName(), tableLocation));
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(
+ entry("comment", "test comment"),
+ entry("trino_last_transaction_version", "1"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}"));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterCreateTableRemotely()
+ {
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) {
+ Table metastoreTable = metastore.getTable(SCHEMA, table.getName()).orElseThrow();
+ metastore.dropTable(SCHEMA, table.getName(), false);
+
+ // Create a table on metastore directly to avoid cache during the creation
+ Set<String> filterKeys = ImmutableSet.of("comment", "trino_last_transaction_version", "trino_metadata_schema_string");
+ Table newMetastoreTable = Table.builder(metastoreTable)
+ .setParameters(Maps.filterKeys(metastoreTable.getParameters(), key -> !filterKeys.contains(key)))
+ .build();
+ metastore.createTable(newMetastoreTable, buildInitialPrivilegeSet(metastoreTable.getOwner().orElseThrow()));
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .doesNotContainKeys("comment", "trino_last_transaction_version", "trino_metadata_schema_string");
+
+ // The parameters should contain the cache after the 1st access
+ assertQueryReturnsEmptyResult("SELECT * FROM " + table.getName());
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(
+ entry("comment", "test comment"),
+ entry("trino_last_transaction_version", "0"),
+ entry("trino_metadata_schema_string", "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}")));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterDataManipulation()
+ {
+ String schemaString = "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}";
+
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) {
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(entry("trino_last_transaction_version", "0"), entry("trino_metadata_schema_string", schemaString));
+
+ assertUpdate("INSERT INTO " + table.getName() + " VALUES 1", 1);
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(entry("trino_last_transaction_version", "1"), entry("trino_metadata_schema_string", schemaString)));
+
+ assertUpdate("UPDATE " + table.getName() + " SET col = 2", 1);
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(entry("trino_last_transaction_version", "2"), entry("trino_metadata_schema_string", schemaString)));
+
+ assertUpdate("MERGE INTO " + table.getName() + " t " +
+ "USING (SELECT * FROM (VALUES 2)) AS s(col) " +
+ "ON (t.col = s.col) " +
+ "WHEN MATCHED THEN UPDATE SET col = 3", 1);
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(entry("trino_last_transaction_version", "3"), entry("trino_metadata_schema_string", schemaString)));
+
+ assertUpdate("DELETE FROM " + table.getName() + " WHERE col = 3", 1); // row level delete
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(entry("trino_last_transaction_version", "4"), entry("trino_metadata_schema_string", schemaString)));
+
+ assertUpdate("DELETE FROM " + table.getName(), 0); // metadata delete
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(entry("trino_last_transaction_version", "5"), entry("trino_metadata_schema_string", schemaString)));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterTruncateTable()
+ {
+ String schemaString = "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}";
+
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "AS SELECT 1 col")) {
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(entry("trino_last_transaction_version", "0"), entry("trino_metadata_schema_string", schemaString));
+
+ assertUpdate("TRUNCATE TABLE " + table.getName());
+ assertEventually(() -> assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(entry("trino_last_transaction_version", "1"), entry("trino_metadata_schema_string", schemaString)));
+ }
+ }
+
+ @Test
+ public void testMetastoreAfterCreateView()
+ {
+ try (TestView table = new TestView(getQueryRunner()::execute, "test_cache_metastore", "SELECT 1 col")) {
+ assertThat(metastore.getTable(SCHEMA, table.getName()).orElseThrow().getParameters())
+ .doesNotContainKeys("trino_last_transaction_version", "trino_metadata_schema_string")
+ .contains(entry("comment", "Presto View"));
+ }
+ }
}
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java
index 957d752a58cc..8cf9717f124c 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java
@@ -20,7 +20,9 @@
import io.opentelemetry.sdk.trace.data.SpanData;
import io.trino.Session;
import io.trino.SystemSessionProperties;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler;
import io.trino.testing.AbstractTestQueryFramework;
+import io.trino.testing.DistributedQueryRunner;
import io.trino.testing.QueryRunner;
import org.intellij.lang.annotations.Language;
import org.junit.jupiter.api.Test;
@@ -61,6 +63,9 @@ public class TestDeltaLakeFileOperations
{
private static final int MAX_PREFIXES_COUNT = 10;
+ // TODO: Consider waiting for scheduled task completion instead of manual triggering
+ private DeltaLakeTableMetadataScheduler metadataScheduler;
+
@Override
protected QueryRunner createQueryRunner()
throws Exception
@@ -68,12 +73,16 @@ protected QueryRunner createQueryRunner()
Path catalogDir = Files.createTempDirectory("catalog-dir");
closeAfterClass(() -> deleteRecursively(catalogDir, ALLOW_INSECURE));
- return DeltaLakeQueryRunner.builder()
+ DistributedQueryRunner queryRunner = DeltaLakeQueryRunner.builder()
.addCoordinatorProperty("optimizer.experimental-max-prefetched-information-schema-prefixes", Integer.toString(MAX_PREFIXES_COUNT))
.addDeltaProperty("hive.metastore.catalog.dir", catalogDir.toUri().toString())
.addDeltaProperty("delta.enable-non-concurrent-writes", "true")
.addDeltaProperty("delta.register-table-procedure.enabled", "true")
+ .addDeltaProperty("delta.metastore.store-table-metadata", "true")
+ .addDeltaProperty("delta.metastore.store-table-metadata-threads", "0") // Use the same thread to make the test deterministic
.build();
+ metadataScheduler = TestingDeltaLakeUtils.getConnectorService(queryRunner, DeltaLakeTableMetadataScheduler.class);
+ return queryRunner;
}
@Test
@@ -697,14 +706,24 @@ public void testInformationSchemaColumns()
assertUpdate(session, "CREATE TABLE test_other_select_i_s_columns" + i + "(id varchar, age integer)"); // won't match the filter
}
+ // Store table metadata in metastore for making the file access counts deterministic
+ metadataScheduler.process();
+
// Bulk retrieval
+ assertFileSystemAccesses(session, "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA",
+ ImmutableMultiset.<FileOperation>builder()
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables)
+ .build());
+
assertFileSystemAccesses(session, "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA AND table_name LIKE 'test_select_i_s_columns%'",
ImmutableMultiset.<FileOperation>builder()
- .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream"), tables)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables)
.build());
// Pointed lookup
@@ -720,11 +739,10 @@ public void testInformationSchemaColumns()
// Pointed lookup with LIKE predicate (as if unintentional)
assertFileSystemAccesses(session, "SELECT * FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA AND table_name LIKE 'test_select_i_s_columns0'",
ImmutableMultiset.<FileOperation>builder()
- .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream"), tables)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables)
.build());
// Pointed lookup via DESCRIBE (which does some additional things before delegating to information_schema.columns)
@@ -763,24 +781,33 @@ public void testSystemMetadataTableComments()
assertUpdate(session, "CREATE TABLE test_other_select_s_m_t_comments" + i + "(id varchar, age integer)"); // won't match the filter
}
+ // Store table metadata in metastore for making the file access counts deterministic
+ metadataScheduler.process();
+
// Bulk retrieval
+ assertFileSystemAccesses(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA",
+ ImmutableMultiset.<FileOperation>builder()
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables)
+ .build());
+
assertFileSystemAccesses(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA AND table_name LIKE 'test_select_s_m_t_comments%'",
ImmutableMultiset.<FileOperation>builder()
- .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream"), tables)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables)
.build());
// Bulk retrieval for two schemas
assertFileSystemAccesses(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name IN (CURRENT_SCHEMA, 'non_existent') AND table_name LIKE 'test_select_s_m_t_comments%'",
ImmutableMultiset.<FileOperation>builder()
- .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream"), tables)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables)
.build());
// Pointed lookup
@@ -796,11 +823,10 @@ public void testSystemMetadataTableComments()
// Pointed lookup with LIKE predicate (as if unintentional)
assertFileSystemAccesses(session, "SELECT * FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA AND table_name LIKE 'test_select_s_m_t_comments0'",
ImmutableMultiset.<FileOperation>builder()
- .addCopies(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.newStream"), tables * 2)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.newStream"), tables)
- .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.newStream"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000000.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000001.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000002.json", "InputFile.exists"), tables)
+ .addCopies(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", "InputFile.exists"), tables)
.build());
for (int i = 0; i < tables; i++) {
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java
index 75af796f42cd..425db8259745 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java
@@ -23,6 +23,8 @@
import io.trino.filesystem.cache.DefaultCachingHostAddressProvider;
import io.trino.filesystem.hdfs.HdfsFileSystemFactory;
import io.trino.filesystem.memory.MemoryFileSystemFactory;
+import io.trino.plugin.deltalake.metastore.DeltaLakeTableMetadataScheduler;
+import io.trino.plugin.deltalake.metastore.file.DeltaLakeFileMetastoreTableOperationsProvider;
import io.trino.plugin.deltalake.statistics.CachingExtendedStatisticsAccess;
import io.trino.plugin.deltalake.statistics.ExtendedStatistics;
import io.trino.plugin.deltalake.statistics.MetaDirStatisticsAccess;
@@ -67,6 +69,7 @@
import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY;
import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS;
import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore;
+import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER;
import static java.lang.Math.clamp;
import static org.assertj.core.api.Assertions.assertThat;
@@ -213,8 +216,9 @@ public Stream getActiveFiles(
new FileFormatDataSourceStats(),
JsonCodec.jsonCodec(LastCheckpoint.class));
+ HiveMetastoreFactory hiveMetastoreFactory = HiveMetastoreFactory.ofInstance(createTestingFileHiveMetastore(new MemoryFileSystemFactory(), Location.of("memory:///")));
DeltaLakeMetadataFactory metadataFactory = new DeltaLakeMetadataFactory(
- HiveMetastoreFactory.ofInstance(createTestingFileHiveMetastore(new MemoryFileSystemFactory(), Location.of("memory:///"))),
+ hiveMetastoreFactory,
hdfsFileSystemFactory,
transactionLogAccess,
typeManager,
@@ -229,7 +233,8 @@ public Stream getActiveFiles(
DeltaLakeRedirectionsProvider.NOOP,
new CachingExtendedStatisticsAccess(new MetaDirStatisticsAccess(HDFS_FILE_SYSTEM_FACTORY, new JsonCodecFactory().jsonCodec(ExtendedStatistics.class))),
true,
- new NodeVersion("test_version"));
+ new NodeVersion("test_version"),
+ new DeltaLakeTableMetadataScheduler(new TestingNodeManager(), TESTING_TYPE_MANAGER, new DeltaLakeFileMetastoreTableOperationsProvider(hiveMetastoreFactory), Integer.MAX_VALUE, new DeltaLakeConfig()));
ConnectorSession session = testingConnectorSessionWithConfig(deltaLakeConfig);
DeltaLakeTransactionManager deltaLakeTransactionManager = new DeltaLakeTransactionManager(metadataFactory);
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java
index 9e5af371fb43..a6785c3780c0 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeMetastoreAccessOperations.java
@@ -14,16 +14,32 @@
package io.trino.plugin.deltalake.metastore;
import com.google.common.collect.ImmutableMultiset;
+import com.google.common.collect.Maps;
import com.google.common.collect.Multiset;
+import com.google.common.collect.Sets;
+import io.opentelemetry.sdk.trace.data.SpanData;
+import io.trino.Session;
+import io.trino.metastore.HiveMetastore;
+import io.trino.metastore.Table;
import io.trino.plugin.deltalake.DeltaLakeQueryRunner;
+import io.trino.plugin.deltalake.TestingDeltaLakeUtils;
+import io.trino.plugin.hive.metastore.HiveMetastoreFactory;
import io.trino.plugin.hive.metastore.MetastoreMethod;
import io.trino.testing.AbstractTestQueryFramework;
import io.trino.testing.QueryRunner;
+import io.trino.testing.sql.TestTable;
import org.intellij.lang.annotations.Language;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;
-import static io.trino.plugin.hive.metastore.MetastoreInvocations.assertMetastoreInvocationsForQuery;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Optional;
+import java.util.Set;
+
+import static com.google.common.collect.ImmutableList.toImmutableList;
+import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.TPCH_SCHEMA;
+import static io.trino.plugin.hive.metastore.MetastoreInvocations.filterInvocations;
import static io.trino.plugin.hive.metastore.MetastoreMethod.CREATE_TABLE;
import static io.trino.plugin.hive.metastore.MetastoreMethod.DROP_TABLE;
import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_ALL_DATABASES;
@@ -31,17 +47,34 @@
import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLE;
import static io.trino.plugin.hive.metastore.MetastoreMethod.GET_TABLES;
import static io.trino.plugin.hive.metastore.MetastoreMethod.REPLACE_TABLE;
+import static io.trino.plugin.hive.metastore.MetastoreUtil.buildInitialPrivilegeSet;
+import static io.trino.testing.MultisetAssertions.assertMultisetsEqual;
+import static java.util.Map.entry;
+import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
@Execution(SAME_THREAD) // metastore invocation counters shares mutable state so can't be run from many threads simultaneously
public class TestDeltaLakeMetastoreAccessOperations
extends AbstractTestQueryFramework
{
+ private HiveMetastore metastore;
+ private DeltaLakeTableMetadataScheduler metadataScheduler;
+
@Override
protected QueryRunner createQueryRunner()
throws Exception
{
- return DeltaLakeQueryRunner.builder().build();
+ QueryRunner queryRunner = DeltaLakeQueryRunner.builder()
+ .addDeltaProperty("delta.register-table-procedure.enabled", "true")
+ .addDeltaProperty("delta.metastore.store-table-metadata", "true")
+ .addDeltaProperty("delta.metastore.store-table-metadata-threads", "0") // Use the same thread to make the test deterministic
+ .build();
+
+ metastore = TestingDeltaLakeUtils.getConnectorService(queryRunner, HiveMetastoreFactory.class)
+ .createMetastore(Optional.empty());
+ metadataScheduler = TestingDeltaLakeUtils.getConnectorService(queryRunner, DeltaLakeTableMetadataScheduler.class);
+
+ return queryRunner;
}
@Test
@@ -254,10 +287,343 @@ public void testShowTables()
.build());
}
+ @Test
+ public void testSelectWithoutMetadataInMetastore()
+ {
+ assertUpdate("CREATE TABLE test_select_without_cache (id VARCHAR, age INT)");
+
+ removeMetadataCachingPropertiesFromMetastore("test_select_without_cache");
+ assertMetastoreInvocations(
+ getSession(),
+ "SELECT * FROM test_select_without_cache",
+ ImmutableMultiset.builder()
+ .add(GET_TABLE)
+ .build(),
+ asyncInvocations(true)); // async invocations happen because the table metadata is not stored
+ assertMetastoreInvocations("SELECT * FROM test_select_without_cache",
+ ImmutableMultiset.builder()
+ .add(GET_TABLE)
+ .build());
+ }
+
+ @Test
+ public void testUnionWithoutMetadataInMetastore()
+ {
+ assertUpdate("CREATE TABLE test_union_without_cache (id VARCHAR, age INT)");
+ assertMetastoreInvocations("SELECT * FROM test_union_without_cache UNION ALL SELECT * FROM test_union_without_cache",
+ ImmutableMultiset.builder()
+ .add(GET_TABLE)
+ .build());
+
+ removeMetadataCachingPropertiesFromMetastore("test_union_without_cache");
+ assertMetastoreInvocations(
+ getSession(),
+ "SELECT * FROM test_union_without_cache UNION ALL SELECT * FROM test_union_without_cache",
+ ImmutableMultiset.builder()
+ .add(GET_TABLE)
+ .build(),
+ asyncInvocations(true)); // async invocations happen because the table metadata is not stored
+ assertMetastoreInvocations("SELECT * FROM test_union_without_cache UNION ALL SELECT * FROM test_union_without_cache",
+ ImmutableMultiset.builder()
+ .add(GET_TABLE)
+ .build());
+ }
+
+ @Test
+ public void testSelectVersionedWithoutMetadataInMetastore()
+ {
+ assertUpdate("CREATE TABLE test_select_versioned_without_cache AS SELECT 2 as age", 1);
+
+ // Time travel query should not cache the metadata because the definition might be different from the latest version
+ removeMetadataCachingPropertiesFromMetastore("test_select_versioned_without_cache");
+ assertMetastoreInvocations("SELECT * FROM test_select_versioned_without_cache FOR VERSION AS OF 0",
+ ImmutableMultiset.builder()
+ .add(GET_TABLE)
+ .build());
+ }
+
+ @Test
+ public void testStoreMetastoreCreateOrReplaceTable()
+ {
+ testStoreMetastoreCreateOrReplaceTable(true);
+ testStoreMetastoreCreateOrReplaceTable(false);
+ }
+
+ private void testStoreMetastoreCreateOrReplaceTable(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+
+ assertMetastoreInvocations(session, "CREATE OR REPLACE TABLE test_create_or_replace_without_cache (id VARCHAR, age INT)",
+ ImmutableMultiset.builder()
+ .add(GET_DATABASE)
+ .add(GET_TABLE)
+ .add(storeTableMetadata ? CREATE_TABLE : REPLACE_TABLE)
+ .build());
+ removeMetadataCachingPropertiesFromMetastore("test_create_or_replace_without_cache");
+ assertMetastoreInvocations(
+ session,
+ "CREATE OR REPLACE TABLE test_create_or_replace_without_cache (id VARCHAR, age INT)",
+ ImmutableMultiset.builder()
+ .add(GET_DATABASE)
+ .add(GET_TABLE)
+ .add(REPLACE_TABLE)
+ .build(),
+ asyncInvocations(storeTableMetadata));
+ }
+
+ @Test
+ public void testStoreMetastoreCreateTableOrReplaceTableAsSelect()
+ {
+ testStoreMetastoreCreateTableOrReplaceTableAsSelect(true);
+ testStoreMetastoreCreateTableOrReplaceTableAsSelect(false);
+ }
+
+ private void testStoreMetastoreCreateTableOrReplaceTableAsSelect(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+
+ assertMetastoreInvocations(session, "CREATE OR REPLACE TABLE test_ctas_without_cache AS SELECT 1 AS age",
+ ImmutableMultiset.builder()
+ .add(GET_DATABASE)
+ .add(storeTableMetadata ? CREATE_TABLE : REPLACE_TABLE)
+ .add(GET_TABLE)
+ .build());
+ removeMetadataCachingPropertiesFromMetastore("test_ctas_without_cache");
+ assertMetastoreInvocations(session, "CREATE OR REPLACE TABLE test_ctas_without_cache AS SELECT 1 AS age", ImmutableMultiset.builder()
+ .add(GET_DATABASE)
+ .add(GET_TABLE)
+ .add(REPLACE_TABLE)
+ .build(),
+ asyncInvocations(storeTableMetadata));
+ }
+
+ @Test
+ public void testStoreMetastoreCommentTable()
+ {
+ testStoreMetastoreCommentTable(true);
+ testStoreMetastoreCommentTable(false);
+ }
+
+ private void testStoreMetastoreCommentTable(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) {
+ assertMetastoreInvocations(session, "COMMENT ON TABLE " + table.getName() + " IS 'test comment'", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ }
+ }
+
+ @Test
+ public void testStoreMetastoreCommentColumn()
+ {
+ testStoreMetastoreCommentColumn(true);
+ testStoreMetastoreCommentColumn(false);
+ }
+
+ private void testStoreMetastoreCommentColumn(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int COMMENT 'test comment')")) {
+ assertMetastoreInvocations(session, "COMMENT ON COLUMN " + table.getName() + ".col IS 'new test comment'", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ }
+ }
+
+ @Test
+ public void testStoreMetastoreAlterColumn()
+ {
+ testStoreMetastoreAlterColumn(true);
+ testStoreMetastoreAlterColumn(false);
+ }
+
+ private void testStoreMetastoreAlterColumn(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+
+ // Use 'name' column mapping mode to allow renaming columns
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int NOT NULL) WITH (column_mapping_mode = 'name')")) {
+ assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " ALTER COLUMN col DROP NOT NULL", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " ADD COLUMN new_col int COMMENT 'test comment'", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " RENAME COLUMN new_col TO renamed_col", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " DROP COLUMN renamed_col", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ // Update the following test once the connector supports changing column types
+ assertQueryFails(session, "ALTER TABLE " + table.getName() + " ALTER COLUMN col SET DATA TYPE bigint", "This connector does not support setting column types");
+ }
+ }
+
+ @Test
+ public void testStoreMetastoreSetTableProperties()
+ {
+ testStoreMetastoreSetTableProperties(true);
+ testStoreMetastoreSetTableProperties(false);
+ }
+
+ private void testStoreMetastoreSetTableProperties(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) {
+ assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " SET PROPERTIES change_data_feed_enabled = true", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ }
+ }
+
+ @Test
+ public void testStoreMetastoreOptimize()
+ {
+ testStoreMetastoreOptimize(true);
+ testStoreMetastoreOptimize(false);
+ }
+
+ private void testStoreMetastoreOptimize(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) {
+ assertMetastoreInvocations(session, "ALTER TABLE " + table.getName() + " EXECUTE optimize", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ }
+ }
+
+ @Test
+ public void testStoreMetastoreVacuum()
+ {
+ testStoreMetastoreVacuum(true);
+ testStoreMetastoreVacuum(false);
+ }
+
+ private void testStoreMetastoreVacuum(boolean storeTableMetadata)
+ {
+ Session session = Session.builder(getSession())
+ .setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "store_table_metadata", Boolean.toString(storeTableMetadata))
+ .setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "vacuum_min_retention", "0s")
+ .build();
+
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "AS SELECT 1 a")) {
+ assertUpdate("UPDATE " + table.getName() + " SET a = 2", 1);
+ assertMetastoreInvocations(
+ session,
+ "CALL system.vacuum(schema_name => CURRENT_SCHEMA, table_name => '" + table.getName() + "', retention => '0s')",
+ ImmutableMultiset.of(GET_TABLE));
+ }
+ }
+
+ @Test
+ public void testStoreMetastoreRegisterTable()
+ {
+ testStoreMetastoreRegisterTable(true);
+ testStoreMetastoreRegisterTable(false);
+ }
+
+ private void testStoreMetastoreRegisterTable(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int) COMMENT 'test comment'")) {
+ assertUpdate("INSERT INTO " + table.getName() + " VALUES 1", 1);
+ String tableLocation = metastore.getTable(TPCH_SCHEMA, table.getName()).orElseThrow().getStorage().getLocation();
+ metastore.dropTable(TPCH_SCHEMA, table.getName(), false);
+
+ assertMetastoreInvocations(
+ session,
+ "CALL system.register_table('%s', '%s', '%s')".formatted(TPCH_SCHEMA, table.getName(), tableLocation),
+ ImmutableMultiset.of(GET_DATABASE, CREATE_TABLE));
+ }
+ }
+
+ @Test
+ public void testStoreMetastoreDataManipulation()
+ {
+ testStoreMetastoreDataManipulation(true);
+ testStoreMetastoreDataManipulation(false);
+ }
+
+ private void testStoreMetastoreDataManipulation(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+ String schemaString = "{\"type\":\"struct\",\"fields\":[{\"name\":\"col\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}";
+
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "(col int)")) {
+ assertThat(metastore.getTable(TPCH_SCHEMA, table.getName()).orElseThrow().getParameters())
+ .contains(entry("trino_last_transaction_version", "0"), entry("trino_metadata_schema_string", schemaString));
+
+ assertMetastoreInvocations(session, "INSERT INTO " + table.getName() + " VALUES 1", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ assertMetastoreInvocations(session, "UPDATE " + table.getName() + " SET col = 2", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ assertMetastoreInvocations(session, "MERGE INTO " + table.getName() + " t " +
+ "USING (SELECT * FROM (VALUES 2)) AS s(col) " +
+ "ON (t.col = s.col) " +
+ "WHEN MATCHED THEN UPDATE SET col = 3",
+ ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ assertMetastoreInvocations(session, "DELETE FROM " + table.getName() + " WHERE col = 3", ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); // row level delete
+ assertMetastoreInvocations(session, "DELETE FROM " + table.getName(), ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata)); // metadata delete
+ }
+ }
+
+ @Test
+ public void testStoreMetastoreTruncateTable()
+ {
+ testStoreMetastoreTruncateTable(true);
+ testStoreMetastoreTruncateTable(false);
+ }
+
+ private void testStoreMetastoreTruncateTable(boolean storeTableMetadata)
+ {
+ Session session = sessionWithStoreTableMetadata(storeTableMetadata);
+ try (TestTable table = new TestTable(getQueryRunner()::execute, "test_cache_metastore", "AS SELECT 1 col")) {
+ assertMetastoreInvocations(session, "TRUNCATE TABLE " + table.getName(), ImmutableMultiset.of(GET_TABLE), asyncInvocations(storeTableMetadata));
+ }
+ }
+
+ private void removeMetadataCachingPropertiesFromMetastore(String tableName)
+ {
+ Table table = metastore.getTable(getSession().getSchema().orElseThrow(), tableName).orElseThrow();
+ Table newMetastoreTable = Table.builder(table)
+ .setParameters(Maps.filterKeys(table.getParameters(), key -> !key.equals("trino_last_transaction_version")))
+ .build();
+ metastore.replaceTable(table.getDatabaseName(), table.getTableName(), newMetastoreTable, buildInitialPrivilegeSet(table.getOwner().orElseThrow()));
+ }
+
+ private Session sessionWithStoreTableMetadata(boolean storeTableMetadata)
+ {
+ return Session.builder(getSession())
+ .setCatalogSessionProperty(getSession().getCatalog().orElseThrow(), "store_table_metadata", Boolean.toString(storeTableMetadata))
+ .build();
+ }
+
private void assertMetastoreInvocations(@Language("SQL") String query, Multiset expectedInvocations)
+ {
+ assertMetastoreInvocations(getSession(), query, expectedInvocations, ImmutableMultiset.of());
+ }
+
+ private void assertMetastoreInvocations(Session session, @Language("SQL") String query, Multiset expectedInvocations)
+ {
+ assertMetastoreInvocations(session, query, expectedInvocations, ImmutableMultiset.of());
+ }
+
+ private void assertMetastoreInvocations(Session session, @Language("SQL") String query, Multiset expectedInvocations, Multiset asyncInvocations)
{
assertUpdate("CALL system.flush_metadata_cache()");
- assertMetastoreInvocationsForQuery(getDistributedQueryRunner(), getSession(), query, expectedInvocations);
+ metadataScheduler.clear();
+ assertMetastoreInvocationsForQuery(getDistributedQueryRunner(), session, query, expectedInvocations, () -> metadataScheduler.process(), asyncInvocations);
+ }
+
+ private static Multiset asyncInvocations(boolean storeTableParameter)
+ {
+ return storeTableParameter ? ImmutableMultiset.of(GET_TABLE, REPLACE_TABLE) : ImmutableMultiset.of();
+ }
+
+ private static void assertMetastoreInvocationsForQuery(
+ QueryRunner queryRunner,
+ Session session,
+ @Language("SQL") String query,
+ Multiset expectedInvocations,
+ Runnable asyncOperation,
+ Multiset expectedInvocationsAfterAsync)
+ {
+ queryRunner.execute(session, query);
+ List spansBeforeAsync = queryRunner.getSpans();
+
+ asyncOperation.run();
+ Set spansAfterAsync = Sets.difference(new HashSet<>(queryRunner.getSpans()), new HashSet<>(spansBeforeAsync));
+
+ Multiset invocations = filterInvocations(spansBeforeAsync);
+ assertMultisetsEqual(invocations, expectedInvocations);
+
+ Multiset asyncInvocations = filterInvocations(spansAfterAsync.stream().collect(toImmutableList()));
+ assertMultisetsEqual(asyncInvocations, expectedInvocationsAfterAsync);
}
}
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeTableMetadataScheduler.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeTableMetadataScheduler.java
new file mode 100644
index 000000000000..1e07f7402ba5
--- /dev/null
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestDeltaLakeTableMetadataScheduler.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake.metastore;
+
+import io.trino.Session;
+import io.trino.metastore.HiveMetastore;
+import io.trino.plugin.deltalake.TestingDeltaLakePlugin;
+import io.trino.plugin.jmx.JmxPlugin;
+import io.trino.testing.AbstractTestQueryFramework;
+import io.trino.testing.DistributedQueryRunner;
+import io.trino.testing.QueryRunner;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.shaded.com.google.common.collect.ImmutableMap;
+
+import java.lang.reflect.InvocationTargetException;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.Optional;
+import java.util.stream.IntStream;
+
+import static com.google.common.reflect.Reflection.newProxy;
+import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
+import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore;
+import static io.trino.testing.TestingSession.testSessionBuilder;
+import static java.time.temporal.ChronoUnit.SECONDS;
+
+final class TestDeltaLakeTableMetadataScheduler
+ extends AbstractTestQueryFramework
+{
+ @Override
+ protected QueryRunner createQueryRunner()
+ throws Exception
+ {
+ Session session = testSessionBuilder()
+ .setCatalog("delta")
+ .setSchema("default")
+ .build();
+
+ QueryRunner queryRunner = DistributedQueryRunner.builder(session).build();
+ Path dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("delta");
+ HiveMetastore metastore = createTestingFileHiveMetastore(dataDirectory.toFile());
+
+ HiveMetastore proxiedMetastore = newProxy(HiveMetastore.class, (_, method, args) -> {
+ try {
+ if (method.getName().equals("replaceTable")) {
+ throw new UnsupportedOperationException();
+ }
+ return method.invoke(metastore, args);
+ }
+ catch (InvocationTargetException e) {
+ throw e.getCause();
+ }
+ });
+
+ queryRunner.installPlugin(new TestingDeltaLakePlugin(dataDirectory, Optional.of(new TestingDeltaLakeMetastoreModule(proxiedMetastore))));
+ queryRunner.createCatalog("delta", "delta_lake", ImmutableMap.of("delta.metastore.store-table-metadata", "true"));
+
+ queryRunner.installPlugin(new JmxPlugin());
+ queryRunner.createCatalog("jmx", "jmx");
+
+ queryRunner.execute("CREATE SCHEMA delta.default");
+
+ return queryRunner;
+ }
+
+ @Test
+ @Disabled // TODO Enable after fixing the flaky assertion with JMX
+ void testFailureStopScheduler()
+ {
+ String coordinatorId = (String) computeScalar("SELECT node_id FROM system.runtime.nodes WHERE coordinator = true");
+
+ IntStream.range(0, 11).forEach(i -> assertUpdate("CREATE TABLE test_" + i + "(x int) WITH (column_mapping_mode = 'name')"));
+
+ assertQuery(
+ "SELECT shutdown FROM jmx.current.\"trino.plugin.deltalake.metastore:catalog=delta,name=delta,type=deltalaketablemetadatascheduler\" " +
+ "WHERE node = '" + coordinatorId + "'",
+ "VALUES false");
+
+ // The max failure count is 10, so the scheduler should be stopped after 11 operations
+ IntStream.range(0, 11).forEach(i -> {
+ assertUpdate("ALTER TABLE test_" + i + " RENAME COLUMN x to y");
+ assertUpdate("COMMENT ON TABLE test_" + i + " IS 'test comment'");
+ });
+ sleepUninterruptibly(Duration.of(1, SECONDS));
+
+ assertQuery(
+ "SELECT shutdown FROM jmx.current.\"trino.plugin.deltalake.metastore:catalog=delta,name=delta,type=deltalaketablemetadatascheduler\" " +
+ "WHERE node = '" + coordinatorId + "'",
+ "VALUES true");
+
+ // Metadata should return the correct values regardless of the scheduler status
+ assertQuery(
+ "SELECT table_name, column_name FROM information_schema.columns WHERE table_schema = CURRENT_SCHEMA",
+ "VALUES " +
+ "('test_0', 'y'), " +
+ "('test_1', 'y'), " +
+ "('test_2', 'y'), " +
+ "('test_3', 'y'), " +
+ "('test_4', 'y'), " +
+ "('test_5', 'y'), " +
+ "('test_6', 'y'), " +
+ "('test_7', 'y'), " +
+ "('test_8', 'y'), " +
+ "('test_9', 'y'), " +
+ "('test_10', 'y')");
+ assertQuery(
+ "SELECT table_name, comment FROM system.metadata.table_comments WHERE schema_name = CURRENT_SCHEMA",
+ "VALUES " +
+ "('test_0', 'test comment'), " +
+ "('test_1', 'test comment'), " +
+ "('test_2', 'test comment'), " +
+ "('test_3', 'test comment'), " +
+ "('test_4', 'test comment'), " +
+ "('test_5', 'test comment'), " +
+ "('test_6', 'test comment'), " +
+ "('test_7', 'test comment'), " +
+ "('test_8', 'test comment'), " +
+ "('test_9', 'test comment'), " +
+ "('test_10', 'test comment')");
+ }
+}
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestingDeltaLakeMetastoreModule.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestingDeltaLakeMetastoreModule.java
index a6bf3d3b138e..adea03218e84 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestingDeltaLakeMetastoreModule.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/metastore/TestingDeltaLakeMetastoreModule.java
@@ -15,9 +15,12 @@
import com.google.inject.Binder;
import com.google.inject.Key;
+import com.google.inject.Scopes;
import io.airlift.configuration.AbstractConfigurationAwareModule;
import io.trino.metastore.HiveMetastore;
import io.trino.plugin.deltalake.AllowDeltaLakeManagedTableRename;
+import io.trino.plugin.deltalake.MaxTableParameterLength;
+import io.trino.plugin.deltalake.metastore.file.DeltaLakeFileMetastoreTableOperationsProvider;
import io.trino.plugin.hive.HideDeltaLakeTables;
import io.trino.plugin.hive.metastore.CachingHiveMetastoreModule;
import io.trino.plugin.hive.metastore.HiveMetastoreFactory;
@@ -40,8 +43,10 @@ public void setup(Binder binder)
{
binder.bind(HiveMetastoreFactory.class).annotatedWith(RawHiveMetastoreFactory.class).toInstance(HiveMetastoreFactory.ofInstance(metastore));
install(new CachingHiveMetastoreModule(false));
+ binder.bind(DeltaLakeTableOperationsProvider.class).to(DeltaLakeFileMetastoreTableOperationsProvider.class).in(Scopes.SINGLETON);
binder.bind(Key.get(boolean.class, HideDeltaLakeTables.class)).toInstance(false);
binder.bind(Key.get(boolean.class, AllowDeltaLakeManagedTableRename.class)).toInstance(true);
+ binder.bind(Key.get(int.class, MaxTableParameterLength.class)).toInstance(Integer.MAX_VALUE);
}
}
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java
index 65ba98816c06..65f89b24c407 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java
@@ -667,7 +667,7 @@ public void renameTable(String databaseName, String tableName, String newDatabas
}
}
- private static TableInput.Builder asTableInputBuilder(software.amazon.awssdk.services.glue.model.Table table)
+ public static TableInput.Builder asTableInputBuilder(software.amazon.awssdk.services.glue.model.Table table)
{
return TableInput.builder()
.name(table.name())
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java
index eb7de260bbbe..9bb7a00ebf7e 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/GlueHiveMetastore.java
@@ -155,10 +155,10 @@
import static io.trino.plugin.hive.metastore.MetastoreUtil.verifyCanDropColumn;
import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueInputConverter.convertFunction;
+import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueInputConverter.convertGlueTableToTableInput;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueInputConverter.convertPartition;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableType;
-import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.mappedCopy;
import static io.trino.plugin.hive.util.HiveUtil.escapeSchemaName;
import static io.trino.spi.StandardErrorCode.ALREADY_EXISTS;
@@ -510,7 +510,7 @@ public void renameTable(String databaseName, String tableName, String newDatabas
GetTableRequest getTableRequest = new GetTableRequest().withDatabaseName(databaseName)
.withName(tableName);
GetTableResult glueTable = glueClient.getTable(getTableRequest);
- TableInput tableInput = convertGlueTableToTableInput(glueTable.getTable(), newTableName);
+ TableInput tableInput = convertGlueTableToTableInput(glueTable.getTable()).withName(newTableName);
CreateTableRequest createTableRequest = new CreateTableRequest()
.withDatabaseName(newDatabaseName)
.withTableInput(tableInput);
@@ -533,24 +533,6 @@ public void renameTable(String databaseName, String tableName, String newDatabas
}
}
- private static TableInput convertGlueTableToTableInput(com.amazonaws.services.glue.model.Table glueTable, String newTableName)
- {
- return new TableInput()
- .withName(newTableName)
- .withDescription(glueTable.getDescription())
- .withOwner(glueTable.getOwner())
- .withLastAccessTime(glueTable.getLastAccessTime())
- .withLastAnalyzedTime(glueTable.getLastAnalyzedTime())
- .withRetention(glueTable.getRetention())
- .withStorageDescriptor(glueTable.getStorageDescriptor())
- .withPartitionKeys(glueTable.getPartitionKeys())
- .withViewOriginalText(glueTable.getViewOriginalText())
- .withViewExpandedText(glueTable.getViewExpandedText())
- .withTableType(getTableTypeNullable(glueTable))
- .withTargetTable(glueTable.getTargetTable())
- .withParameters(getTableParameters(glueTable));
- }
-
@Override
public void commentTable(String databaseName, String tableName, Optional comment)
{
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java
index 3b7e0d7948b0..5ed0a2266b31 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java
@@ -48,6 +48,8 @@
import static io.trino.plugin.hive.metastore.MetastoreUtil.metastoreFunctionName;
import static io.trino.plugin.hive.metastore.MetastoreUtil.toResourceUris;
import static io.trino.plugin.hive.metastore.MetastoreUtil.updateStatisticsParameters;
+import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters;
+import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable;
public final class GlueInputConverter
{
@@ -89,6 +91,24 @@ public static TableInput convertTable(Table table)
return input;
}
+ public static TableInput convertGlueTableToTableInput(com.amazonaws.services.glue.model.Table glueTable)
+ {
+ return new TableInput()
+ .withName(glueTable.getName())
+ .withDescription(glueTable.getDescription())
+ .withOwner(glueTable.getOwner())
+ .withLastAccessTime(glueTable.getLastAccessTime())
+ .withLastAnalyzedTime(glueTable.getLastAnalyzedTime())
+ .withRetention(glueTable.getRetention())
+ .withStorageDescriptor(glueTable.getStorageDescriptor())
+ .withPartitionKeys(glueTable.getPartitionKeys())
+ .withViewOriginalText(glueTable.getViewOriginalText())
+ .withViewExpandedText(glueTable.getViewExpandedText())
+ .withTableType(getTableTypeNullable(glueTable))
+ .withTargetTable(glueTable.getTargetTable())
+ .withParameters(getTableParameters(glueTable));
+ }
+
public static PartitionInput convertPartition(PartitionWithStatistics partitionWithStatistics)
{
PartitionInput input = convertPartition(partitionWithStatistics.getPartition());
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java
index c15757c9269d..62deb99c088f 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/MetastoreInvocations.java
@@ -19,6 +19,8 @@
import io.trino.testing.QueryRunner;
import org.intellij.lang.annotations.Language;
+import java.util.List;
+
import static com.google.common.collect.ImmutableMultiset.toImmutableMultiset;
import static io.trino.testing.MultisetAssertions.assertMultisetsEqual;
@@ -36,7 +38,12 @@ public static void assertMetastoreInvocationsForQuery(
{
queryRunner.execute(session, query);
- Multiset invocations = queryRunner.getSpans().stream()
+ assertMultisetsEqual(filterInvocations(queryRunner.getSpans()), expectedInvocations);
+ }
+
+ public static Multiset filterInvocations(List spans)
+ {
+ return spans.stream()
.map(SpanData::getName)
.filter(name -> name.startsWith(TRACE_PREFIX))
.map(name -> name.substring(TRACE_PREFIX.length()))
@@ -44,7 +51,5 @@ public static void assertMetastoreInvocationsForQuery(
.filter(name -> !name.equals("listTablePrivileges"))
.map(MetastoreMethod::fromMethodName)
.collect(toImmutableMultiset());
-
- assertMultisetsEqual(invocations, expectedInvocations);
}
}
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java
index 628bc23a1018..d2b2286ebe55 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/deltalake/TestDeltaLakeJmx.java
@@ -36,7 +36,9 @@ public void testJmxTablesExposedByDeltaLakeConnectorBackedByGlueMetastore()
assertThat(onTrino().executeQuery("SHOW TABLES IN jmx.current LIKE '%name=delta%'")).containsOnly(
row("io.trino.plugin.hive.metastore.cache:name=delta,type=cachinghivemetastore"),
row("io.trino.plugin.hive.metastore.glue:name=delta,type=gluehivemetastore"),
+ row("io.trino.plugin.hive.metastore.glue:name=delta,type=gluemetastorestats"),
row("io.trino.plugin.hive:catalog=delta,name=delta,type=fileformatdatasourcestats"),
+ row("trino.plugin.deltalake.metastore:catalog=delta,name=delta,type=deltalaketablemetadatascheduler"),
row("trino.plugin.deltalake.transactionlog:catalog=delta,name=delta,type=transactionlogaccess"));
}
@@ -47,6 +49,7 @@ public void testJmxTablesExposedByDeltaLakeConnectorBackedByThriftMetastore()
row("io.trino.plugin.hive.metastore.cache:name=delta,type=cachinghivemetastore"),
row("io.trino.plugin.hive.metastore.thrift:name=delta,type=thrifthivemetastore"),
row("io.trino.plugin.hive:catalog=delta,name=delta,type=fileformatdatasourcestats"),
+ row("trino.plugin.deltalake.metastore:catalog=delta,name=delta,type=deltalaketablemetadatascheduler"),
row("trino.plugin.deltalake.transactionlog:catalog=delta,name=delta,type=transactionlogaccess"));
}
}