diff --git a/core/trino-main/src/main/java/io/trino/sql/planner/StatisticsAggregationPlanner.java b/core/trino-main/src/main/java/io/trino/sql/planner/StatisticsAggregationPlanner.java index 22cf672d5081..6df4c8722fcf 100644 --- a/core/trino-main/src/main/java/io/trino/sql/planner/StatisticsAggregationPlanner.java +++ b/core/trino-main/src/main/java/io/trino/sql/planner/StatisticsAggregationPlanner.java @@ -94,7 +94,7 @@ public TableStatisticAggregation createStatisticsAggregation(TableStatisticsMeta for (ColumnStatisticMetadata columnStatisticMetadata : statisticsMetadata.getColumnStatistics()) { String columnName = columnStatisticMetadata.getColumnName(); Symbol inputSymbol = columnToSymbolMap.get(columnName); - verifyNotNull(inputSymbol, "inputSymbol is null"); + verifyNotNull(inputSymbol, "no symbol for [%s] column, these columns exist: %s", columnName, columnToSymbolMap.keySet()); Type inputType = symbolAllocator.getTypes().get(inputSymbol); verifyNotNull(inputType, "inputType is null for symbol: %s", inputSymbol); ColumnStatisticsAggregation aggregation; diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java index 150278774a3e..ea50911b9f17 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java @@ -40,6 +40,7 @@ @DefunctConfig("delta.experimental.ignore-checkpoint-write-failures") public class DeltaLakeConfig { + public static final String EXTENDED_STATISTICS_ENABLED = "delta.extended-statistics.enabled"; public static final String VACUUM_MIN_RETENTION = "delta.vacuum.min-retention"; // Runtime.getRuntime().maxMemory() is not 100% stable and may return slightly different value over JVM lifetime. We use @@ -66,6 +67,7 @@ public class DeltaLakeConfig private Duration dynamicFilteringWaitTimeout = new Duration(0, SECONDS); private boolean tableStatisticsEnabled = true; private boolean extendedStatisticsEnabled = true; + private boolean collectExtendedStatisticsColumnStatisticsOnWrite = true; private HiveCompressionCodec compressionCodec = HiveCompressionCodec.SNAPPY; private long perTransactionMetastoreCacheMaximumSize = 1000; private boolean deleteSchemaLocationsFallback; @@ -321,7 +323,7 @@ public boolean isExtendedStatisticsEnabled() return extendedStatisticsEnabled; } - @Config("delta.extended-statistics.enabled") + @Config(EXTENDED_STATISTICS_ENABLED) @ConfigDescription("Use extended statistics collected by ANALYZE") public DeltaLakeConfig setExtendedStatisticsEnabled(boolean extendedStatisticsEnabled) { @@ -329,6 +331,19 @@ public DeltaLakeConfig setExtendedStatisticsEnabled(boolean extendedStatisticsEn return this; } + public boolean isCollectExtendedStatisticsColumnStatisticsOnWrite() + { + return collectExtendedStatisticsColumnStatisticsOnWrite; + } + + @Config("delta.extended-statistics.collect-on-write") + @ConfigDescription("Enables automatic column level extended statistics collection on write") + public DeltaLakeConfig setCollectExtendedStatisticsColumnStatisticsOnWrite(boolean collectExtendedStatisticsColumnStatisticsOnWrite) + { + this.collectExtendedStatisticsColumnStatisticsOnWrite = collectExtendedStatisticsColumnStatisticsOnWrite; + return this; + } + @NotNull public HiveCompressionCodec getCompressionCodec() { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java index 61ac309d8a6f..75f1e713f02c 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java @@ -136,6 +136,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import java.util.OptionalInt; import java.util.OptionalLong; @@ -150,6 +151,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.ImmutableSet.toImmutableSet; +import static com.google.common.collect.MoreCollectors.toOptional; import static io.trino.plugin.deltalake.DeltaLakeColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME; import static io.trino.plugin.deltalake.DeltaLakeColumnHandle.MERGE_ROW_ID_TYPE; import static io.trino.plugin.deltalake.DeltaLakeColumnHandle.ROW_ID_COLUMN_NAME; @@ -162,6 +164,7 @@ import static io.trino.plugin.deltalake.DeltaLakeColumnType.SYNTHESIZED; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_BAD_WRITE; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA; +import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isCollectExtendedStatisticsColumnStatisticsOnWrite; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isExtendedStatisticsEnabled; import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isTableStatisticsEnabled; import static io.trino.plugin.deltalake.DeltaLakeTableProperties.CHECKPOINT_INTERVAL_PROPERTY; @@ -987,6 +990,15 @@ public Optional finishCreateTable( handle.getComment()); appendAddFileEntries(transactionLogWriter, dataFileInfos, handle.getPartitionedBy(), true); transactionLogWriter.flush(); + + if (isCollectExtendedStatisticsColumnStatisticsOnWrite(session) && !computedStatistics.isEmpty()) { + updateTableStatistics( + session, + Optional.empty(), + location, + computedStatistics); + } + PrincipalPrivileges principalPrivileges = buildInitialPrivilegeSet(table.getOwner().orElseThrow()); try { @@ -1092,7 +1104,7 @@ public void setColumnComment(ConnectorSession session, ConnectorTableHandle tabl ImmutableMap.Builder columnComments = ImmutableMap.builder(); columnComments.putAll(getColumnComments(deltaLakeTableHandle.getMetadataEntry()).entrySet().stream() .filter(e -> !e.getKey().equals(deltaLakeColumnHandle.getName())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + .collect(Collectors.toMap(Entry::getKey, Entry::getValue))); comment.ifPresent(s -> columnComments.put(deltaLakeColumnHandle.getName(), s)); TransactionLogWriter transactionLogWriter = transactionLogWriterFactory.newWriter(session, deltaLakeTableHandle.getLocation()); @@ -2148,7 +2160,7 @@ public Optional> applyFilter(C ImmutableMap.Builder enforceableDomains = ImmutableMap.builder(); ImmutableMap.Builder unenforceableDomains = ImmutableMap.builder(); - for (Map.Entry domainEntry : constraintDomains.entrySet()) { + for (Entry domainEntry : constraintDomains.entrySet()) { DeltaLakeColumnHandle column = (DeltaLakeColumnHandle) domainEntry.getKey(); if (!partitionColumns.contains(column)) { unenforceableDomains.put(column, domainEntry.getValue()); @@ -2234,9 +2246,10 @@ public Optional applyTableScanRedirect(Conne public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata(ConnectorSession session, ConnectorTableHandle tableHandle, Map analyzeProperties) { if (!isExtendedStatisticsEnabled(session)) { - throw new TrinoException( - NOT_SUPPORTED, - "ANALYZE not supported if extended statistics are disabled. Enable via delta.extended-statistics.enabled config property or extended_statistics_enabled session property."); + throw new TrinoException(NOT_SUPPORTED, format( + "ANALYZE not supported if extended statistics are disabled. Enable via %s config property or %s session property.", + DeltaLakeConfig.EXTENDED_STATISTICS_ENABLED, + DeltaLakeSessionProperties.EXTENDED_STATISTICS_ENABLED)); } DeltaLakeTableHandle handle = (DeltaLakeTableHandle) tableHandle; @@ -2256,6 +2269,9 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata(ConnectorSession alreadyAnalyzedModifiedTimeMax.orElse(Instant.ofEpochMilli(0)))); } + Set allColumnNames = extractColumnMetadata(metadata, typeManager).stream() + .map(ColumnMetadata::getName) + .collect(toImmutableSet()); Optional> analyzeColumnNames = DeltaLakeAnalyzeProperties.getColumnNames(analyzeProperties); if (analyzeColumnNames.isPresent()) { Set columnNames = analyzeColumnNames.get(); @@ -2264,9 +2280,6 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata(ConnectorSession throw new TrinoException(INVALID_ANALYZE_PROPERTY, "Cannot specify empty list of columns for analysis"); } - Set allColumnNames = extractColumnMetadata(metadata, typeManager).stream() - .map(ColumnMetadata::getName) - .collect(toImmutableSet()); if (!allColumnNames.containsAll(columnNames)) { throw new TrinoException( INVALID_ANALYZE_PROPERTY, @@ -2299,31 +2312,61 @@ public ConnectorAnalyzeMetadata getStatisticsCollectionMetadata(ConnectorSession handle.getReadVersion(), false); + TableStatisticsMetadata statisticsMetadata = getStatisticsCollectionMetadata( + statistics, + extractColumnMetadata(metadata, typeManager), + analyzeColumnNames.orElse(allColumnNames), + true); + + return new ConnectorAnalyzeMetadata(newHandle, statisticsMetadata); + } + + @Override + public TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(ConnectorSession session, ConnectorTableMetadata tableMetadata) + { + if (!isCollectExtendedStatisticsColumnStatisticsOnWrite(session)) { + return TableStatisticsMetadata.empty(); + } + + Set allColumnNames = tableMetadata.getColumns().stream() + .map(ColumnMetadata::getName) + .collect(toImmutableSet()); + + return getStatisticsCollectionMetadata( + Optional.empty(), // TODO what about INSERT to an existing table + tableMetadata.getColumns(), + allColumnNames, + false); + } + + private TableStatisticsMetadata getStatisticsCollectionMetadata( + Optional existingStatistics, + List tableColumns, + Set analyzeColumnNames, + boolean includeMaxFileModifiedTime) + { ImmutableSet.Builder columnStatistics = ImmutableSet.builder(); - extractColumnMetadata(metadata, typeManager).stream() + tableColumns.stream() .filter(DeltaLakeMetadata::shouldCollectExtendedStatistics) - .filter(columnMetadata -> - analyzeColumnNames - .map(columnNames -> columnNames.contains(columnMetadata.getName())) - .orElse(true)) + .filter(columnMetadata -> analyzeColumnNames.contains(columnMetadata.getName())) .forEach(columnMetadata -> { if (!(columnMetadata.getType() instanceof FixedWidthType)) { - if (statistics.isEmpty() || totalSizeStatisticsExists(statistics.get().getColumnStatistics(), columnMetadata.getName())) { + if (existingStatistics.isEmpty() || totalSizeStatisticsExists(existingStatistics.get().getColumnStatistics(), columnMetadata.getName())) { columnStatistics.add(new ColumnStatisticMetadata(columnMetadata.getName(), TOTAL_SIZE_IN_BYTES)); } } columnStatistics.add(new ColumnStatisticMetadata(columnMetadata.getName(), NUMBER_OF_DISTINCT_VALUES_SUMMARY)); }); - // collect max(file modification time) for sake of incremental ANALYZE - columnStatistics.add(new ColumnStatisticMetadata(FILE_MODIFIED_TIME_COLUMN_NAME, MAX_VALUE)); + if (includeMaxFileModifiedTime) { + // collect max(file modification time) for sake of incremental ANALYZE + columnStatistics.add(new ColumnStatisticMetadata(FILE_MODIFIED_TIME_COLUMN_NAME, MAX_VALUE)); + } - TableStatisticsMetadata statisticsMetadata = new TableStatisticsMetadata( + return new TableStatisticsMetadata( columnStatistics.build(), ImmutableSet.of(), ImmutableList.of()); - - return new ConnectorAnalyzeMetadata(newHandle, statisticsMetadata); } private static boolean shouldCollectExtendedStatistics(ColumnMetadata columnMetadata) @@ -2356,6 +2399,15 @@ public void finishStatisticsCollection(ConnectorSession session, ConnectorTableH DeltaLakeTableHandle tableHandle = (DeltaLakeTableHandle) table; AnalyzeHandle analyzeHandle = tableHandle.getAnalyzeHandle().orElseThrow(() -> new IllegalArgumentException("analyzeHandle not set")); String location = metastore.getTableLocation(tableHandle.getSchemaTableName(), session); + updateTableStatistics( + session, + Optional.of(analyzeHandle), + location, + computedStatistics); + } + + private void updateTableStatistics(ConnectorSession session, Optional analyzeHandle, String location, Collection computedStatistics) + { Optional oldStatistics = statisticsAccess.readExtendedStatistics(session, location); // more elaborate logic for handling statistics model evaluation may need to be introduced in the future @@ -2369,17 +2421,17 @@ public void finishStatisticsCollection(ConnectorSession session, ConnectorTableH .orElseGet(ImmutableMap::of); Map newColumnStatistics = toDeltaLakeColumnStatistics(computedStatistics); - Map mergedColumnStatistics = new HashMap<>(); - - // only keep stats for existing columns - Set newColumns = newColumnStatistics.keySet(); - oldColumnStatistics.entrySet().stream() - .filter(entry -> newColumns.contains(entry.getKey())) - .forEach(entry -> mergedColumnStatistics.put(entry.getKey(), entry.getValue())); - - newColumnStatistics.forEach((columnName, columnStatistics) -> { - mergedColumnStatistics.merge(columnName, columnStatistics, DeltaLakeColumnStatistics::update); - }); + Map mergedColumnStatistics = newColumnStatistics.entrySet().stream() + .collect(toImmutableMap( + Entry::getKey, + entry -> { + String columnName = entry.getKey(); + DeltaLakeColumnStatistics newStats = entry.getValue(); + DeltaLakeColumnStatistics oldStats = oldColumnStatistics.get(columnName); + return oldStats == null + ? newStats + : oldStats.update(newStats); + })); Optional maxFileModificationTime = getMaxFileModificationTime(computedStatistics); // We do not want to hinder our future calls to ANALYZE if one of the files we analyzed have modification time far in the future. @@ -2393,18 +2445,17 @@ public void finishStatisticsCollection(ConnectorSession session, ConnectorTableH finalAlreadyAnalyzedModifiedTimeMax = Comparators.max(oldStatistics.get().getAlreadyAnalyzedModifiedTimeMax(), finalAlreadyAnalyzedModifiedTimeMax); } - if (analyzeHandle.getColumns().isPresent() && !mergedColumnStatistics.keySet().equals(analyzeHandle.getColumns().get())) { - // sanity validation - throw new IllegalStateException( - format("Unexpected columns in in mergedColumnStatistics %s; expected %s", - mergedColumnStatistics.keySet(), - analyzeHandle.getColumns().get())); - } + analyzeHandle.flatMap(AnalyzeHandle::getColumns).ifPresent(analyzeColumns -> { + if (!mergedColumnStatistics.keySet().equals(analyzeColumns)) { + // sanity validation + throw new IllegalStateException(format("Unexpected columns in in mergedColumnStatistics %s; expected %s", mergedColumnStatistics.keySet(), analyzeColumns)); + } + }); ExtendedStatistics mergedExtendedStatistics = new ExtendedStatistics( finalAlreadyAnalyzedModifiedTimeMax, mergedColumnStatistics, - analyzeHandle.getColumns()); + analyzeHandle.flatMap(AnalyzeHandle::getColumns)); statisticsAccess.updateExtendedStatistics(session, location, mergedExtendedStatistics); } @@ -2513,7 +2564,7 @@ private static Map toDeltaLakeColumnStatistic // Only statistics for whole table are collected ComputedStatistics singleStatistics = Iterables.getOnlyElement(computedStatistics); return createColumnToComputedStatisticsMap(singleStatistics.getColumnStatistics()).entrySet().stream() - .collect(toImmutableMap(Map.Entry::getKey, entry -> createDeltaLakeColumnStatistics(entry.getValue()))); + .collect(toImmutableMap(Entry::getKey, entry -> createDeltaLakeColumnStatistics(entry.getValue()))); } private static Map> createColumnToComputedStatisticsMap(Map computedStatistics) @@ -2576,8 +2627,8 @@ private static Optional getMaxFileModificationTime(Collection> sessionProperties; @@ -140,6 +141,11 @@ public DeltaLakeSessionProperties( "Use extended statistics collected by ANALYZE", deltaLakeConfig.isExtendedStatisticsEnabled(), false), + booleanProperty( + COLLECT_EXTENDED_STATISTICS_ON_WRITE, + "Enables automatic column level extended statistics collection on write", + deltaLakeConfig.isCollectExtendedStatisticsColumnStatisticsOnWrite(), + false), enumProperty( COMPRESSION_CODEC, "Compression codec to use when writing new data files", @@ -219,6 +225,11 @@ public static boolean isExtendedStatisticsEnabled(ConnectorSession session) return session.getProperty(EXTENDED_STATISTICS_ENABLED, Boolean.class); } + public static boolean isCollectExtendedStatisticsColumnStatisticsOnWrite(ConnectorSession session) + { + return session.getProperty(COLLECT_EXTENDED_STATISTICS_ON_WRITE, Boolean.class); + } + public static HiveCompressionCodec getCompressionCodec(ConnectorSession session) { return session.getProperty(COMPRESSION_CODEC, HiveCompressionCodec.class); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/AbstractTestDeltaLakeCreateTableStatistics.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseTestDeltaLakeCreateTableStatistics.java similarity index 99% rename from plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/AbstractTestDeltaLakeCreateTableStatistics.java rename to plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseTestDeltaLakeCreateTableStatistics.java index 16e80b1ae6b3..7bee9057121a 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/AbstractTestDeltaLakeCreateTableStatistics.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/BaseTestDeltaLakeCreateTableStatistics.java @@ -65,7 +65,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.testng.Assert.assertEquals; -public abstract class AbstractTestDeltaLakeCreateTableStatistics +public abstract class BaseTestDeltaLakeCreateTableStatistics extends AbstractTestQueryFramework { private static final String SCHEMA = "default"; diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAnalyze.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAnalyze.java index 409a8d7b5498..f2c016436a8b 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAnalyze.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAnalyze.java @@ -31,6 +31,7 @@ import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.TPCH_SCHEMA; import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.createDeltaLakeQueryRunner; +import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.COLLECT_EXTENDED_STATISTICS_ON_WRITE; import static io.trino.testing.TestingAccessControlManager.TestingPrivilegeType.INSERT_TABLE; import static io.trino.testing.TestingAccessControlManager.privilege; import static io.trino.testing.sql.TestTable.randomTableSuffix; @@ -67,9 +68,12 @@ public void testAnalyzeWithCheckpoints() private void testAnalyze(Optional checkpointInterval) { String tableName = "test_analyze_" + randomTableSuffix(); - assertUpdate("CREATE TABLE " + tableName - + (checkpointInterval.isPresent() ? format(" WITH (checkpoint_interval = %s)", checkpointInterval.get()) : "") - + " AS SELECT * FROM tpch.sf1.nation", 25); + assertUpdate( + disableStatisticsCollectionOnWrite(getSession()), + "CREATE TABLE " + tableName + " " + + (checkpointInterval.isPresent() ? format(" WITH (checkpoint_interval = %s)", checkpointInterval.get()) : "") + + " AS SELECT * FROM tpch.sf1.nation", + 25); assertQuery( "SHOW STATS FOR " + tableName, @@ -147,11 +151,14 @@ private void testAnalyze(Optional checkpointInterval) public void testAnalyzePartitioned() { String tableName = "test_analyze_" + randomTableSuffix(); - assertUpdate("CREATE TABLE " + tableName + assertUpdate( + disableStatisticsCollectionOnWrite(getSession()), + "CREATE TABLE " + tableName + " WITH (" + " partitioned_by = ARRAY['regionkey']" + ")" - + "AS SELECT * FROM tpch.sf1.nation", 25); + + "AS SELECT * FROM tpch.sf1.nation", + 25); assertQuery( "SHOW STATS FOR " + tableName, @@ -276,7 +283,7 @@ public void testAnalyzeWithFilesModifiedAfter() { String tableName = "test_analyze_" + randomTableSuffix(); - assertUpdate("CREATE TABLE " + tableName + " AS SELECT * FROM tpch.sf1.nation", 25); + assertUpdate(disableStatisticsCollectionOnWrite(getSession()), "CREATE TABLE " + tableName + " AS SELECT * FROM tpch.sf1.nation", 25); Thread.sleep(100); Instant afterInitialDataIngestion = Instant.now(); @@ -389,7 +396,7 @@ public void testAnalyzeSomeColumns() public void testDropExtendedStats() { try (TestTable table = new TestTable( - getQueryRunner()::execute, + sql -> getQueryRunner().execute(disableStatisticsCollectionOnWrite(getSession()), sql), "test_drop_extended_stats", "AS SELECT * FROM tpch.sf1.nation")) { String query = "SHOW STATS FOR " + table.getName(); @@ -481,4 +488,11 @@ private OperatorStats getOperatorStats(QueryId queryId) .filter(summary -> summary.getOperatorType().contains("Scan")) .collect(onlyElement()); } + + private static Session disableStatisticsCollectionOnWrite(Session session) + { + return Session.builder(session) + .setCatalogSessionProperty(session.getCatalog().orElseThrow(), COLLECT_EXTENDED_STATISTICS_ON_WRITE, "false") + .build(); + } } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java index 30b6adc67d4c..a4306319292a 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java @@ -58,6 +58,7 @@ public void testDefaults() .setDynamicFilteringWaitTimeout(new Duration(0, SECONDS)) .setTableStatisticsEnabled(true) .setExtendedStatisticsEnabled(true) + .setCollectExtendedStatisticsColumnStatisticsOnWrite(true) .setCompressionCodec(HiveCompressionCodec.SNAPPY) .setDeleteSchemaLocationsFallback(false) .setParquetTimeZone(TimeZone.getDefault().getID()) @@ -89,6 +90,7 @@ public void testExplicitPropertyMappings() .put("delta.dynamic-filtering.wait-timeout", "30m") .put("delta.table-statistics-enabled", "false") .put("delta.extended-statistics.enabled", "false") + .put("delta.extended-statistics.collect-on-write", "false") .put("delta.compression-codec", "GZIP") .put("delta.per-transaction-metastore-cache-maximum-size", "500") .put("delta.delete-schema-locations-fallback", "true") @@ -117,6 +119,7 @@ public void testExplicitPropertyMappings() .setDynamicFilteringWaitTimeout(new Duration(30, MINUTES)) .setTableStatisticsEnabled(false) .setExtendedStatisticsEnabled(false) + .setCollectExtendedStatisticsColumnStatisticsOnWrite(false) .setCompressionCodec(HiveCompressionCodec.GZIP) .setDeleteSchemaLocationsFallback(true) .setParquetTimeZone(nonDefaultTimeZone().getID()) diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatistics.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatistics.java index 5ac4fe9d23e8..8693857018dd 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatistics.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatistics.java @@ -18,7 +18,7 @@ import java.util.Map; public class TestDeltaLakeCreateTableStatistics - extends AbstractTestDeltaLakeCreateTableStatistics + extends BaseTestDeltaLakeCreateTableStatistics { @Override Map additionalProperties() diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsLegacyWriter.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsLegacyWriter.java index 70688182535f..9b51ef0691a5 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsLegacyWriter.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeCreateTableStatisticsLegacyWriter.java @@ -36,7 +36,7 @@ import static org.testng.Assert.assertEquals; public class TestDeltaLakeCreateTableStatisticsLegacyWriter - extends AbstractTestDeltaLakeCreateTableStatistics + extends BaseTestDeltaLakeCreateTableStatistics { @Override Map additionalProperties() diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java index 2a182ad484a2..badd1f894e68 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakePlugin.java @@ -15,6 +15,7 @@ import com.google.common.collect.ImmutableMap; import io.airlift.bootstrap.ApplicationConfigurationException; +import io.trino.plugin.hive.HiveConfig; import io.trino.spi.Plugin; import io.trino.spi.connector.Connector; import io.trino.spi.connector.ConnectorFactory; @@ -142,6 +143,19 @@ public void testNoActiveDataFilesCaching() new TestingConnectorContext()); } + @Test + public void testHiveConfigIsNotBound() + { + ConnectorFactory factory = getOnlyElement(new DeltaLakePlugin().getConnectorFactories()); + assertThatThrownBy(() -> factory.create("test", + ImmutableMap.of( + "hive.metastore.uri", "thrift://foo:1234", + // Try setting any property provided by HiveConfig class + HiveConfig.CONFIGURATION_HIVE_PARTITION_PROJECTION_ENABLED, "true"), + new TestingConnectorContext())) + .hasMessageContaining("Error: Configuration property 'hive.partition-projection-enabled' was not used"); + } + @Test public void testReadOnlyAllAccessControl() { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index ab667a28f67b..408eedb3a029 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -3263,7 +3263,7 @@ public TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(Connector return TableStatisticsMetadata.empty(); } if (isTransactional(tableMetadata.getProperties()).orElse(false)) { - // TODO(https://github.com/trinodb/trino/issues/1956) updating table statistics for trasactional not supported right now. + // TODO(https://github.com/trinodb/trino/issues/1956) updating table statistics for transactional not supported right now. return TableStatisticsMetadata.empty(); } List partitionedBy = firstNonNull(getPartitionedBy(tableMetadata.getProperties()), ImmutableList.of());