diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index 02bed7b9622d..b8cf57d21bd7 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -965,6 +965,7 @@ private Map getEmptyTableProperties(ConnectorTableMetadata table List columns = getOrcBloomFilterColumns(tableMetadata.getProperties()); if (columns != null && !columns.isEmpty()) { checkFormatForProperty(hiveStorageFormat, HiveStorageFormat.ORC, ORC_BLOOM_FILTER_COLUMNS); + validateOrcBloomFilterColumns(tableMetadata, columns); tableProperties.put(ORC_BLOOM_FILTER_COLUMNS_KEY, Joiner.on(",").join(columns)); tableProperties.put(ORC_BLOOM_FILTER_FPP_KEY, String.valueOf(getOrcBloomFilterFpp(tableMetadata.getProperties()))); } @@ -1063,6 +1064,16 @@ private static void checkFormatForProperty(HiveStorageFormat actualStorageFormat } } + private void validateOrcBloomFilterColumns(ConnectorTableMetadata tableMetadata, List orcBloomFilterColumns) + { + Set allColumns = tableMetadata.getColumns().stream() + .map(ColumnMetadata::getName) + .collect(toImmutableSet()); + if (!allColumns.containsAll(orcBloomFilterColumns)) { + throw new TrinoException(INVALID_TABLE_PROPERTY, format("Orc bloom filter columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(orcBloomFilterColumns), allColumns))); + } + } + private String validateAndNormalizeAvroSchemaUrl(String url, HdfsContext context) { try { diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java index cf4c0566077d..70b819ba9188 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java @@ -3815,7 +3815,7 @@ public void testShowCreateTable() " bucketed_by = ARRAY['c1','c 2'],\n" + " bucketing_version = 1,\n" + " format = 'ORC',\n" + - " orc_bloom_filter_columns = ARRAY['c1','c2'],\n" + + " orc_bloom_filter_columns = ARRAY['c1','c 2'],\n" + " orc_bloom_filter_fpp = 7E-1,\n" + " partitioned_by = ARRAY['c5'],\n" + " sorted_by = ARRAY['c1','c 2 DESC'],\n" + diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java index d69f4f57b1ab..fac97331c90a 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java @@ -16,6 +16,8 @@ import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; import io.airlift.slice.Slice; import io.airlift.slice.SliceUtf8; import io.airlift.slice.Slices; @@ -62,6 +64,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; import java.util.regex.Pattern; @@ -69,6 +72,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.collect.Lists.reverse; import static io.airlift.slice.Slices.utf8Slice; import static io.trino.plugin.hive.HiveMetadata.TABLE_COMMENT; @@ -415,6 +419,7 @@ public static Transaction newCreateTableTransaction(TrinoCatalog catalog, Connec List columns = getOrcBloomFilterColumns(tableMetadata.getProperties()); if (!columns.isEmpty()) { checkFormatForProperty(fileFormat.toIceberg(), FileFormat.ORC, ORC_BLOOM_FILTER_COLUMNS); + validateOrcBloomFilterColumns(tableMetadata, columns); propertiesBuilder.put(ORC_BLOOM_FILTER_COLUMNS_KEY, Joiner.on(",").join(columns)); propertiesBuilder.put(ORC_BLOOM_FILTER_FPP_KEY, String.valueOf(getOrcBloomFilterFpp(tableMetadata.getProperties()))); } @@ -443,4 +448,14 @@ private static void checkFormatForProperty(FileFormat actualStorageFormat, FileF throw new TrinoException(INVALID_TABLE_PROPERTY, format("Cannot specify %s table property for storage format: %s", propertyName, actualStorageFormat)); } } + + private static void validateOrcBloomFilterColumns(ConnectorTableMetadata tableMetadata, List orcBloomFilterColumns) + { + Set allColumns = tableMetadata.getColumns().stream() + .map(ColumnMetadata::getName) + .collect(toImmutableSet()); + if (!allColumns.containsAll(orcBloomFilterColumns)) { + throw new TrinoException(INVALID_TABLE_PROPERTY, format("Orc bloom filter columns %s not present in schema", Sets.difference(ImmutableSet.copyOf(orcBloomFilterColumns), allColumns))); + } + } } diff --git a/testing/trino-testing/src/main/java/io/trino/testing/BaseOrcWithBloomFiltersTest.java b/testing/trino-testing/src/main/java/io/trino/testing/BaseOrcWithBloomFiltersTest.java index 336e09844a4e..fa84fed7c86a 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/BaseOrcWithBloomFiltersTest.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/BaseOrcWithBloomFiltersTest.java @@ -20,6 +20,7 @@ import static io.trino.testing.sql.TestTable.randomTableSuffix; import static java.lang.String.format; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; public abstract class BaseOrcWithBloomFiltersTest extends AbstractTestQueryFramework @@ -42,6 +43,18 @@ public void testOrcBloomFilterIsWrittenDuringCreate() assertUpdate("DROP TABLE " + tableName); } + @Test + public void testInvalidOrcBloomFilterColumnsDuringCreate() + { + String tableName = "create_orc_with_bloom_filters_" + randomTableSuffix(); + assertThatThrownBy(() -> computeActual( + format( + "CREATE TABLE %s WITH (%s) AS SELECT orderstatus FROM tpch.tiny.orders", + tableName, + getTableProperties("totalprice", "orderstatus")))) + .hasMessage("Orc bloom filter columns [totalprice] not present in schema"); + } + @Test public void testOrcBloomFilterIsWrittenDuringInsert() {