From cc9bede251a45ed09c02da62cf072e2b0917758a Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Mon, 16 May 2022 11:59:05 +0900 Subject: [PATCH] Fix failure when ACID table column name collides with ACID internal names Previously, HiveMetadata.columnMetadataGetter threw an exception due to duplicated keys when the table contains column names of ACID format columns (e.g. operation) and it's ACID table. --- .../io/trino/plugin/hive/HiveMetadata.java | 25 +------------------ .../plugin/hive/BaseHiveConnectorTest.java | 24 ++++++++++++++++++ .../hive/TestHiveTransactionalTable.java | 23 +++++++++++++++++ 3 files changed, 48 insertions(+), 24 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index d0c42e690105..02bed7b9622d 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -34,7 +34,6 @@ import io.trino.plugin.hive.HiveSessionProperties.InsertExistingPartitionsBehavior; import io.trino.plugin.hive.LocationService.WriteInfo; import io.trino.plugin.hive.acid.AcidOperation; -import io.trino.plugin.hive.acid.AcidSchema; import io.trino.plugin.hive.acid.AcidTransaction; import io.trino.plugin.hive.fs.DirectoryLister; import io.trino.plugin.hive.metastore.Column; @@ -163,14 +162,9 @@ import static io.trino.plugin.hive.HiveApplyProjectionUtil.replaceWithNewVariables; import static io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics; import static io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics; -import static io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME; import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR; import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED; -import static io.trino.plugin.hive.HiveColumnHandle.FILE_MODIFIED_TIME_COLUMN_NAME; -import static io.trino.plugin.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME; -import static io.trino.plugin.hive.HiveColumnHandle.PARTITION_COLUMN_NAME; -import static io.trino.plugin.hive.HiveColumnHandle.PATH_COLUMN_NAME; import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn; import static io.trino.plugin.hive.HiveColumnHandle.updateRowIdColumnHandle; import static io.trino.plugin.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH; @@ -3411,29 +3405,12 @@ private static Function columnMetadataGetter(T } } - // add hidden columns - builder.put(PATH_COLUMN_NAME, Optional.empty()); - if (table.getStorage().getBucketProperty().isPresent()) { - builder.put(BUCKET_COLUMN_NAME, Optional.empty()); - } - builder.put(FILE_SIZE_COLUMN_NAME, Optional.empty()); - builder.put(FILE_MODIFIED_TIME_COLUMN_NAME, Optional.empty()); - if (!table.getPartitionColumns().isEmpty()) { - builder.put(PARTITION_COLUMN_NAME, Optional.empty()); - } - - if (isFullAcidTable(table.getParameters())) { - for (String name : AcidSchema.ACID_COLUMN_NAMES) { - builder.put(name, Optional.empty()); - } - } - Map> columnComment = builder.buildOrThrow(); return handle -> ColumnMetadata.builder() .setName(handle.getName()) .setType(handle.getType()) - .setComment(columnComment.get(handle.getName())) + .setComment(handle.isHidden() ? Optional.empty() : columnComment.get(handle.getName())) .setExtraInfo(Optional.ofNullable(columnExtraInfo(handle.isPartitionKey()))) .setHidden(handle.isHidden()) .build(); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java index 15fc44a5898e..1a91aea9bdf0 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java @@ -8328,6 +8328,30 @@ public void testUseColumnNames(HiveStorageFormat format, boolean formatUseColumn assertUpdate("DROP TABLE " + tableName); } + @Test(dataProvider = "hiddenColumnNames") + public void testHiddenColumnNameConflict(String columnName) + { + try (TestTable table = new TestTable( + getQueryRunner()::execute, + "test_hidden_column_name_conflict", + format("(\"%s\" int, _bucket int, _partition int) WITH (partitioned_by = ARRAY['_partition'], bucketed_by = ARRAY['_bucket'], bucket_count = 10)", columnName))) { + assertThatThrownBy(() -> query("SELECT * FROM " + table.getName())) + .hasMessageContaining("Multiple entries with same key: " + columnName); + } + } + + @DataProvider + public Object[][] hiddenColumnNames() + { + return new Object[][] { + {"$path"}, + {"$bucket"}, + {"$file_size"}, + {"$file_modified_time"}, + {"$partition"}, + }; + } + @Test(dataProvider = "legalUseColumnNamesProvider") public void testUseColumnAddDrop(HiveStorageFormat format, boolean formatUseColumnNames) { diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java index d643db9d9d00..b8c727b497b7 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java @@ -537,6 +537,29 @@ public void testCreateAcidTable(boolean isPartitioned, BucketingType bucketingTy }); } + @Test(groups = HIVE_TRANSACTIONAL, dataProvider = "acidFormatColumnNames") + public void testAcidTableColumnNameConflict(String columnName) + { + withTemporaryTable("acid_column_name_conflict", true, true, NONE, tableName -> { + onHive().executeQuery("CREATE TABLE " + tableName + " (`" + columnName + "` INTEGER, fcol INTEGER, partcol INTEGER) STORED AS ORC " + hiveTableProperties(ACID, NONE)); + onTrino().executeQuery("INSERT INTO " + tableName + " VALUES (1, 2, 3)"); + assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(row(1, 2, 3)); + }); + } + + @DataProvider + public Object[][] acidFormatColumnNames() + { + return new Object[][] { + {"operation"}, + {"originalTransaction"}, + {"bucket"}, + {"rowId"}, + {"row"}, + {"currentTransaction"}, + }; + } + @Test(groups = HIVE_TRANSACTIONAL) public void testSimpleUnpartitionedTransactionalInsert() {