From a8ed1764231e3a8252c48aadcdfa8fc545c153ca Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Mon, 6 Mar 2023 10:36:48 +0900 Subject: [PATCH 1/7] Fix typo --- .../hive/metastore/SemiTransactionalHiveMetastore.java | 2 +- .../plugin/hive/metastore/glue/TestHiveGlueMetastore.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java index e18f93bf4b4a..7f25f155c090 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java @@ -3633,7 +3633,7 @@ public static void cleanExtraOutputFiles(HdfsEnvironment hdfsEnvironment, HdfsCo log.debug("Deleting failed attempt files from %s for query %s", path, queryId); FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, path); if (!fileSystem.exists(path)) { - // directory may nat exit if no files were actually written + // directory may not exit if no files were actually written return; } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java index 9a05b1db007f..1f44f2afde40 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastore.java @@ -1370,7 +1370,7 @@ private void doGetPartitionsFilterTest( List> expectedValuesList) throws Exception { - try (CloseableSchamaTableName closeableTableName = new CloseableSchamaTableName(temporaryTable(("get_partitions")))) { + try (CloseableSchemaTableName closeableTableName = new 
CloseableSchemaTableName(temporaryTable(("get_partitions")))) { SchemaTableName tableName = closeableTableName.getSchemaTableName(); createDummyPartitionedTable(tableName, columnMetadata, partitionColumnNames, partitionValues); HiveMetastore metastoreClient = getMetastoreClient(); @@ -1419,12 +1419,12 @@ private void createDummyPartitionedTable(SchemaTableName tableName, List EMPTY_TABLE_STATISTICS)); } - private class CloseableSchamaTableName + private class CloseableSchemaTableName implements AutoCloseable { private final SchemaTableName schemaTableName; - private CloseableSchamaTableName(SchemaTableName schemaTableName) + private CloseableSchemaTableName(SchemaTableName schemaTableName) { this.schemaTableName = schemaTableName; } From 63f66acbabea5bc06a34498c19fec596c2d3bc8a Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Mon, 6 Mar 2023 14:53:23 +0900 Subject: [PATCH 2/7] Make writeId for alterPartitions optional The field isn't required in Thrift Hive metastore. --- .../main/java/io/trino/plugin/hive/HiveMetastoreClosure.java | 2 +- .../trino/plugin/hive/metastore/ForwardingHiveMetastore.java | 2 +- .../java/io/trino/plugin/hive/metastore/HiveMetastore.java | 2 +- .../hive/metastore/SemiTransactionalHiveMetastore.java | 2 +- .../plugin/hive/metastore/cache/CachingHiveMetastore.java | 2 +- .../plugin/hive/metastore/thrift/BridgingHiveMetastore.java | 4 ++-- .../metastore/thrift/FailureAwareThriftMetastoreClient.java | 3 ++- .../plugin/hive/metastore/thrift/ThriftHiveMetastore.java | 4 ++-- .../hive/metastore/thrift/ThriftHiveMetastoreClient.java | 5 +++-- .../trino/plugin/hive/metastore/thrift/ThriftMetastore.java | 2 +- .../plugin/hive/metastore/thrift/ThriftMetastoreClient.java | 3 ++- .../hive/metastore/thrift/MockThriftMetastoreClient.java | 3 ++- 12 files changed, 19 insertions(+), 15 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java index e2a223f7e7a1..bf5fffd89339 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java @@ -373,7 +373,7 @@ public void updateTableWriteId(String dbName, String tableName, long transaction delegate.updateTableWriteId(dbName, tableName, transactionId, writeId, rowCountChange); } - public void alterPartitions(String dbName, String tableName, List partitions, long writeId) + public void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) { delegate.alterPartitions(dbName, tableName, partitions, writeId); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java index 468aeb5f3cec..6feb6343b3b2 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java @@ -427,7 +427,7 @@ public void alterPartitions( String dbName, String tableName, List partitions, - long writeId) + OptionalLong writeId) { delegate.alterPartitions(dbName, tableName, partitions, writeId); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java index 3eec2fa212eb..8e526e4ddc36 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java @@ -211,7 +211,7 @@ default void updateTableWriteId(String dbName, String tableName, long transactio throw new UnsupportedOperationException(); } - default void alterPartitions(String dbName, String tableName, 
List partitions, long writeId) + default void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) { throw new UnsupportedOperationException(); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java index 7f25f155c090..d5c13d276f29 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java @@ -3611,7 +3611,7 @@ public void updateTableWriteId(String dbName, String tableName, long transaction delegate.updateTableWriteId(dbName, tableName, transactionId, writeId, rowCountChange); } - public void alterPartitions(String dbName, String tableName, List partitions, long writeId) + public void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) { delegate.alterPartitions(dbName, tableName, partitions, writeId); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java index 7e053467e0ac..161bf865666a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java @@ -1132,7 +1132,7 @@ public void updateTableWriteId(String dbName, String tableName, long transaction } @Override - public void alterPartitions(String dbName, String tableName, List partitions, long writeId) + public void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) { try { delegate.alterPartitions(dbName, tableName, partitions, writeId); diff --git 
a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java index 99ca6d5e2227..4fabccfcf585 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java @@ -525,11 +525,11 @@ public void updateTableWriteId(String dbName, String tableName, long transaction } @Override - public void alterPartitions(String dbName, String tableName, List partitions, long writeId) + public void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) { List hadoopPartitions = partitions.stream() .map(ThriftMetastoreUtil::toMetastoreApiPartition) - .peek(partition -> partition.setWriteId(writeId)) + .peek(partition -> writeId.ifPresent(partition::setWriteId)) .collect(toImmutableList()); delegate.alterPartitions(dbName, tableName, hadoopPartitions, writeId); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/FailureAwareThriftMetastoreClient.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/FailureAwareThriftMetastoreClient.java index 613c6423654d..2736c0133d79 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/FailureAwareThriftMetastoreClient.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/FailureAwareThriftMetastoreClient.java @@ -34,6 +34,7 @@ import java.util.List; import java.util.Map; +import java.util.OptionalLong; import static java.util.Objects.requireNonNull; @@ -412,7 +413,7 @@ public List allocateTableWriteIds(String database, String tableNam } @Override - public void alterPartitions(String dbName, String tableName, List partitions, long writeId) + public void alterPartitions(String dbName, String tableName, List partitions, 
OptionalLong writeId) throws TException { runWithHandle(() -> delegate.alterPartitions(dbName, tableName, partitions, writeId)); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java index fe0ca923c499..07a21bc8b4c0 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastore.java @@ -1870,9 +1870,9 @@ public void updateTableWriteId(String dbName, String tableName, long transaction } @Override - public void alterPartitions(String dbName, String tableName, List partitions, long writeId) + public void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) { - checkArgument(writeId > 0, "writeId should be a positive integer, but was %s", writeId); + checkArgument(writeId.isEmpty() || writeId.getAsLong() > 0, "writeId should be a positive integer, but was %s", writeId); try { retry() .stopOnIllegalExceptions() diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastoreClient.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastoreClient.java index 80e94ac172bb..080c0b98a0f8 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastoreClient.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftHiveMetastoreClient.java @@ -73,6 +73,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.OptionalLong; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; import java.util.regex.Pattern; @@ -685,7 +686,7 @@ public List allocateTableWriteIds(String dbName, String tableName, } @Override - public void alterPartitions(String 
dbName, String tableName, List partitions, long writeId) + public void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) throws TException { alternativeCall( @@ -693,7 +694,7 @@ public void alterPartitions(String dbName, String tableName, List par chosenAlterPartitionsAlternative, () -> { AlterPartitionsRequest request = new AlterPartitionsRequest(dbName, tableName, partitions); - request.setWriteId(writeId); + writeId.ifPresent(request::setWriteId); client.alterPartitionsReq(request); return null; }, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java index 97aa01ac3c26..fb9e1024cc1d 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastore.java @@ -211,7 +211,7 @@ default void updateTableWriteId(String dbName, String tableName, long transactio throw new UnsupportedOperationException(); } - default void alterPartitions(String dbName, String tableName, List partitions, long writeId) + default void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) { throw new UnsupportedOperationException(); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreClient.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreClient.java index 48510dfb6550..eb3e2095dfbd 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreClient.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreClient.java @@ -34,6 +34,7 @@ import java.io.Closeable; import java.util.List; import java.util.Map; +import java.util.OptionalLong; public interface ThriftMetastoreClient extends Closeable 
@@ -194,7 +195,7 @@ default List allocateTableWriteIds(String database, String tableNa throw new UnsupportedOperationException(); } - void alterPartitions(String dbName, String tableName, List partitions, long writeId) + void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) throws TException; void addDynamicPartitions(String dbName, String tableName, List partitionNames, long transactionId, long writeId, AcidOperation operation) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MockThriftMetastoreClient.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MockThriftMetastoreClient.java index acc15c6c59f1..a076557f3ce1 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MockThriftMetastoreClient.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/thrift/MockThriftMetastoreClient.java @@ -44,6 +44,7 @@ import java.util.List; import java.util.Map; +import java.util.OptionalLong; import java.util.concurrent.atomic.AtomicInteger; import static com.google.common.collect.ImmutableList.toImmutableList; @@ -504,7 +505,7 @@ public String getConfigValue(String name, String defaultValue) } @Override - public void alterPartitions(String dbName, String tableName, List partitions, long writeId) + public void alterPartitions(String dbName, String tableName, List partitions, OptionalLong writeId) { throw new UnsupportedOperationException(); } From 3c15ec30ee23f86e4861df415f29e2657a79b7d8 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Fri, 3 Feb 2023 15:54:20 +0900 Subject: [PATCH 3/7] Fix reading ORC files after column evolved from tinyint to integer --- .../src/main/java/io/trino/orc/reader/ByteColumnReader.java | 1 - .../src/main/java/io/trino/orc/reader/ColumnReaders.java | 4 ---- .../tests/product/hive/TestHiveTransactionalTable.java | 6 +++--- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git 
a/lib/trino-orc/src/main/java/io/trino/orc/reader/ByteColumnReader.java b/lib/trino-orc/src/main/java/io/trino/orc/reader/ByteColumnReader.java index ae2160d36c5b..1dc87789cc4d 100644 --- a/lib/trino-orc/src/main/java/io/trino/orc/reader/ByteColumnReader.java +++ b/lib/trino-orc/src/main/java/io/trino/orc/reader/ByteColumnReader.java @@ -77,7 +77,6 @@ public ByteColumnReader(Type type, OrcColumn column, LocalMemoryContext memoryCo throws OrcCorruptionException { this.type = requireNonNull(type, "type is null"); - // Iceberg maps ORC tinyint type to integer verifyStreamType(column, type, t -> t == TINYINT || t == INTEGER); this.column = requireNonNull(column, "column is null"); diff --git a/lib/trino-orc/src/main/java/io/trino/orc/reader/ColumnReaders.java b/lib/trino-orc/src/main/java/io/trino/orc/reader/ColumnReaders.java index 63b9205a3b70..24bf0c2bf61d 100644 --- a/lib/trino-orc/src/main/java/io/trino/orc/reader/ColumnReaders.java +++ b/lib/trino-orc/src/main/java/io/trino/orc/reader/ColumnReaders.java @@ -27,7 +27,6 @@ import static io.trino.orc.metadata.OrcType.OrcTypeKind.BINARY; import static io.trino.orc.metadata.OrcType.OrcTypeKind.LONG; import static io.trino.orc.reader.ReaderUtils.invalidStreamType; -import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.TimeType.TIME_MICROS; public final class ColumnReaders @@ -65,9 +64,6 @@ public static ColumnReader createColumnReader( case BOOLEAN: return new BooleanColumnReader(type, column, memoryContext.newLocalMemoryContext(ColumnReaders.class.getSimpleName())); case BYTE: - if (type == INTEGER && !column.getAttributes().containsKey("iceberg.id")) { - throw invalidStreamType(column, type); - } return new ByteColumnReader(type, column, memoryContext.newLocalMemoryContext(ColumnReaders.class.getSimpleName())); case SHORT: case INT: diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java 
b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java index c2dde16d46e4..407ad60cbc3f 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveTransactionalTable.java @@ -1132,9 +1132,9 @@ public void testOrcColumnTypeChange(boolean transactional) log.info("This shows that Hive see the old data after a column is widened"); assertThat(onHive().executeQuery("SELECT * FROM " + tableName)) .containsOnly(row(111, "Katy", 57, "CA"), row(222, "Joe", 72, "WA")); - log.info("This shows that Trino gets an exception trying to widen the type"); - assertQueryFailure(() -> onTrino().executeQuery("SELECT * FROM " + tableName)) - .hasMessageMatching(".*Malformed ORC file. Cannot read SQL type 'integer' from ORC stream '.*.age' of type BYTE with attributes.*"); + log.info("This shows that Trino see the old data after a column is widened"); + assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)) + .containsOnly(row(111, "Katy", 57, "CA"), row(222, "Joe", 72, "WA")); }); } From 6e081f189dcc67bb7b607ffbda7b73cf9a14787d Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Fri, 3 Feb 2023 16:03:29 +0900 Subject: [PATCH 4/7] Support type evolution from tinyint to smallint and bigint in ORC --- .../io/trino/orc/reader/ByteColumnReader.java | 36 ++++++++++++++++++- .../io/trino/testing/BaseConnectorTest.java | 3 ++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/lib/trino-orc/src/main/java/io/trino/orc/reader/ByteColumnReader.java b/lib/trino-orc/src/main/java/io/trino/orc/reader/ByteColumnReader.java index 1dc87789cc4d..f402250266f8 100644 --- a/lib/trino-orc/src/main/java/io/trino/orc/reader/ByteColumnReader.java +++ b/lib/trino-orc/src/main/java/io/trino/orc/reader/ByteColumnReader.java @@ -25,7 +25,9 @@ import io.trino.spi.block.Block; import 
io.trino.spi.block.ByteArrayBlock; import io.trino.spi.block.IntArrayBlock; +import io.trino.spi.block.LongArrayBlock; import io.trino.spi.block.RunLengthEncodedBlock; +import io.trino.spi.block.ShortArrayBlock; import io.trino.spi.type.Type; import javax.annotation.Nullable; @@ -43,7 +45,9 @@ import static io.trino.orc.reader.ReaderUtils.minNonNullValueSize; import static io.trino.orc.reader.ReaderUtils.verifyStreamType; import static io.trino.orc.stream.MissingInputStreamSource.missingStreamSource; +import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.IntegerType.INTEGER; +import static io.trino.spi.type.SmallintType.SMALLINT; import static io.trino.spi.type.TinyintType.TINYINT; import static java.util.Objects.requireNonNull; @@ -77,7 +81,7 @@ public ByteColumnReader(Type type, OrcColumn column, LocalMemoryContext memoryCo throws OrcCorruptionException { this.type = requireNonNull(type, "type is null"); - verifyStreamType(column, type, t -> t == TINYINT || t == INTEGER); + verifyStreamType(column, type, t -> t == TINYINT || t == SMALLINT || t == INTEGER || t == BIGINT); this.column = requireNonNull(column, "column is null"); this.memoryContext = requireNonNull(memoryContext, "memoryContext is null"); @@ -152,9 +156,15 @@ private Block readNonNullBlock() if (type == TINYINT) { return new ByteArrayBlock(nextBatchSize, Optional.empty(), values); } + if (type == SMALLINT) { + return new ShortArrayBlock(nextBatchSize, Optional.empty(), convertToShortArray(values)); + } if (type == INTEGER) { return new IntArrayBlock(nextBatchSize, Optional.empty(), convertToIntArray(values)); } + if (type == BIGINT) { + return new LongArrayBlock(nextBatchSize, Optional.empty(), convertToLongArray(values)); + } throw new VerifyError("Unsupported type " + type); } @@ -174,9 +184,15 @@ private Block readNullBlock(boolean[] isNull, int nonNullCount) if (type == TINYINT) { return new ByteArrayBlock(nextBatchSize, Optional.of(isNull), result); } + if (type == 
SMALLINT) { + return new ShortArrayBlock(nextBatchSize, Optional.of(isNull), convertToShortArray(result)); + } if (type == INTEGER) { return new IntArrayBlock(nextBatchSize, Optional.of(isNull), convertToIntArray(result)); } + if (type == BIGINT) { + return new LongArrayBlock(nextBatchSize, Optional.of(isNull), convertToLongArray(result)); + } throw new VerifyError("Unsupported type " + type); } @@ -219,6 +235,15 @@ public void startRowGroup(InputStreamSources dataStreamSources) rowGroupOpen = false; } + private static short[] convertToShortArray(byte[] bytes) + { + short[] values = new short[bytes.length]; + for (int i = 0; i < bytes.length; i++) { + values[i] = bytes[i]; + } + return values; + } + private static int[] convertToIntArray(byte[] bytes) { int[] values = new int[bytes.length]; @@ -228,6 +253,15 @@ private static int[] convertToIntArray(byte[] bytes) return values; } + private static long[] convertToLongArray(byte[] bytes) + { + long[] values = new long[bytes.length]; + for (int i = 0; i < bytes.length; i++) { + values[i] = bytes[i]; + } + return values; + } + @Override public String toString() { diff --git a/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java b/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java index e76ca3763a5a..9b04113c22e6 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java @@ -2406,7 +2406,10 @@ private List setColumnTypeSetupData() { return ImmutableList.builder() .add(new SetColumnTypeSetup("tinyint", "TINYINT '127'", "smallint")) + .add(new SetColumnTypeSetup("tinyint", "TINYINT '127'", "integer")) + .add(new SetColumnTypeSetup("tinyint", "TINYINT '127'", "bigint")) .add(new SetColumnTypeSetup("smallint", "SMALLINT '32767'", "integer")) + .add(new SetColumnTypeSetup("smallint", "SMALLINT '32767'", "bigint")) .add(new SetColumnTypeSetup("integer", "2147483647", 
"bigint")) .add(new SetColumnTypeSetup("bigint", "BIGINT '-2147483648'", "integer")) .add(new SetColumnTypeSetup("real", "REAL '10.3'", "double")) From 45629b49449e39c612f8beebfe9f8321e8f21227 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Thu, 2 Mar 2023 10:36:50 +0900 Subject: [PATCH 5/7] Support changing column types in Hive connector --- docs/src/main/sphinx/connector/hive.rst | 10 ++ plugin/trino-hive/pom.xml | 2 + .../io/trino/plugin/hive/HiveMetadata.java | 82 ++++++++++++++++ .../plugin/hive/HiveMetastoreClosure.java | 5 + .../metastore/ForwardingHiveMetastore.java | 6 ++ .../plugin/hive/metastore/HiveMetastore.java | 2 + .../SemiTransactionalHiveMetastore.java | 5 + .../alluxio/AlluxioHiveMetastore.java | 6 ++ .../metastore/cache/CachingHiveMetastore.java | 11 +++ .../metastore/file/FileHiveMetastore.java | 28 ++++++ .../metastore/glue/GlueHiveMetastore.java | 24 +++++ .../recording/RecordingHiveMetastore.java | 7 ++ .../thrift/BridgingHiveMetastore.java | 16 ++++ .../plugin/hive/BaseHiveConnectorTest.java | 40 ++++++++ .../CountingAccessHiveMetastore.java | 6 ++ .../metastore/UnimplementedHiveMetastore.java | 6 ++ .../TestHiveGlueMetastoreCompatibility.java | 94 +++++++++++++++++++ 17 files changed, 350 insertions(+) create mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastoreCompatibility.java diff --git a/docs/src/main/sphinx/connector/hive.rst b/docs/src/main/sphinx/connector/hive.rst index 5da5a0f6ef7d..8fd3228b623f 100644 --- a/docs/src/main/sphinx/connector/hive.rst +++ b/docs/src/main/sphinx/connector/hive.rst @@ -920,6 +920,16 @@ The following operations are not supported when ``avro_schema_url`` is set: * Bucketing(``bucketed_by``) columns are not supported in ``CREATE TABLE``. * ``ALTER TABLE`` commands modifying columns are not supported. +ALTER TABLE +""""""""""" + +.. 
_hive-set-column-type: + +The connector supports changing column types with +``ALTER TABLE [ IF EXISTS ] name ALTER COLUMN column_name SET DATA TYPE new_type``. + +The column change command will only modify Hive's metadata, and will not modify data. + .. _hive-alter-table-execute: ALTER TABLE EXECUTE diff --git a/plugin/trino-hive/pom.xml b/plugin/trino-hive/pom.xml index b262db770255..0241a8b6e40d 100644 --- a/plugin/trino-hive/pom.xml +++ b/plugin/trino-hive/pom.xml @@ -539,6 +539,7 @@ **/TestHiveGlueMetastore.java + **/TestHiveGlueMetastoreCompatibility.java **/TestTrinoS3FileSystemAwsS3.java **/TestFullParquetReader.java **/Test*FailureRecoveryTest.java @@ -593,6 +594,7 @@ **/TestHiveGlueMetastore.java + **/TestHiveGlueMetastoreCompatibility.java **/TestTrinoS3FileSystemAwsS3.java diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index 111f0a1c0311..bba4c88eaebf 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -24,6 +24,7 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import com.google.common.collect.Streams; import io.airlift.json.JsonCodec; import io.airlift.log.Logger; import io.airlift.slice.Slice; @@ -119,11 +120,14 @@ import io.trino.spi.statistics.TableStatistics; import io.trino.spi.statistics.TableStatisticsMetadata; import io.trino.spi.type.ArrayType; +import io.trino.spi.type.CharType; +import io.trino.spi.type.DecimalType; import io.trino.spi.type.MapType; import io.trino.spi.type.RowType; import io.trino.spi.type.TimestampType; import io.trino.spi.type.Type; import io.trino.spi.type.TypeManager; +import io.trino.spi.type.VarcharType; import org.apache.avro.Schema; import org.apache.avro.SchemaParseException; import org.apache.hadoop.fs.FileSystem; @@ 
-326,6 +330,11 @@ import static io.trino.spi.predicate.TupleDomain.withColumnDomains; import static io.trino.spi.statistics.TableStatisticType.ROW_COUNT; import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.DoubleType.DOUBLE; +import static io.trino.spi.type.IntegerType.INTEGER; +import static io.trino.spi.type.RealType.REAL; +import static io.trino.spi.type.SmallintType.SMALLINT; +import static io.trino.spi.type.TinyintType.TINYINT; import static io.trino.spi.type.TypeUtils.isFloatingPointNaN; import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; import static java.lang.Boolean.parseBoolean; @@ -1345,6 +1354,79 @@ public void dropColumn(ConnectorSession session, ConnectorTableHandle tableHandl metastore.dropColumn(hiveTableHandle.getSchemaName(), hiveTableHandle.getTableName(), columnHandle.getName()); } + @Override + public void setColumnType(ConnectorSession session, ConnectorTableHandle tableHandle, ColumnHandle columnHandle, Type type) + { + HiveTableHandle table = (HiveTableHandle) tableHandle; + failIfAvroSchemaIsSet(table); + HiveColumnHandle column = (HiveColumnHandle) columnHandle; + + table.getPartitionNames().ifPresent(partitionNames -> { + if (partitionNames.contains(column.getName())) { + throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); + } + }); + if (!canChangeColumnType(column.getType(), type)) { + throw new TrinoException(NOT_SUPPORTED, "Cannot change type from %s to %s".formatted(column.getType(), type)); + } + metastore.setColumnType(table.getSchemaName(), table.getTableName(), column.getName(), toHiveType(type)); + } + + private static boolean canChangeColumnType(Type sourceType, Type targetType) + { + if (sourceType.equals(targetType)) { + return true; + } + if (sourceType == TINYINT) { + return targetType == SMALLINT || targetType == INTEGER || targetType == BIGINT; + } + if (sourceType == SMALLINT) { + return targetType == INTEGER || targetType == 
BIGINT; + } + if (sourceType == INTEGER) { + return targetType == BIGINT; + } + if (sourceType == REAL) { + return targetType == DOUBLE; + } + if (sourceType instanceof VarcharType || sourceType instanceof CharType) { + return targetType instanceof VarcharType || targetType instanceof CharType; + } + if (sourceType instanceof DecimalType sourceDecimal && targetType instanceof DecimalType targetDecimal) { + // TODO: Support rescale in ORC DecimalColumnReader + return sourceDecimal.getScale() == targetDecimal.getScale() + && sourceDecimal.getPrecision() <= targetDecimal.getPrecision(); + } + if (sourceType instanceof ArrayType sourceArrayType && targetType instanceof ArrayType targetArrayType) { + return canChangeColumnType(sourceArrayType.getElementType(), targetArrayType.getElementType()); + } + if (sourceType instanceof RowType sourceRowType && targetType instanceof RowType targetRowType) { + List fields = Streams.concat(sourceRowType.getFields().stream(), targetRowType.getFields().stream()) + .distinct() + .collect(toImmutableList()); + for (RowType.Field field : fields) { + String fieldName = field.getName().orElseThrow(); + boolean allowedChange = findFieldByName(sourceRowType.getFields(), fieldName) + .flatMap(sourceField -> findFieldByName(targetRowType.getFields(), fieldName) + .map(targetField -> canChangeColumnType(sourceField.getType(), targetField.getType()))) + // Allow removal and addition of fields + .orElse(true); + if (!allowedChange) { + return false; + } + } + return true; + } + return false; + } + + private static Optional findFieldByName(List fields, String fieldName) + { + return fields.stream() + .filter(field -> field.getName().orElseThrow().equals(fieldName)) + .findAny(); + } + @Override public void setTableAuthorization(ConnectorSession session, SchemaTableName table, TrinoPrincipal principal) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java index bf5fffd89339..0caa0f9f5993 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetastoreClosure.java @@ -205,6 +205,11 @@ public void dropColumn(String databaseName, String tableName, String columnName) delegate.dropColumn(databaseName, tableName, columnName); } + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + delegate.setColumnType(databaseName, tableName, columnName, columnType); + } + public Optional getPartition(String databaseName, String tableName, List partitionValues) { return delegate.getTable(databaseName, tableName) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java index 6feb6343b3b2..f5896d5adac6 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/ForwardingHiveMetastore.java @@ -223,6 +223,12 @@ public void dropColumn(String databaseName, String tableName, String columnName) delegate.dropColumn(databaseName, tableName, columnName); } + @Override + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + delegate.setColumnType(databaseName, tableName, columnName, columnType); + } + @Override public Optional getPartition(Table table, List partitionValues) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java index 8e526e4ddc36..2a9edd3c1ef2 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java +++ 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/HiveMetastore.java @@ -99,6 +99,8 @@ default void updatePartitionStatistics(Table table, String partitionName, Functi void dropColumn(String databaseName, String tableName, String columnName); + void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType); + Optional getPartition(Table table, List partitionValues); /** diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java index d5c13d276f29..0648e7655bfd 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/SemiTransactionalHiveMetastore.java @@ -656,6 +656,11 @@ public synchronized void dropColumn(String databaseName, String tableName, Strin setExclusive((delegate, hdfsEnvironment) -> delegate.dropColumn(databaseName, tableName, columnName)); } + public synchronized void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + setExclusive((delegate, hdfsEnvironment) -> delegate.setColumnType(databaseName, tableName, columnName, columnType)); + } + public synchronized void finishChangingExistingTable( AcidOperation acidOperation, ConnectorSession session, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java index 4b2656678c67..142ed5f3a3f8 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/alluxio/AlluxioHiveMetastore.java @@ -336,6 +336,12 @@ public void dropColumn(String databaseName, String tableName, String 
columnName) throw new TrinoException(NOT_SUPPORTED, "dropColumn"); } + @Override + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + throw new TrinoException(NOT_SUPPORTED, "setColumnType"); + } + @Override public Optional getPartition(Table table, List partitionValues) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java index 161bf865666a..07543a83782d 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java @@ -765,6 +765,17 @@ public void dropColumn(String databaseName, String tableName, String columnName) } } + @Override + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + try { + delegate.setColumnType(databaseName, tableName, columnName, columnType); + } + finally { + invalidateTable(databaseName, tableName); + } + } + public void invalidateTable(String databaseName, String tableName) { invalidateTableCache(databaseName, tableName); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java index 7ded8a724ef0..382085d9481f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java @@ -761,6 +761,34 @@ public synchronized void dropColumn(String databaseName, String tableName, Strin }); } + @Override + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + alterTable(databaseName, tableName, oldTable -> { + if 
(oldTable.getColumn(columnName).isEmpty()) { + SchemaTableName name = new SchemaTableName(databaseName, tableName); + throw new ColumnNotFoundException(name, columnName); + } + for (Column column : oldTable.getPartitionColumns()) { + if (column.getName().equals(columnName)) { + throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); + } + } + + ImmutableList.Builder newDataColumns = ImmutableList.builderWithExpectedSize(oldTable.getDataColumns().size()); + for (Column fieldSchema : oldTable.getDataColumns()) { + if (fieldSchema.getName().equals(columnName)) { + newDataColumns.add(new Column(fieldSchema.getName(), columnType, fieldSchema.getComment())); + } + else { + newDataColumns.add(fieldSchema); + } + } + + return oldTable.withDataColumns(currentVersion, newDataColumns.build()); + }); + } + private void alterTable(String databaseName, String tableName, Function alterFunction) { requireNonNull(databaseName, "databaseName is null"); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index 4164368fbca8..4b015d666496 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -777,6 +777,30 @@ public void dropColumn(String databaseName, String tableName, String columnName) replaceTable(databaseName, tableName, newTable, null); } + @Override + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + Table oldTable = getExistingTable(databaseName, tableName); + if (oldTable.getPartitionColumns().stream().anyMatch(column -> column.getName().equals(columnName))) { + throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); + } + + ImmutableList.Builder 
newDataColumns = ImmutableList.builderWithExpectedSize(oldTable.getDataColumns().size()); + for (Column column : oldTable.getDataColumns()) { + if (column.getName().equals(columnName)) { + newDataColumns.add(new Column(column.getName(), columnType, column.getComment())); + } + else { + newDataColumns.add(column); + } + } + + Table newTable = Table.builder(oldTable) + .setDataColumns(newDataColumns.build()) + .build(); + replaceTable(databaseName, tableName, newTable, null); + } + @Override public Optional getPartition(Table table, List partitionValues) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/recording/RecordingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/recording/RecordingHiveMetastore.java index 7c8e7f53cbe2..5683e4827dfc 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/recording/RecordingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/recording/RecordingHiveMetastore.java @@ -241,6 +241,13 @@ public void dropColumn(String databaseName, String tableName, String columnName) delegate.dropColumn(databaseName, tableName, columnName); } + @Override + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + verifyRecordingMode(); + delegate.setColumnType(databaseName, tableName, columnName, columnType); + } + @Override public Optional getPartition(Table table, List partitionValues) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java index 4fabccfcf585..ad44b4924254 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java @@ -314,6 +314,22 @@ public void 
dropColumn(String databaseName, String tableName, String columnName) alterTable(databaseName, tableName, table); } + @Override + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + io.trino.hive.thrift.metastore.Table table = delegate.getTable(databaseName, tableName) + .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); + if (table.getPartitionKeys().stream().anyMatch(column -> column.getName().equals(columnName))) { + throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); + } + for (FieldSchema fieldSchema : table.getSd().getCols()) { + if (fieldSchema.getName().equals(columnName)) { + fieldSchema.setType(columnType.getHiveTypeName().toString()); + } + } + alterTable(databaseName, tableName, table); + } + private void alterTable(String databaseName, String tableName, io.trino.hive.thrift.metastore.Table table) { delegate.alterTable(databaseName, tableName, table); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java index e3018bf209d6..8ade4abfda15 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java @@ -4960,6 +4960,46 @@ Actual rows (up to 100 of 1 extra rows shown, 1 rows in total): [1, 2]"""); } + @Test + public void testUnsupportedSetPartitionColumnType() + { + try (TestTable table = new TestTable( + getQueryRunner()::execute, + "test_unsupported_set_partition_column_type_", + "WITH(partitioned_by = ARRAY['part']) AS SELECT 1 AS id, 2 AS part")) { + assertQueryFails( + "ALTER TABLE " + table.getName() + " ALTER COLUMN part SET DATA TYPE bigint", + "Changing partition column types is not supported"); + assertQuery("SELECT * FROM " + table.getName(), "VALUES (1, 2)"); + } + } + + 
@Override + protected void verifySetColumnTypeFailurePermissible(Throwable e) + { + assertThat(e).hasMessageContaining("Cannot change type"); + } + + @Override + protected Optional filterSetColumnTypesDataProvider(SetColumnTypeSetup setup) + { + switch ("%s -> %s".formatted(setup.sourceColumnType(), setup.newColumnType())) { + case "timestamp(3) -> timestamp(6)": + // TODO: Support changing timestamp type + case "bigint -> integer": + case "decimal(5,3) -> decimal(5,2)": + // Changing to small types is disallowed in the connector because it may lead to query failures + return Optional.of(setup.asUnsupported()); + case "char(20) -> varchar": + // Char values are stored with trailing spaces trimmed, so after the conversion the varchar values don't have them. + return Optional.of(setup.withNewValueLiteral("rtrim(%s)".formatted(setup.newValueLiteral()))); + case "timestamp(6) -> timestamp(3)": + // Creating timestamp(6) column throws 'Incorrect timestamp precision for timestamp(6); the configured precision is MILLISECONDS' + return Optional.empty(); + } + return Optional.of(setup); + } + @Test public void testAvroTypeValidation() { diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastore.java index 11d36053b434..15e072464d34 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/CountingAccessHiveMetastore.java @@ -196,6 +196,12 @@ public void dropColumn(String databaseName, String tableName, String columnName) throw new UnsupportedOperationException(); } + @Override + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + throw new UnsupportedOperationException(); + } + @Override public Optional getPartition(Table table, List partitionValues) { diff 
--git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java index af8d8288a357..db0dc7e42b57 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/UnimplementedHiveMetastore.java @@ -184,6 +184,12 @@ public void dropColumn(String databaseName, String tableName, String columnName) throw new UnsupportedOperationException(); } + @Override + public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) + { + throw new UnsupportedOperationException(); + } + @Override public Optional getPartition(Table table, List partitionValues) { diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastoreCompatibility.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastoreCompatibility.java new file mode 100644 index 000000000000..93e9adf5b4a5 --- /dev/null +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/TestHiveGlueMetastoreCompatibility.java @@ -0,0 +1,94 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.hive.metastore.glue; + +import com.google.common.collect.ImmutableMap; +import io.trino.Session; +import io.trino.plugin.hive.TestingHivePlugin; +import io.trino.testing.AbstractTestQueryFramework; +import io.trino.testing.DistributedQueryRunner; +import io.trino.testing.QueryRunner; +import io.trino.testing.sql.TestTable; +import org.testng.annotations.AfterClass; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Map; + +import static com.google.common.io.MoreFiles.deleteRecursively; +import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE; +import static io.trino.plugin.hive.metastore.glue.GlueHiveMetastore.createTestingGlueHiveMetastore; +import static io.trino.testing.TestingNames.randomNameSuffix; +import static io.trino.testing.TestingSession.testSessionBuilder; +import static org.assertj.core.api.Assertions.assertThat; + +/* + * TestHiveGlueMetastoreCompatibility currently uses AWS Default Credential Provider Chain, + * See https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html#credentials-default + * on ways to set your AWS credentials which will be needed to run this test. 
+ */ +public class TestHiveGlueMetastoreCompatibility + extends AbstractTestQueryFramework +{ + protected static final String SCHEMA = "test_hive_glue_" + randomNameSuffix(); + + private String dataDirectory; + private GlueHiveMetastore glueMetastore; + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + Session session = testSessionBuilder() + .setCatalog("hive") + .setSchema(SCHEMA) + .build(); + DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).build(); + + dataDirectory = queryRunner.getCoordinator().getBaseDataDir().resolve("hive_data").toString(); + glueMetastore = createTestingGlueHiveMetastore(dataDirectory); + + queryRunner.installPlugin(new TestingHivePlugin(glueMetastore)); + + Map connectorProperties = ImmutableMap.builder() + .put("hive.security", "allow-all") + .buildOrThrow(); + + queryRunner.createCatalog("hive", "hive", connectorProperties); + queryRunner.execute("CREATE SCHEMA " + SCHEMA); + + return queryRunner; + } + + @AfterClass(alwaysRun = true) + public void tearDown() + throws IOException + { + if (glueMetastore != null) { + glueMetastore.dropDatabase(SCHEMA, false); + deleteRecursively(Path.of(dataDirectory), ALLOW_INSECURE); + } + } + + @Test + public void testSetColumnType() + { + try (TestTable table = new TestTable(getQueryRunner()::execute, "test_set_column_type_", "AS SELECT CAST(123 AS integer) AS col")) { + assertUpdate("ALTER TABLE " + table.getName() + " ALTER COLUMN col SET DATA TYPE bigint"); + assertThat(query("SELECT * FROM " + table.getName())) + .matches("VALUES BIGINT '123'"); + } + } +} From 79d32e32198e5db2caacd19c1b60339c2ba89d5e Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Thu, 2 Mar 2023 15:05:57 +0900 Subject: [PATCH 6/7] fixup! 
Support changing column types in Hive connector --- .../io/trino/plugin/hive/HiveMetadata.java | 6 +++ .../plugin/hive/BaseHiveConnectorTest.java | 49 ++++++++++++++++--- .../io/trino/testing/BaseConnectorTest.java | 18 ++++--- 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index bba4c88eaebf..a85b56d7c5be 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -1361,6 +1361,12 @@ public void setColumnType(ConnectorSession session, ConnectorTableHandle tableHa failIfAvroSchemaIsSet(table); HiveColumnHandle column = (HiveColumnHandle) columnHandle; + HiveStorageFormat storageFormat = extractHiveStorageFormat(metastore.getTable(table.getSchemaName(), table.getTableName()) + .orElseThrow(() -> new TableNotFoundException(table.getSchemaTableName()))); + if (storageFormat != HiveStorageFormat.ORC && storageFormat != HiveStorageFormat.PARQUET) { + throw new TrinoException(NOT_SUPPORTED, "Unsupported storage format for changing column type: " + storageFormat); + } + table.getPartitionNames().ifPresent(partitionNames -> { if (partitionNames.contains(column.getName())) { throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java index 8ade4abfda15..9d2d33d9a5f9 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java @@ -236,7 +236,6 @@ protected boolean hasBehavior(TestingConnectorBehavior connectorBehavior) return true; case SUPPORTS_DROP_FIELD: - case SUPPORTS_SET_COLUMN_TYPE: 
return false; case SUPPORTS_CREATE_VIEW: @@ -4977,13 +4976,49 @@ public void testUnsupportedSetPartitionColumnType() @Override protected void verifySetColumnTypeFailurePermissible(Throwable e) { - assertThat(e).hasMessageContaining("Cannot change type"); + assertThat(e) + .hasMessageMatching(".*(Cannot change type|Unsupported storage format for changing column type).*"); + } + + @Override + protected List setColumnTypeSetupData() + { + ImmutableList.Builder setup = ImmutableList.builder(); + for (HiveStorageFormat storageFormat : HiveStorageFormat.values()) { + if (storageFormat == REGEX) { + // REGEX format is read-only + continue; + } + setup.addAll(super.setColumnTypeSetupData().stream() + .map(data -> data.withTableProperty("format = '%s'".formatted(storageFormat))) + .collect(toImmutableList())); + } + return setup.build(); } @Override protected Optional filterSetColumnTypesDataProvider(SetColumnTypeSetup setup) { - switch ("%s -> %s".formatted(setup.sourceColumnType(), setup.newColumnType())) { + String tableProperty = setup.tableProperty().orElseThrow(); + String columnMapping = "%s -> %s".formatted(setup.sourceColumnType(), setup.newColumnType()); + + if (columnMapping.equals("timestamp(6) -> timestamp(3)")) { + // Creating timestamp(6) column throws 'Incorrect timestamp precision for timestamp(6); the configured precision is MILLISECONDS' + return Optional.empty(); + } + + switch (tableProperty) { + case "WITH (format = 'AVRO')": + case "WITH (format = 'RCBINARY')": + case "WITH (format = 'RCTEXT')": + case "WITH (format = 'SEQUENCEFILE')": + case "WITH (format = 'JSON')": + case "WITH (format = 'TEXTFILE')": + case "WITH (format = 'CSV')": + return Optional.of(setup.asUnsupported()); + } + + switch (columnMapping) { case "timestamp(3) -> timestamp(6)": // TODO: Support changing timestamp type case "bigint -> integer": @@ -4993,9 +5028,11 @@ protected Optional filterSetColumnTypesDataProvider(SetColum case "char(20) -> varchar": // Char values are stored 
with trailing spaces trimmed, so after the conversion the varchar values don't have them. return Optional.of(setup.withNewValueLiteral("rtrim(%s)".formatted(setup.newValueLiteral()))); - case "timestamp(6) -> timestamp(3)": - // Creating timestamp(6) column throws 'Incorrect timestamp precision for timestamp(6); the configured precision is MILLISECONDS' - return Optional.empty(); + case "row(x integer) -> row(y integer)": + if (tableProperty.equals("WITH (format = 'PARQUET')")) { + // TODO https://github.com/trinodb/trino/issues/15822 The connector returns incorrect NULL when a field in row type doesn't exist in Parquet files + return Optional.of(setup.withNewValueLiteral("NULL")); + } } return Optional.of(setup); } diff --git a/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java b/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java index 9b04113c22e6..01af7a887882 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java @@ -2366,7 +2366,7 @@ public void testSetColumnTypes(SetColumnTypeSetup setup) TestTable table; try { - table = new TestTable(getQueryRunner()::execute, "test_set_column_type_", " AS SELECT CAST(" + setup.sourceValueLiteral + " AS " + setup.sourceColumnType + ") AS col"); + table = new TestTable(getQueryRunner()::execute, "test_set_column_type_", setup.tableProperty.orElse("") + " AS SELECT CAST(" + setup.sourceValueLiteral + " AS " + setup.sourceColumnType + ") AS col"); } catch (Exception e) { verifyUnsupportedTypeException(e, setup.sourceColumnType); @@ -2402,7 +2402,7 @@ protected Optional filterSetColumnTypesDataProvider(SetColum return Optional.of(setup); } - private List setColumnTypeSetupData() + protected List setColumnTypeSetupData() { return ImmutableList.builder() .add(new SetColumnTypeSetup("tinyint", "TINYINT '127'", "smallint")) @@ -2443,7 +2443,7 @@ private List 
setColumnTypeSetupData() .build(); } - public record SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, String newColumnType, String newValueLiteral, boolean unsupportedType) + public record SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, String newColumnType, String newValueLiteral, boolean unsupportedType, Optional tableProperty) { public SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, String newColumnType) { @@ -2452,7 +2452,7 @@ public SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, St public SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, String newColumnType, String newValueLiteral) { - this(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, false); + this(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, false, Optional.empty()); } public SetColumnTypeSetup @@ -2461,17 +2461,23 @@ public SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, St requireNonNull(sourceValueLiteral, "sourceValueLiteral is null"); requireNonNull(newColumnType, "newColumnType is null"); requireNonNull(newValueLiteral, "newValueLiteral is null"); + requireNonNull(tableProperty, "tableProperty is null"); } public SetColumnTypeSetup withNewValueLiteral(String newValueLiteral) { checkState(!unsupportedType); - return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, unsupportedType); + return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, unsupportedType, tableProperty); + } + + public SetColumnTypeSetup withTableProperty(String tableProperty) + { + return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, unsupportedType, Optional.of("WITH (%s)".formatted(tableProperty))); } public SetColumnTypeSetup asUnsupported() { - return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, 
newColumnType, newValueLiteral, true); + return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, true, tableProperty); } } From ec89b8e65f8e8bc2030542993117faccafbaf8c7 Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Mon, 6 Mar 2023 14:45:57 +0900 Subject: [PATCH 7/7] fixup! Support changing column types in Hive connector --- .../io/trino/plugin/hive/HiveMetadata.java | 78 ++++++++++++++--- .../metastore/file/FileHiveMetastore.java | 5 -- .../metastore/glue/GlueHiveMetastore.java | 4 - .../thrift/BridgingHiveMetastore.java | 3 - .../plugin/hive/BaseHiveConnectorTest.java | 32 ++++--- .../product/hive/TestHiveSetColumnType.java | 87 +++++++++++++++++++ .../io/trino/testing/BaseConnectorTest.java | 20 +++-- 7 files changed, 184 insertions(+), 45 deletions(-) create mode 100644 testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveSetColumnType.java diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index a85b56d7c5be..f3f80fc20fc8 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.collect.Streams; @@ -171,6 +172,7 @@ import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.collect.Iterables.concat; import static com.google.common.collect.Iterables.getOnlyElement; +import static com.google.common.collect.MoreCollectors.toOptional; import static io.trino.hdfs.ConfigurationUtils.toJobConf; import static 
io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; import static io.trino.plugin.hive.HiveAnalyzeProperties.getColumnNames; @@ -1359,23 +1361,70 @@ public void setColumnType(ConnectorSession session, ConnectorTableHandle tableHa { HiveTableHandle table = (HiveTableHandle) tableHandle; failIfAvroSchemaIsSet(table); - HiveColumnHandle column = (HiveColumnHandle) columnHandle; + HiveColumnHandle hiveColumn = (HiveColumnHandle) columnHandle; HiveStorageFormat storageFormat = extractHiveStorageFormat(metastore.getTable(table.getSchemaName(), table.getTableName()) .orElseThrow(() -> new TableNotFoundException(table.getSchemaTableName()))); + verifyStorageFormatForColumnTypeChange(storageFormat); + verifySupportedColumnTypeChange(hiveColumn.getType(), type); + if (table.getPartitionColumns().stream().anyMatch(partition -> partition.getName().equals(hiveColumn.getName()))) { + throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); + } + ImmutableList.Builder partitionsBuilder = ImmutableList.builder(); + List partitionsNames = metastore.getPartitionNames(table.getSchemaName(), table.getTableName()) + .orElseThrow(() -> new TableNotFoundException(table.getSchemaTableName())); + for (List partitionsNamesBatch : Lists.partition(partitionsNames, 1000)) { + metastore.getPartitionsByNames(table.getSchemaName(), table.getTableName(), partitionsNamesBatch).values().stream() + .filter(Optional::isPresent).map(Optional::get) + .forEach(partition -> { + boolean skipUpdatePartition = false; + verifyStorageFormatForColumnTypeChange(extractHiveStorageFormat(partition.getStorage().getStorageFormat())); + ImmutableList.Builder columns = ImmutableList.builder(); + for (Column column : partition.getColumns()) { + if (column.getName().equals(hiveColumn.getName())) { + Type sourceType = column.getType().getType(typeManager, getTimestampPrecision(session)); + verifySupportedColumnTypeChange(sourceType, type); + 
columns.add(column); + skipUpdatePartition = sourceType.equals(type); + } + else { + columns.add(column); + } + } + if (!skipUpdatePartition) { + partitionsBuilder.add(Partition.builder(partition).setColumns(columns.build()).build()); + } + }); + } + List partitions = partitionsBuilder.build(); + metastore.setColumnType(table.getSchemaName(), table.getTableName(), hiveColumn.getName(), toHiveType(type)); + if (!partitions.isEmpty()) { + // Hive changes a column definition in each partitions unless the ALTER TABLE statement doesn't contain partition condition + // Trino doesn't support specifying partitions in ALTER TABLE, so SET DATA TYPE updates all partitions + // https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-AlterPartition + metastore.alterPartitions(table.getSchemaName(), table.getTableName(), partitions, OptionalLong.empty()); + } + } + + private void verifyStorageFormatForColumnTypeChange(HiveStorageFormat storageFormat) + { + // TODO: Support other storage format except for CSV and REGEX + // AVRO leads to query failure after converting varchar to char(20) + // RCBINARY leads to query failure and incorrect results + // RCTEXT returns incorrect results on row types + // SEQUENCEFILE returns incorrect results on row types + // JSON leads to query failure for NaN after changing real to double type + // TEXTFILE returns incorrect results on row types if (storageFormat != HiveStorageFormat.ORC && storageFormat != HiveStorageFormat.PARQUET) { throw new TrinoException(NOT_SUPPORTED, "Unsupported storage format for changing column type: " + storageFormat); } + } - table.getPartitionNames().ifPresent(partitionNames -> { - if (partitionNames.contains(column.getName())) { - throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); - } - }); - if (!canChangeColumnType(column.getType(), type)) { - throw new TrinoException(NOT_SUPPORTED, "Cannot change type from %s to 
%s".formatted(column.getType(), type)); + private void verifySupportedColumnTypeChange(Type sourceType, Type targetType) + { + if (!canChangeColumnType(sourceType, targetType)) { + throw new TrinoException(NOT_SUPPORTED, "Cannot change type from %s to %s".formatted(sourceType, targetType)); } - metastore.setColumnType(table.getSchemaName(), table.getTableName(), column.getName(), toHiveType(type)); } private static boolean canChangeColumnType(Type sourceType, Type targetType) @@ -1396,6 +1445,7 @@ private static boolean canChangeColumnType(Type sourceType, Type targetType) return targetType == DOUBLE; } if (sourceType instanceof VarcharType || sourceType instanceof CharType) { + // Truncation characters is supported return targetType instanceof VarcharType || targetType instanceof CharType; } if (sourceType instanceof DecimalType sourceDecimal && targetType instanceof DecimalType targetDecimal) { @@ -1415,7 +1465,7 @@ private static boolean canChangeColumnType(Type sourceType, Type targetType) boolean allowedChange = findFieldByName(sourceRowType.getFields(), fieldName) .flatMap(sourceField -> findFieldByName(targetRowType.getFields(), fieldName) .map(targetField -> canChangeColumnType(sourceField.getType(), targetField.getType()))) - // Allow removal and addition of fields + // Allow removal and addition of fields as the connector supports dropping and re-adding a column .orElse(true); if (!allowedChange) { return false; @@ -1430,7 +1480,7 @@ private static Optional findFieldByName(List field { return fields.stream() .filter(field -> field.getName().orElseThrow().equals(fieldName)) - .findAny(); + .collect(toOptional()); } @Override @@ -3551,7 +3601,11 @@ public List listTablePrivileges(ConnectorSession session, SchemaTable private static HiveStorageFormat extractHiveStorageFormat(Table table) { - StorageFormat storageFormat = table.getStorage().getStorageFormat(); + return extractHiveStorageFormat(table.getStorage().getStorageFormat()); + } + + private static 
HiveStorageFormat extractHiveStorageFormat(StorageFormat storageFormat) + { String outputFormat = storageFormat.getOutputFormat(); String serde = storageFormat.getSerde(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java index 382085d9481f..6ec099efe951 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/file/FileHiveMetastore.java @@ -769,11 +769,6 @@ public void setColumnType(String databaseName, String tableName, String columnNa SchemaTableName name = new SchemaTableName(databaseName, tableName); throw new ColumnNotFoundException(name, columnName); } - for (Column column : oldTable.getPartitionColumns()) { - if (column.getName().equals(columnName)) { - throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); - } - } ImmutableList.Builder newDataColumns = ImmutableList.builderWithExpectedSize(oldTable.getDataColumns().size()); for (Column fieldSchema : oldTable.getDataColumns()) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java index 4b015d666496..171a7c9552bc 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/GlueHiveMetastore.java @@ -781,10 +781,6 @@ public void dropColumn(String databaseName, String tableName, String columnName) public void setColumnType(String databaseName, String tableName, String columnName, HiveType columnType) { Table oldTable = getExistingTable(databaseName, tableName); - if (oldTable.getPartitionColumns().stream().anyMatch(column -> 
column.getName().equals(columnName))) { - throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); - } - ImmutableList.Builder newDataColumns = ImmutableList.builderWithExpectedSize(oldTable.getDataColumns().size()); for (Column column : oldTable.getDataColumns()) { if (column.getName().equals(columnName)) { diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java index ad44b4924254..7a4baad8a237 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/BridgingHiveMetastore.java @@ -319,9 +319,6 @@ public void setColumnType(String databaseName, String tableName, String columnNa { io.trino.hive.thrift.metastore.Table table = delegate.getTable(databaseName, tableName) .orElseThrow(() -> new TableNotFoundException(new SchemaTableName(databaseName, tableName))); - if (table.getPartitionKeys().stream().anyMatch(column -> column.getName().equals(columnName))) { - throw new TrinoException(NOT_SUPPORTED, "Changing partition column types is not supported"); - } for (FieldSchema fieldSchema : table.getSd().getCols()) { if (fieldSchema.getName().equals(columnName)) { fieldSchema.setType(columnType.getHiveTypeName().toString()); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java index 9d2d33d9a5f9..9d8cc38faf26 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/BaseHiveConnectorTest.java @@ -4986,12 +4986,18 @@ protected List setColumnTypeSetupData() ImmutableList.Builder setup = ImmutableList.builder(); for (HiveStorageFormat storageFormat : 
HiveStorageFormat.values()) { if (storageFormat == REGEX) { - // REGEX format is read-only + // Cannot prepare test data with REGEX table continue; } - setup.addAll(super.setColumnTypeSetupData().stream() - .map(data -> data.withTableProperty("format = '%s'".formatted(storageFormat))) - .collect(toImmutableList())); + if (storageFormat == HiveStorageFormat.ORC || storageFormat == HiveStorageFormat.PARQUET) { + setup.addAll(super.setColumnTypeSetupData().stream() + .map(data -> data.withTableProperty("format = '%s'".formatted(storageFormat))) + .collect(toImmutableList())); + } + else { + setup.add(new SetColumnTypeSetup("integer", "2147483647", "bigint") + .withTableProperty("format = '%s'".formatted(storageFormat))); + } } return setup.build(); } @@ -4999,7 +5005,7 @@ protected List setColumnTypeSetupData() @Override protected Optional filterSetColumnTypesDataProvider(SetColumnTypeSetup setup) { - String tableProperty = setup.tableProperty().orElseThrow(); + String tableProperty = setup.tableProperties(); String columnMapping = "%s -> %s".formatted(setup.sourceColumnType(), setup.newColumnType()); if (columnMapping.equals("timestamp(6) -> timestamp(3)")) { @@ -5008,13 +5014,13 @@ protected Optional filterSetColumnTypesDataProvider(SetColum } switch (tableProperty) { - case "WITH (format = 'AVRO')": - case "WITH (format = 'RCBINARY')": - case "WITH (format = 'RCTEXT')": - case "WITH (format = 'SEQUENCEFILE')": - case "WITH (format = 'JSON')": - case "WITH (format = 'TEXTFILE')": - case "WITH (format = 'CSV')": + case "format = 'AVRO'": + case "format = 'RCBINARY'": + case "format = 'RCTEXT'": + case "format = 'SEQUENCEFILE'": + case "format = 'JSON'": + case "format = 'TEXTFILE'": + case "format = 'CSV'": return Optional.of(setup.asUnsupported()); } @@ -5029,7 +5035,7 @@ protected Optional filterSetColumnTypesDataProvider(SetColum // Char values are stored with trailing spaces trimmed, so after the conversion the varchar values don't have them. 
return Optional.of(setup.withNewValueLiteral("rtrim(%s)".formatted(setup.newValueLiteral()))); case "row(x integer) -> row(y integer)": - if (tableProperty.equals("WITH (format = 'PARQUET')")) { + if (tableProperty.equals("format = 'PARQUET'")) { // TODO https://github.com/trinodb/trino/issues/15822 The connector returns incorrect NULL when a field in row type doesn't exist in Parquet files return Optional.of(setup.withNewValueLiteral("NULL")); } diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveSetColumnType.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveSetColumnType.java new file mode 100644 index 000000000000..11533043ad4b --- /dev/null +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveSetColumnType.java @@ -0,0 +1,87 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 + */ +package io.trino.tests.product.hive; + +import org.assertj.core.api.Assertions; +import org.testng.annotations.Test; + +import static io.trino.tempto.assertions.QueryAssert.Row.row; +import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure; +import static io.trino.tempto.assertions.QueryAssert.assertThat; +import static io.trino.testing.TestingNames.randomNameSuffix; +import static io.trino.tests.product.utils.QueryExecutors.onHive; +import static io.trino.tests.product.utils.QueryExecutors.onTrino; + +public class TestHiveSetColumnType + extends HiveProductTest +{ + @Test + public void testSetColumnType() + { + String tableName = "test_set_column_type_" + randomNameSuffix(); + + onTrino().executeQuery("CREATE TABLE hive.default." + tableName + " AS SELECT CAST(123 AS integer) AS col"); + onTrino().executeQuery("ALTER TABLE hive.default." + tableName + " ALTER COLUMN col SET DATA TYPE bigint"); + + assertThat(onTrino().executeQuery("SELECT * FROM hive.default." + tableName)) + .containsOnly(row(123)); + assertThat(onHive().executeQuery("SELECT * FROM default." + tableName)) + .containsOnly(row(123)); + + onTrino().executeQuery("DROP TABLE hive.default." + tableName); + } + + @Test + public void testSetColumnTypeOnPartitionedTable() + { + String tableName = "test_set_column_type_partitioned_" + randomNameSuffix(); + + onTrino().executeQuery("CREATE TABLE hive.default." + tableName + " WITH (partitioned_by = ARRAY['part']) AS SELECT CAST(123 AS integer) AS col, 'test_partition' AS part"); + assertThat(onHive().executeQuery("SHOW TABLE EXTENDED LIKE " + tableName + " PARTITION (part='test_partition')")) + .contains(row("columns:struct columns { i32 col}")); + + // Verify SET DATA TYPE changes the column type in partitions + onTrino().executeQuery("ALTER TABLE hive.default." 
+ tableName + " ALTER COLUMN col SET DATA TYPE bigint"); + assertThat(onHive().executeQuery("SHOW TABLE EXTENDED LIKE " + tableName + " PARTITION (part='test_partition')")) + .contains(row("columns:struct columns { i64 col}")); + + assertThat(onTrino().executeQuery("SELECT * FROM hive.default." + tableName)) + .containsOnly(row(123, "test_partition")); + assertThat(onHive().executeQuery("SELECT * FROM default." + tableName)) + .containsOnly(row(123, "test_partition")); + + onTrino().executeQuery("DROP TABLE hive.default." + tableName); + } + + @Test + public void testUnsupportedFileFormatInPartition() + { + String tableName = "test_unsupported_file_format_partition_" + randomNameSuffix(); + + onTrino().executeQuery("CREATE TABLE hive.default." + tableName + " WITH (format = 'PARQUET', partitioned_by = ARRAY['part']) AS SELECT CAST(123 AS integer) AS col, 'test_partition' AS part"); + onHive().executeQuery("ALTER TABLE " + tableName + " ADD PARTITION (part='test_text_partition')"); + onHive().executeQuery("ALTER TABLE " + tableName + " PARTITION (part='test_text_partition') SET FILEFORMAT RCFILE"); + + assertQueryFailure(() -> onTrino().executeQuery("ALTER TABLE hive.default." + tableName + " ALTER COLUMN col SET DATA TYPE bigint")) + .hasMessageContaining("Unsupported storage format for changing column type: RCBINARY"); + + // Verify table and partition definitions haven't been changed + Assertions.assertThat((String) onTrino().executeQuery("SHOW CREATE TABLE " + tableName).getOnlyValue()) + .contains("col integer"); + assertThat(onHive().executeQuery("SHOW TABLE EXTENDED LIKE " + tableName + " PARTITION (part='test_text_partition')")) + .contains(row("columns:struct columns { i32 col}")); + + onTrino().executeQuery("DROP TABLE hive.default." 
+ tableName); + } +} diff --git a/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java b/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java index 01af7a887882..0815ac16ba14 100644 --- a/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java +++ b/testing/trino-testing/src/main/java/io/trino/testing/BaseConnectorTest.java @@ -75,6 +75,7 @@ import java.util.stream.Stream; import static com.google.common.base.Preconditions.checkState; +import static com.google.common.base.Strings.isNullOrEmpty; import static com.google.common.base.Verify.verifyNotNull; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.Iterables.getOnlyElement; @@ -2364,9 +2365,13 @@ public void testSetColumnTypes(SetColumnTypeSetup setup) { skipTestUnless(hasBehavior(SUPPORTS_SET_COLUMN_TYPE) && hasBehavior(SUPPORTS_CREATE_TABLE_WITH_DATA)); + String tableConfiguration = ""; + if (!isNullOrEmpty(setup.tableProperties)) { + tableConfiguration += "WITH(%s)".formatted(setup.tableProperties); + } TestTable table; try { - table = new TestTable(getQueryRunner()::execute, "test_set_column_type_", setup.tableProperty.orElse("") + " AS SELECT CAST(" + setup.sourceValueLiteral + " AS " + setup.sourceColumnType + ") AS col"); + table = new TestTable(getQueryRunner()::execute, "test_set_column_type_", tableConfiguration + " AS SELECT CAST(" + setup.sourceValueLiteral + " AS " + setup.sourceColumnType + ") AS col"); } catch (Exception e) { verifyUnsupportedTypeException(e, setup.sourceColumnType); @@ -2443,7 +2448,7 @@ protected List setColumnTypeSetupData() .build(); } - public record SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, String newColumnType, String newValueLiteral, boolean unsupportedType, Optional tableProperty) + public record SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, String newColumnType, String newValueLiteral, boolean 
unsupportedType, String tableProperties) { public SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, String newColumnType) { @@ -2452,7 +2457,7 @@ public SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, St public SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, String newColumnType, String newValueLiteral) { - this(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, false, Optional.empty()); + this(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, false, null); } public SetColumnTypeSetup @@ -2461,23 +2466,22 @@ public SetColumnTypeSetup(String sourceColumnType, String sourceValueLiteral, St requireNonNull(sourceValueLiteral, "sourceValueLiteral is null"); requireNonNull(newColumnType, "newColumnType is null"); requireNonNull(newValueLiteral, "newValueLiteral is null"); - requireNonNull(tableProperty, "tableProperty is null"); } public SetColumnTypeSetup withNewValueLiteral(String newValueLiteral) { checkState(!unsupportedType); - return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, unsupportedType, tableProperty); + return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, unsupportedType, tableProperties); } - public SetColumnTypeSetup withTableProperty(String tableProperty) + public SetColumnTypeSetup withTableProperty(String tableProperties) { - return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, unsupportedType, Optional.of("WITH (%s)".formatted(tableProperty))); + return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, unsupportedType, tableProperties); } public SetColumnTypeSetup asUnsupported() { - return new SetColumnTypeSetup(sourceColumnType, sourceValueLiteral, newColumnType, newValueLiteral, true, tableProperty); + return new SetColumnTypeSetup(sourceColumnType, 
sourceValueLiteral, newColumnType, newValueLiteral, true, tableProperties); } }