diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 7b1aabf99154..56bbc583d84b 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -201,6 +201,7 @@
 import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
 import org.apache.iceberg.types.Conversions;
+import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
 import org.apache.iceberg.util.Pair;
 import org.apache.iceberg.util.SerializationUtil;
@@ -2085,6 +2086,28 @@ public List<String> getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table h
     return IcebergTableUtil.getPartitionNames(icebergTable, partitionSpec, false);
   }
 
+  @Override
+  public Map<String, String> listOverriddenColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+    Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    Map<String, String> columns = Maps.newHashMap();
+    for (Types.NestedField field : icebergTable.schema().columns()) {
+      String overriddenTypeName = getOverriddenColumnType(field.type());
+      if (overriddenTypeName != null) {
+        columns.put(field.name(), overriddenTypeName);
+      }
+    }
+    return columns;
+  }
+
+  private String getOverriddenColumnType(Type type) {
+    // Only VARIANT for now; Geometry, Geography & Timestamp_ns types can follow the same pattern
+    switch (type.typeId()) {
+      case VARIANT:
+        return type.toString();
+    }
+    return null;
+  }
+
   /**
    * A function to fetch the column information of the underlying column defined by the table format.
    * @param hmsTable A Hive table instance
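Reviewer note: the hook above is deliberately generic, so any non-native storage handler can report display-type overrides keyed by column name. A minimal sketch of how a third-party handler might plug in; the class name, column name, and type string are hypothetical, only the `listOverriddenColumnTypes` signature comes from this patch:

```java
import java.util.Collections;
import java.util.Map;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;

public class ExampleStorageHandler extends DefaultStorageHandler {
  @Override
  public Map<String, String> listOverriddenColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
    // Hypothetical mapping: display "event_time" with a richer logical type
    // than the physical type registered in the metastore.
    return Collections.singletonMap("event_time", "timestamp with local time zone");
  }
}
```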
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/variant_type.q b/iceberg/iceberg-handler/src/test/queries/positive/variant_type.q
index a6a10532dcc2..fcba7e91aee9 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/variant_type.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/variant_type.q
@@ -1,5 +1,6 @@
 -- Mask random uuid
 --! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
+--! qt:replace:/(\s+uuid\s+)\S+/$1#Masked#/
 -- Mask random snapshot id
 --! qt:replace:/('current-snapshot-id'=')\d+/$1#SnapshotId#/
 -- Mask current-snapshot-timestamp-ms
@@ -11,6 +12,12 @@ CREATE EXTERNAL TABLE variant_test_basic (
   data VARIANT
 ) STORED BY ICEBERG tblproperties('format-version'='3');
 
+show create table variant_test_basic;
+
+describe variant_test_basic;
+
+describe formatted variant_test_basic;
+
 -- Insert primitive types
 INSERT INTO variant_test_basic VALUES
   (1, parse_json('null')),
diff --git a/iceberg/iceberg-handler/src/test/results/positive/variant_type.q.out b/iceberg/iceberg-handler/src/test/results/positive/variant_type.q.out
index c71778f5c260..4ee8a4c2633c 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/variant_type.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/variant_type.q.out
@@ -12,6 +12,92 @@ POSTHOOK: query: CREATE EXTERNAL TABLE variant_test_basic (
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@variant_test_basic
+PREHOOK: query: show create table variant_test_basic
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@variant_test_basic
+POSTHOOK: query: show create table variant_test_basic
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@variant_test_basic
+CREATE EXTERNAL TABLE `variant_test_basic`(
+  `id` int,
+  `data` variant)
+ROW FORMAT SERDE
+  'org.apache.iceberg.mr.hive.HiveIcebergSerDe'
+STORED BY
+  'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
+
+LOCATION
+  'hdfs://### HDFS PATH ###'
+TBLPROPERTIES (
+  'bucketing_version'='2',
+  'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"},{"id":2,"name":"data","required":false,"type":"variant"}]}',
+  'format-version'='3',
+  'iceberg.orc.files.only'='false',
+  'metadata_location'='hdfs://### HDFS PATH ###',
+  'parquet.compression'='zstd',
+  'serialization.format'='1',
+  'snapshot-count'='0',
+  'table_type'='ICEBERG',
+#### A masked pattern was here ####
+  'uuid'='#Masked#',
+  'write.delete.mode'='merge-on-read',
+  'write.merge.mode'='merge-on-read',
+  'write.metadata.delete-after-commit.enabled'='true',
+  'write.update.mode'='merge-on-read')
+PREHOOK: query: describe variant_test_basic
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@variant_test_basic
+POSTHOOK: query: describe variant_test_basic
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@variant_test_basic
+id                  	int                 	                    
+data                	variant             	                    
+PREHOOK: query: describe formatted variant_test_basic
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@variant_test_basic
+POSTHOOK: query: describe formatted variant_test_basic
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@variant_test_basic
+# col_name            	data_type           	comment             
+id                  	int                 	                    
+data                	variant             	                    
+
+# Detailed Table Information
+Database:           	default             
+#### A masked pattern was here ####
+Retention:          	0                   
+#### A masked pattern was here ####
+Table Type:         	EXTERNAL_TABLE      
+Table Parameters:
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"data\":\"true\",\"id\":\"true\"}}
+	EXTERNAL            	TRUE
+	bucketing_version   	2
+	current-schema      	{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"data\",\"required\":false,\"type\":\"variant\"}]}
+	format-version      	3
+	iceberg.orc.files.only	false
+	metadata_location   	hdfs://### HDFS PATH ###
+	numFiles            	0
+	numRows             	0
+	parquet.compression 	zstd
+	rawDataSize         	0
+	serialization.format	1
+	snapshot-count      	0
+	storage_handler     	org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+	table_type          	ICEBERG
+	totalSize           	#Masked#
+#### A masked pattern was here ####
+	uuid                	#Masked#
+	write.delete.mode   	merge-on-read
+	write.merge.mode    	merge-on-read
+	write.metadata.delete-after-commit.enabled	true
+	write.update.mode   	merge-on-read
+
+# Storage Information
+SerDe Library:      	org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat:        	org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat:       	org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed:         	No
+Sort Columns:       	[]
 PREHOOK: query: INSERT INTO variant_test_basic VALUES
   (1, parse_json('null')),
   (2, parse_json('true')),
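The extra mask directive is needed because DESCRIBE output prints `uuid	<value>` as whitespace-separated columns, unlike the `'uuid'='...'` form in SHOW CREATE TABLE that the existing mask covers. A quick standalone check of the new pattern; the sample uuid value is made up:

```java
import java.util.regex.Pattern;

public class UuidMaskDemo {
  public static void main(String[] args) {
    // Shape of a DESCRIBE FORMATTED parameter row: tab-indented key, then value.
    String line = "\tuuid                \tdeadbeef-0000-0000-0000-000000000000";
    String masked = Pattern.compile("(\\s+uuid\\s+)\\S+").matcher(line).replaceAll("$1#Masked#");
    System.out.println(masked); // "\tuuid                \t#Masked#"
  }
}
```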
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java
index 9f9f9351ef48..bb057b9cffe2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java
@@ -21,6 +21,7 @@
 import java.io.DataOutputStream;
 import java.sql.SQLException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -95,7 +96,7 @@ public int execute() throws Exception {
         cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer, context.getConf()));
       }
     }
-    fixDecimalColumnTypeName(cols);
+    fixColumnTypeName(cols, table);
 
     setConstraintsAndStorageHandlerInfo(table);
     handleMaterializedView(table);
@@ -270,15 +271,22 @@ private void getColumnsForNotPartitionKeyColumn(Table table, List<FieldSchema> c
   /**
    * Fix the type name of a column of type decimal w/o precision/scale specified. This makes
    * the describe table show "decimal(10,0)" instead of "decimal" even if the type stored
-   * in metastore is "decimal", which is possible with previous hive.
+   * in metastore is "decimal", which is possible with previous hive. Also replaces the displayed type of any
+   * column that the storage handler overrides with some other logical type.
    *
-   * @param cols columns that to be fixed as such
+   * @param cols columns that are to be fixed as such
+   * @param table the Hive table the columns belong to
    */
-  private static void fixDecimalColumnTypeName(List<FieldSchema> cols) {
+  private static void fixColumnTypeName(List<FieldSchema> cols, Table table) {
+    Map<String, String> overriddenColumnTypes =
+        table.isNonNative() ? table.getStorageHandler().listOverriddenColumnTypes(table) : Collections.emptyMap();
     for (FieldSchema col : cols) {
       if (serdeConstants.DECIMAL_TYPE_NAME.equals(col.getType())) {
-        col.setType(DecimalTypeInfo.getQualifiedName(HiveDecimal.USER_DEFAULT_PRECISION,
-            HiveDecimal.USER_DEFAULT_SCALE));
+        col.setType(
+            DecimalTypeInfo.getQualifiedName(HiveDecimal.USER_DEFAULT_PRECISION, HiveDecimal.USER_DEFAULT_SCALE));
+      }
+      if (overriddenColumnTypes.containsKey(col.getName())) {
+        col.setType(overriddenColumnTypes.get(col.getName()));
       }
     }
   }
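The two fixes in `fixColumnTypeName` run independently for each column, and a handler override wins over whatever type string the metastore returned. A self-contained sketch of that resolution order; plain strings stand in for `FieldSchema`, and the "binary" physical type is an illustrative placeholder, not what Iceberg actually stores:

```java
import java.util.LinkedHashMap;
import java.util.Map;

public class FixColumnTypeDemo {
  public static void main(String[] args) {
    Map<String, String> cols = new LinkedHashMap<>();
    cols.put("amount", "decimal"); // legacy metastore entry without precision/scale
    cols.put("data", "binary");    // placeholder physical type for an overridden column
    Map<String, String> overrides = Map.of("data", "variant");

    cols.replaceAll((name, type) -> {
      if ("decimal".equals(type)) {
        type = "decimal(10,0)";    // HiveDecimal.USER_DEFAULT_PRECISION/SCALE
      }
      return overrides.getOrDefault(name, type);
    });
    System.out.println(cols);      // {amount=decimal(10,0), data=variant}
  }
}
```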
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java
index a5bc66733f46..2cb5332a9cac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java
@@ -24,6 +24,8 @@
 import com.google.common.base.Joiner;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Sets;
+
+import java.util.Collections;
 import java.util.Comparator;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hive.common.StatsSetupConst;
@@ -38,7 +40,6 @@
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
-import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.SkewedInfo;
@@ -910,8 +911,11 @@ private String getExternal(Table table) {
 
   private String getColumns(Table table) {
     List<String> columnDescs = new ArrayList<>();
+    Map<String, String> colTypes =
+        table.isNonNative() ? table.getStorageHandler().listOverriddenColumnTypes(table) : Collections.emptyMap();
     for (FieldSchema column : table.getCols()) {
-      String columnType = formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType()));
+      String columnType = colTypes.getOrDefault(column.getName(),
+          formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType())));
       String columnDesc = "  " + unparseIdentifier(column.getName()) + " " + columnType;
       if (column.getComment() != null) {
         columnDesc += " COMMENT '" + HiveStringUtils.escapeHiveCommand(column.getComment()) + "'";
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 39358ca3d594..f03e3e9f3052 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -900,7 +900,7 @@ default List<String> getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table
   default List<String> getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table table) throws SemanticException {
     return getPartitionNames(table, Maps.newHashMap());
   }
-  
+
   default ColumnInfo getColumnInfo(org.apache.hadoop.hive.ql.metadata.Table hmsTable, String colName)
       throws SemanticException {
     throw new UnsupportedOperationException("Storage handler does not support getting column type " +
@@ -1023,4 +1023,8 @@ default void setMergeTaskDeleteProperties(TableDesc tableDesc) {
   default boolean supportsDefaultColumnValues(Map<String, String> tblProps) {
     return false;
   }
+
+  default Map<String, String> listOverriddenColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+    return Collections.emptyMap();
+  }
 }
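Finally, the default implementation keeps every existing handler unaffected: it ignores the table and reports no overrides. A sketch of a JUnit check for that contract; the test class name is illustrative:

```java
import static org.junit.Assert.assertTrue;

import java.util.Map;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.junit.Test;

public class TestListOverriddenColumnTypes {
  @Test
  public void testDefaultImplementationReturnsEmptyMap() {
    HiveStorageHandler handler = new DefaultStorageHandler();
    // The default method ignores the table argument and reports no overrides.
    Map<String, String> overrides = handler.listOverriddenColumnTypes(null);
    assertTrue(overrides.isEmpty());
  }
}
```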