From 015611458e5f1a02a3dbbd34b46c57e28ed32ca3 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 18 Dec 2025 22:20:30 +0530 Subject: [PATCH 1/3] HIVE-29381: Iceberg: [V3] SHOW/DESCRIBE table should show the actual data type --- .../mr/hive/HiveIcebergStorageHandler.java | 23 +++++ .../src/test/queries/positive/variant_type.q | 7 ++ .../test/results/positive/variant_type.q.out | 86 +++++++++++++++++++ .../table/info/desc/DescTableOperation.java | 20 +++-- .../hadoop/hive/ql/exec/DDLPlanUtils.java | 8 +- .../hive/ql/metadata/HiveStorageHandler.java | 6 +- 6 files changed, 141 insertions(+), 9 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 7b1aabf99154..3fa4f8b813e5 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -201,6 +201,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.Conversions; +import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.Pair; import org.apache.iceberg.util.SerializationUtil; @@ -2085,6 +2086,28 @@ public List getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table h return IcebergTableUtil.getPartitionNames(icebergTable, partitionSpec, false); } + @Override + public Map listOverriddenColumnTypesColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable()); + Map columns = Maps.newHashMap(); + for (Types.NestedField field : icebergTable.schema().columns()) { + if (isOverriddenColumn(field.type().typeId())) { + columns.put(field.name(), 
field.type().toString()); + } + } + return columns; + } + + private boolean isOverriddenColumn(Type.TypeID typeID) { + // Geometry, Geography & Timestamp_ns types + switch (typeID) { + case VARIANT -> { + return true; + } + } + return false; + } + /** * A function to fetch the column information of the underlying column defined by the table format. * @param hmsTable A Hive table instance diff --git a/iceberg/iceberg-handler/src/test/queries/positive/variant_type.q b/iceberg/iceberg-handler/src/test/queries/positive/variant_type.q index a6a10532dcc2..fcba7e91aee9 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/variant_type.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/variant_type.q @@ -1,5 +1,6 @@ -- Mask random uuid --! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/ +--! qt:replace:/(\s+uuid\s+)\S+/$1#Masked#/ -- Mask random snapshot id --! qt:replace:/('current-snapshot-id'=')\d+/$1#SnapshotId#/ -- Mask current-snapshot-timestamp-ms @@ -11,6 +12,12 @@ CREATE EXTERNAL TABLE variant_test_basic ( data VARIANT ) STORED BY ICEBERG tblproperties('format-version'='3'); +show create table variant_test_basic; + +describe variant_test_basic; + +describe formatted variant_test_basic; + -- Insert primitive types INSERT INTO variant_test_basic VALUES (1, parse_json('null')), diff --git a/iceberg/iceberg-handler/src/test/results/positive/variant_type.q.out b/iceberg/iceberg-handler/src/test/results/positive/variant_type.q.out index c71778f5c260..4ee8a4c2633c 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/variant_type.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/variant_type.q.out @@ -12,6 +12,92 @@ POSTHOOK: query: CREATE EXTERNAL TABLE variant_test_basic ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@variant_test_basic +PREHOOK: query: show create table variant_test_basic +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@variant_test_basic +POSTHOOK: query: show 
create table variant_test_basic +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@variant_test_basic +CREATE EXTERNAL TABLE `variant_test_basic`( + `id` int, + `data` variant) +ROW FORMAT SERDE + 'org.apache.iceberg.mr.hive.HiveIcebergSerDe' +STORED BY + 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' + +LOCATION + 'hdfs://### HDFS PATH ###' +TBLPROPERTIES ( + 'bucketing_version'='2', + 'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"},{"id":2,"name":"data","required":false,"type":"variant"}]}', + 'format-version'='3', + 'iceberg.orc.files.only'='false', + 'metadata_location'='hdfs://### HDFS PATH ###', + 'parquet.compression'='zstd', + 'serialization.format'='1', + 'snapshot-count'='0', + 'table_type'='ICEBERG', +#### A masked pattern was here #### + 'uuid'='#Masked#', + 'write.delete.mode'='merge-on-read', + 'write.merge.mode'='merge-on-read', + 'write.metadata.delete-after-commit.enabled'='true', + 'write.update.mode'='merge-on-read') +PREHOOK: query: describe variant_test_basic +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@variant_test_basic +POSTHOOK: query: describe variant_test_basic +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@variant_test_basic +id int +data variant +PREHOOK: query: describe formatted variant_test_basic +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@variant_test_basic +POSTHOOK: query: describe formatted variant_test_basic +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@variant_test_basic +# col_name data_type comment +id int +data variant + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"data\":\"true\",\"id\":\"true\"}} + EXTERNAL TRUE + bucketing_version 2 + current-schema 
{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"data\",\"required\":false,\"type\":\"variant\"}]} + format-version 3 + iceberg.orc.files.only false + metadata_location hdfs://### HDFS PATH ### + numFiles 0 + numRows 0 + parquet.compression zstd + rawDataSize 0 + serialization.format 1 + snapshot-count 0 + storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler + table_type ICEBERG + totalSize #Masked# +#### A masked pattern was here #### + uuid #Masked# + write.delete.mode merge-on-read + write.merge.mode merge-on-read + write.metadata.delete-after-commit.enabled true + write.update.mode merge-on-read + +# Storage Information +SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe +InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat +OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat +Compressed: No +Sort Columns: [] PREHOOK: query: INSERT INTO variant_test_basic VALUES (1, parse_json('null')), (2, parse_json('true')), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java index 9f9f9351ef48..b2698ad3c0b4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java @@ -21,6 +21,7 @@ import java.io.DataOutputStream; import java.sql.SQLException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -95,7 +96,7 @@ public int execute() throws Exception { cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer, context.getConf())); } } - fixDecimalColumnTypeName(cols); + fixColumnTypeName(cols, table); setConstraintsAndStorageHandlerInfo(table); handleMaterializedView(table); @@ -270,15 +271,22 @@ 
private void getColumnsForNotPartitionKeyColumn(Table table, List c /** * Fix the type name of a column of type decimal w/o precision/scale specified. This makes * the describe table show "decimal(10,0)" instead of "decimal" even if the type stored - * in metastore is "decimal", which is possible with previous hive. + * in metastore is "decimal", which is possible with previous hive. For a non-native table, a column that the + * storage handler lists as overridden is shown with that overriding type instead. * - * @param cols columns that to be fixed as such + * @param cols columns to be fixed as such + * @param table the hive table */ - private static void fixDecimalColumnTypeName(List cols) { + private static void fixColumnTypeName(List cols, Table table) { + Map overriddenColumnTypes = + table.isNonNative() ? table.getStorageHandler().listOverriddenColumnTypesColumnTypes(table) : Collections.emptyMap(); for (FieldSchema col : cols) { if (serdeConstants.DECIMAL_TYPE_NAME.equals(col.getType())) { - col.setType(DecimalTypeInfo.getQualifiedName(HiveDecimal.USER_DEFAULT_PRECISION, - HiveDecimal.USER_DEFAULT_SCALE)); + col.setType( + DecimalTypeInfo.getQualifiedName(HiveDecimal.USER_DEFAULT_PRECISION, HiveDecimal.USER_DEFAULT_SCALE)); + } + if (overriddenColumnTypes.containsKey(col.getName())) { + col.setType(overriddenColumnTypes.get(col.getName())); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java index a5bc66733f46..1607c702eb88 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java @@ -24,6 +24,8 @@ import com.google.common.base.Joiner; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; + +import java.util.Collections; import java.util.Comparator; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hive.common.StatsSetupConst; @@ -38,7 +40,6 @@ import 
org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; @@ -910,8 +911,11 @@ private String getExternal(Table table) { private String getColumns(Table table) { List columnDescs = new ArrayList<>(); + Map colTypes = + table.isNonNative() ? table.getStorageHandler().listOverriddenColumnTypesColumnTypes(table) : Collections.emptyMap(); for (FieldSchema column : table.getCols()) { - String columnType = formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType())); + String columnType = colTypes.getOrDefault(column.getName(), + formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType()))); String columnDesc = " " + unparseIdentifier(column.getName()) + " " + columnType; if (column.getComment() != null) { columnDesc += " COMMENT '" + HiveStringUtils.escapeHiveCommand(column.getComment()) + "'"; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java index 39358ca3d594..b40456573e01 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java @@ -900,7 +900,7 @@ default List getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table default List getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table table) throws SemanticException { return getPartitionNames(table, Maps.newHashMap()); } - + default ColumnInfo getColumnInfo(org.apache.hadoop.hive.ql.metadata.Table hmsTable, String colName) throws SemanticException { throw new UnsupportedOperationException("Storage handler does not support getting column type " + @@ 
-1023,4 +1023,8 @@ default void setMergeTaskDeleteProperties(TableDesc tableDesc) { default boolean supportsDefaultColumnValues(Map tblProps) { return false; } + + default Map listOverriddenColumnTypesColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + return Collections.emptyMap(); + } } From 8cee70344b94c52496c68d29971f42cae4101d8a Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Fri, 19 Dec 2025 01:18:42 +0530 Subject: [PATCH 2/3] Fix Typo --- .../org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java | 2 +- .../hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java | 2 +- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java | 2 +- .../org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 3fa4f8b813e5..a3690c16a778 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -2087,7 +2087,7 @@ public List getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table h } @Override - public Map listOverriddenColumnTypesColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + public Map listOverriddenColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable()); Map columns = Maps.newHashMap(); for (Types.NestedField field : icebergTable.schema().columns()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java index b2698ad3c0b4..bb057b9cffe2 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java @@ -279,7 +279,7 @@ private void getColumnsForNotPartitionKeyColumn(Table table, List c */ private static void fixColumnTypeName(List cols, Table table) { Map overriddenColumnTypes = - table.isNonNative() ? table.getStorageHandler().listOverriddenColumnTypesColumnTypes(table) : Collections.emptyMap(); + table.isNonNative() ? table.getStorageHandler().listOverriddenColumnTypes(table) : Collections.emptyMap(); for (FieldSchema col : cols) { if (serdeConstants.DECIMAL_TYPE_NAME.equals(col.getType())) { col.setType( diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java index 1607c702eb88..2cb5332a9cac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java @@ -912,7 +912,7 @@ private String getExternal(Table table) { private String getColumns(Table table) { List columnDescs = new ArrayList<>(); Map colTypes = - table.isNonNative() ? table.getStorageHandler().listOverriddenColumnTypesColumnTypes(table) : Collections.emptyMap(); + table.isNonNative() ? 
table.getStorageHandler().listOverriddenColumnTypes(table) : Collections.emptyMap(); for (FieldSchema column : table.getCols()) { String columnType = colTypes.getOrDefault(column.getName(), formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType()))); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java index b40456573e01..f03e3e9f3052 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java @@ -1024,7 +1024,7 @@ default boolean supportsDefaultColumnValues(Map tblProps) { return false; } - default Map listOverriddenColumnTypesColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { + default Map listOverriddenColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) { return Collections.emptyMap(); } } From 867f00c1a5dfe675c7b7e0a7f1ee2ddb0edbd25e Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Tue, 23 Dec 2025 17:24:32 +0530 Subject: [PATCH 3/3] Change Approach --- .../mr/hive/HiveIcebergStorageHandler.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index a3690c16a778..56bbc583d84b 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -2091,21 +2091,21 @@ public Map listOverriddenColumnTypes(org.apache.hadoop.hive.ql.m Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable()); Map columns = Maps.newHashMap(); for (Types.NestedField field : icebergTable.schema().columns()) { - if (isOverriddenColumn(field.type().typeId())) { - 
columns.put(field.name(), field.type().toString()); + String overriddenColumnName = getOverriddenColumn(field.type()); + if (overriddenColumnName != null) { + columns.put(field.name(), overriddenColumnName); } } return columns; } - private boolean isOverriddenColumn(Type.TypeID typeID) { + private String getOverriddenColumn(Type type) { // Geometry, Geography & Timestamp_ns types - switch (typeID) { - case VARIANT -> { - return true; - } + switch (type.typeId()) { + case VARIANT: + return type.toString(); } - return false; + return null; } /**