@@ -201,6 +201,7 @@
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.Pair;
import org.apache.iceberg.util.SerializationUtil;
@@ -2085,6 +2086,28 @@ public List<String> getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table h
return IcebergTableUtil.getPartitionNames(icebergTable, partitionSpec, false);
}

@Override
public Map<String, String> listOverriddenColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Map<String, String> columns = Maps.newHashMap();
for (Types.NestedField field : icebergTable.schema().columns()) {
String overriddenType = getOverriddenColumnType(field.type());
if (overriddenType != null) {
columns.put(field.name(), overriddenType);
}
}
return columns;
}

private String getOverriddenColumnType(Type type) {
// Geometry, Geography & Timestamp_ns types will be handled here as well
Contributor: You'll be adding TIMESTAMP_NANO here post #6242, is my understanding correct?
Member (Author): yep,

switch (type.typeId()) {
case VARIANT:
return type.toString();
}
return null;
}
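
Per the review thread above, this switch is expected to grow as more overridden logical types land. A rough sketch of that future shape, assuming the Iceberg TypeID constant names (TIMESTAMP_NANO pending #6242; Geometry/Geography not yet merged):

    // Hypothetical future shape of getOverriddenColumnType; a sketch, not part of this patch.
    // TIMESTAMP_NANO is an assumed enum constant, subject to the API introduced by #6242.
    switch (type.typeId()) {
      case VARIANT:
      case TIMESTAMP_NANO:
        return type.toString();
      default:
        return null;
    }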

/**
* A function to fetch the column information of the underlying column defined by the table format.
* @param hmsTable A Hive table instance
@@ -1,5 +1,6 @@
-- Mask random uuid
--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
--! qt:replace:/(\s+uuid\s+)\S+/$1#Masked#/
-- Mask random snapshot id
--! qt:replace:/('current-snapshot-id'=')\d+/$1#SnapshotId#/
-- Mask current-snapshot-timestamp-ms
@@ -11,6 +12,12 @@ CREATE EXTERNAL TABLE variant_test_basic (
data VARIANT
) STORED BY ICEBERG tblproperties('format-version'='3');

show create table variant_test_basic;

describe variant_test_basic;

describe formatted variant_test_basic;

-- Insert primitive types
INSERT INTO variant_test_basic VALUES
(1, parse_json('null')),
@@ -12,6 +12,92 @@ POSTHOOK: query: CREATE EXTERNAL TABLE variant_test_basic (
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@variant_test_basic
PREHOOK: query: show create table variant_test_basic
PREHOOK: type: SHOW_CREATETABLE
PREHOOK: Input: default@variant_test_basic
POSTHOOK: query: show create table variant_test_basic
POSTHOOK: type: SHOW_CREATETABLE
POSTHOOK: Input: default@variant_test_basic
CREATE EXTERNAL TABLE `variant_test_basic`(
`id` int,
`data` variant)
ROW FORMAT SERDE
'org.apache.iceberg.mr.hive.HiveIcebergSerDe'
STORED BY
'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'

LOCATION
'hdfs://### HDFS PATH ###'
TBLPROPERTIES (
'bucketing_version'='2',
'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":false,"type":"int"},{"id":2,"name":"data","required":false,"type":"variant"}]}',
'format-version'='3',
'iceberg.orc.files.only'='false',
'metadata_location'='hdfs://### HDFS PATH ###',
'parquet.compression'='zstd',
'serialization.format'='1',
'snapshot-count'='0',
'table_type'='ICEBERG',
#### A masked pattern was here ####
'uuid'='#Masked#',
'write.delete.mode'='merge-on-read',
'write.merge.mode'='merge-on-read',
'write.metadata.delete-after-commit.enabled'='true',
'write.update.mode'='merge-on-read')
PREHOOK: query: describe variant_test_basic
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@variant_test_basic
POSTHOOK: query: describe variant_test_basic
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@variant_test_basic
id int
data variant
PREHOOK: query: describe formatted variant_test_basic
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@variant_test_basic
POSTHOOK: query: describe formatted variant_test_basic
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@variant_test_basic
# col_name data_type comment
id int
data variant

# Detailed Table Information
Database: default
#### A masked pattern was here ####
Retention: 0
#### A masked pattern was here ####
Table Type: EXTERNAL_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"data\":\"true\",\"id\":\"true\"}}
EXTERNAL TRUE
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"data\",\"required\":false,\"type\":\"variant\"}]}
format-version 3
iceberg.orc.files.only false
metadata_location hdfs://### HDFS PATH ###
numFiles 0
numRows 0
parquet.compression zstd
rawDataSize 0
serialization.format 1
snapshot-count 0
storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
table_type ICEBERG
totalSize #Masked#
#### A masked pattern was here ####
uuid #Masked#
write.delete.mode merge-on-read
write.merge.mode merge-on-read
write.metadata.delete-after-commit.enabled true
write.update.mode merge-on-read

# Storage Information
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
Compressed: No
Sort Columns: []
PREHOOK: query: INSERT INTO variant_test_basic VALUES
(1, parse_json('null')),
(2, parse_json('true')),
@@ -21,6 +21,7 @@
import java.io.DataOutputStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -95,7 +96,7 @@ public int execute() throws Exception {
cols.addAll(Hive.getFieldsFromDeserializer(desc.getColumnPath(), deserializer, context.getConf()));
}
}
fixDecimalColumnTypeName(cols);
fixColumnTypeName(cols, table);

setConstraintsAndStorageHandlerInfo(table);
handleMaterializedView(table);
@@ -270,15 +271,22 @@ private void getColumnsForNotPartitionKeyColumn(Table table, List<FieldSchema> c
/**
* Fix the type name of a column of type decimal w/o precision/scale specified. This makes
* the describe table show "decimal(10,0)" instead of "decimal" even if the type stored
* in metastore is "decimal", which is possible with previous hive.
* in metastore is "decimal", which is possible with previous hive and if the column is overridden to some other
* logical type.
*
* @param cols the columns to be fixed as described
* @param table the Hive table
*/
private static void fixDecimalColumnTypeName(List<FieldSchema> cols) {
private static void fixColumnTypeName(List<FieldSchema> cols, Table table) {
Map<String, String> overriddenColumnTypes =
table.isNonNative() ? table.getStorageHandler().listOverriddenColumnTypes(table) : Collections.emptyMap();
for (FieldSchema col : cols) {
if (serdeConstants.DECIMAL_TYPE_NAME.equals(col.getType())) {
col.setType(DecimalTypeInfo.getQualifiedName(HiveDecimal.USER_DEFAULT_PRECISION,
HiveDecimal.USER_DEFAULT_SCALE));
col.setType(
DecimalTypeInfo.getQualifiedName(HiveDecimal.USER_DEFAULT_PRECISION, HiveDecimal.USER_DEFAULT_SCALE));
}
if (overriddenColumnTypes.containsKey(col.getName())) {
col.setType(overriddenColumnTypes.get(col.getName()));
}
}
}
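
For illustration, a self-contained sketch of the rewrite this method performs, using a hypothetical record type instead of Hive's FieldSchema (not part of the patch):

    import java.util.List;
    import java.util.Map;

    // Standalone sketch: bare decimals get the default precision/scale, and columns
    // reported by the storage handler take their overridden logical type name.
    public class FixColumnTypeNameSketch {
      record Col(String name, String type) {}

      static List<Col> fix(List<Col> cols, Map<String, String> overridden) {
        return cols.stream()
            .map(c -> "decimal".equals(c.type()) ? new Col(c.name(), "decimal(10,0)") : c)
            .map(c -> overridden.containsKey(c.name()) ? new Col(c.name(), overridden.get(c.name())) : c)
            .toList();
      }

      public static void main(String[] args) {
        // "data" is reported as overridden to "variant", as the Iceberg handler above would do.
        System.out.println(fix(
            List.of(new Col("amount", "decimal"), new Col("data", "struct<x:int>")),
            Map.of("data", "variant")));
        // prints: [Col[name=amount, type=decimal(10,0)], Col[name=data, type=variant]]
      }
    }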
8 changes: 6 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/exec/DDLPlanUtils.java
@@ -24,6 +24,8 @@
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;

import java.util.Collections;
import java.util.Comparator;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
@@ -38,7 +40,6 @@
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.SkewedInfo;
@@ -910,8 +911,11 @@ private String getExternal(Table table) {

private String getColumns(Table table) {
List<String> columnDescs = new ArrayList<>();
Map<String, String> colTypes =
table.isNonNative() ? table.getStorageHandler().listOverriddenColumnTypes(table) : Collections.emptyMap();
for (FieldSchema column : table.getCols()) {
String columnType = formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType()));
String columnType = colTypes.getOrDefault(column.getName(),
formatType(TypeInfoUtils.getTypeInfoFromTypeString(column.getType())));
String columnDesc = " " + unparseIdentifier(column.getName()) + " " + columnType;
if (column.getComment() != null) {
columnDesc += " COMMENT '" + HiveStringUtils.escapeHiveCommand(column.getComment()) + "'";
@@ -900,7 +900,7 @@ default List<String> getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table
default List<String> getPartitionNames(org.apache.hadoop.hive.ql.metadata.Table table) throws SemanticException {
return getPartitionNames(table, Maps.newHashMap());
}

default ColumnInfo getColumnInfo(org.apache.hadoop.hive.ql.metadata.Table hmsTable, String colName)
throws SemanticException {
throw new UnsupportedOperationException("Storage handler does not support getting column type " +
@@ -1023,4 +1023,8 @@ default void setMergeTaskDeleteProperties(TableDesc tableDesc) {
default boolean supportsDefaultColumnValues(Map<String, String> tblProps) {
return false;
}

/**
* Lists the columns whose Hive-visible type name is overridden by the underlying table format,
* keyed by column name (e.g. "data" -> "variant"). By default there are no overrides.
*/
default Map<String, String> listOverriddenColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
return Collections.emptyMap();
}
}
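
For context, a minimal sketch of how a custom storage handler might implement the new hook. The class and the "example.variant.columns" table property are hypothetical, not part of this PR:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;

    // Hypothetical implementation: reports every column named in an invented table
    // property as overridden to the logical type "variant".
    public class ExampleStorageHandler extends DefaultStorageHandler {
      @Override
      public Map<String, String> listOverriddenColumnTypes(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
        Map<String, String> overridden = new HashMap<>();
        String prop = hmsTable.getParameters().get("example.variant.columns");
        if (prop != null) {
          for (String col : prop.split(",")) {
            overridden.put(col.trim(), "variant");
          }
        }
        return overridden;
      }
    }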