diff --git a/hivelink-core/src/main/java/org/apache/iceberg/hivelink/core/LegacyHiveTableUtils.java b/hivelink-core/src/main/java/org/apache/iceberg/hivelink/core/LegacyHiveTableUtils.java
index 41f1e42014..c5e650c910 100644
--- a/hivelink-core/src/main/java/org/apache/iceberg/hivelink/core/LegacyHiveTableUtils.java
+++ b/hivelink-core/src/main/java/org/apache/iceberg/hivelink/core/LegacyHiveTableUtils.java
@@ -19,6 +19,7 @@
 
 package org.apache.iceberg.hivelink.core;
 
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -42,6 +43,7 @@
 import org.apache.iceberg.hivelink.core.utils.HiveTypeUtil;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.types.Conversions;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
@@ -88,7 +90,8 @@ static Schema getSchema(org.apache.hadoop.hive.metastore.api.Table table) {
     Types.StructType dataStructType = schema.asStruct();
     List<Types.NestedField> fields = Lists.newArrayList(dataStructType.fields());
 
-    Schema partitionSchema = partitionSchema(table.getPartitionKeys(), schema);
+    String partitionColumnIdMappingString = props.get("partition.column.ids");
+    Schema partitionSchema = partitionSchema(table.getPartitionKeys(), schema, partitionColumnIdMappingString);
     Types.StructType partitionStructType = partitionSchema.asStruct();
     fields.addAll(partitionStructType.fields());
     return new Schema(fields);
@@ -107,7 +110,8 @@ static StructTypeInfo structTypeInfoFromCols(List<FieldSchema> cols) {
     return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
   }
 
-  private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema dataSchema) {
+  private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema dataSchema, String idMapping) {
+    Map<String, Integer> nameToId = parsePartitionColId(idMapping);
     AtomicInteger fieldId = new AtomicInteger(10000);
     List<Types.NestedField> partitionFields = Lists.newArrayList();
     partitionKeys.forEach(f -> {
@@ -117,11 +121,39 @@ private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema da
       }
       partitionFields.add(
          Types.NestedField.optional(
-              fieldId.incrementAndGet(), f.getName(), primitiveIcebergType(f.getType()), f.getComment()));
+              nameToId.containsKey(f.getName()) ? nameToId.get(f.getName()) : fieldId.incrementAndGet(),
+              f.getName(), primitiveIcebergType(f.getType()), f.getComment()));
     });
     return new Schema(partitionFields);
   }
 
+  /**
+   * Parses the partition column name to field id mapping.
+   *
+   * @param idMapping A comma-separated string of column names and their ids,
+   *                  e.g. partitionCol1:10,partitionCol2:11; no whitespace is
+   *                  allowed anywhere in the string
+   * @return The parsed in-memory Map representation of the name-to-id mapping
+   */
+  private static Map<String, Integer> parsePartitionColId(String idMapping) {
+    Map<String, Integer> nameToId = Maps.newHashMap();
+    if (idMapping != null) {
+      // parse the idMapping string into individual name:id pairs
+      Arrays.stream(idMapping.split(",")).forEach(kv -> {
+        String[] split = kv.split(":");
+        if (split.length != 2) {
+          throw new IllegalStateException(String.format(
+              "partition.column.ids property has an invalid format: %s",
+              idMapping));
+        }
+        String name = split[0];
+        Integer id = Integer.parseInt(split[1]);
+        nameToId.put(name, id);
+      });
+    }
+    return nameToId;
+  }
+
   private static Type primitiveIcebergType(String hiveTypeString) {
     PrimitiveTypeInfo primitiveTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(hiveTypeString);
     return HiveTypeUtil.convert(primitiveTypeInfo);
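
For illustration, a minimal standalone sketch (not part of the patch) of how the partition.column.ids table property is expected to look and how explicit ids interact with the 10000-based fallback counter used in partitionSchema. The class name and the sample column names (datepartition, region, unmapped_col) are hypothetical, and plain java.util.HashMap stands in for the relocated Guava Maps helper used in the diff.

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

public class PartitionColumnIdMappingSketch {

  // Mirrors the parsing in the patch: "name:id" pairs separated by commas, no whitespace.
  static Map<String, Integer> parse(String idMapping) {
    Map<String, Integer> nameToId = new HashMap<>();
    if (idMapping != null) {
      Arrays.stream(idMapping.split(",")).forEach(kv -> {
        String[] split = kv.split(":");
        if (split.length != 2) {
          throw new IllegalStateException(String.format(
              "partition.column.ids property has an invalid format: %s", idMapping));
        }
        nameToId.put(split[0], Integer.parseInt(split[1]));
      });
    }
    return nameToId;
  }

  public static void main(String[] args) {
    // Explicit ids from the table property win over auto-assigned ones.
    Map<String, Integer> nameToId = parse("datepartition:10,region:11");
    AtomicInteger fieldId = new AtomicInteger(10000);
    for (String col : new String[] {"datepartition", "region", "unmapped_col"}) {
      int id = nameToId.containsKey(col) ? nameToId.get(col) : fieldId.incrementAndGet();
      System.out.println(col + " -> " + id);
    }
    // datepartition -> 10
    // region -> 11
    // unmapped_col -> 10001   (falls back to the counter, as before the patch)

    // Missing property: parse(null) returns an empty map, so every partition
    // column keeps the counter-based id, preserving the old behavior.
    System.out.println(parse(null).isEmpty()); // true
  }
}

One design note: the ternary in partitionSchema only advances the counter when a column has no explicit id, so tables that define partition.column.ids for every partition column never consume counter values at all.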