linkedin · rzhang10 · Aug 16, 2022 · Aug 12, 2022 · Aug 16, 2022
diff --git a/hivelink-core/src/main/java/org/apache/iceberg/hivelink/core/LegacyHiveTableUtils.java b/hivelink-core/src/main/java/org/apache/iceberg/hivelink/core/LegacyHiveTableUtils.java
@@ -19,6 +19,7 @@
 
 package org.apache.iceberg.hivelink.core;
 
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -42,6 +43,7 @@
 import org.apache.iceberg.hivelink.core.utils.HiveTypeUtil;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.types.Conversions;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
@@ -88,7 +90,8 @@ static Schema getSchema(org.apache.hadoop.hive.metastore.api.Table table) {
     Types.StructType dataStructType = schema.asStruct();
     List<Types.NestedField> fields = Lists.newArrayList(dataStructType.fields());
 
-    Schema partitionSchema = partitionSchema(table.getPartitionKeys(), schema);
+    String partitionColumnIdMappingString = props.get("partition.column.ids");
+    Schema partitionSchema = partitionSchema(table.getPartitionKeys(), schema, partitionColumnIdMappingString);
     Types.StructType partitionStructType = partitionSchema.asStruct();
     fields.addAll(partitionStructType.fields());
     return new Schema(fields);
@@ -107,7 +110,8 @@ static StructTypeInfo structTypeInfoFromCols(List<FieldSchema> cols) {
     return (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos);
   }
 
-  private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema dataSchema) {
+  private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema dataSchema, String idMapping) {
+    Map<String, Integer> nameToId = parsePartitionColId(idMapping);
     AtomicInteger fieldId = new AtomicInteger(10000);
     List<Types.NestedField> partitionFields = Lists.newArrayList();
     partitionKeys.forEach(f -> {
@@ -117,11 +121,39 @@ private static Schema partitionSchema(List<FieldSchema> partitionKeys, Schema da
       }
       partitionFields.add(
           Types.NestedField.optional(
-              fieldId.incrementAndGet(), f.getName(), primitiveIcebergType(f.getType()), f.getComment()));
+              nameToId.containsKey(f.getName()) ? nameToId.get(f.getName()) : fieldId.incrementAndGet(),
+              f.getName(), primitiveIcebergType(f.getType()), f.getComment()));
     });
     return new Schema(partitionFields);
   }
 
+  /**
+   *
+   * @param idMapping A comma separated string representation of column name
+   *                  and its id, e.g. partitionCol1:10,partitionCol2:11, no
+   *                  whitespace is allowed in the middle
+   * @return          The parsed in-mem Map representation of the name to
+   *                  id mapping
+   */
+  private static Map<String, Integer> parsePartitionColId(String idMapping) {
+    Map<String, Integer> nameToId = Maps.newHashMap();
+    if (idMapping != null) {
+      // parse idMapping string
+      Arrays.stream(idMapping.split(",")).forEach(kv -> {
+        String[] split = kv.split(":");
+        if (split.length != 2) {
+          throw new IllegalStateException(String.format(
+              "partition.column.ids property is invalid format: %s",
+              idMapping));
+        }
+        String name = split[0];
+        Integer id = Integer.parseInt(split[1]);
+        nameToId.put(name, id);
+      });
+    }
+    return nameToId;
+  }
+
   private static Type primitiveIcebergType(String hiveTypeString) {
     PrimitiveTypeInfo primitiveTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(hiveTypeString);
     return HiveTypeUtil.convert(primitiveTypeInfo);