Merged
10 changes: 10 additions & 0 deletions api/src/main/java/org/apache/iceberg/Schema.java
@@ -209,6 +209,16 @@ public Set<Integer> identifierFieldIds() {
return lazyIdentifierFieldIdSet();
}

/**
* Returns the set of identifier field names.
*/
public Set<String> identifierFieldNames() {
return identifierFieldIds()
.stream()
.map(id -> findField(id).name())
.collect(Collectors.toSet());
}

/**
* Returns the {@link Type} of a sub-field identified by the field name.
*
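The new `Schema#identifierFieldNames` helper above is just an id-to-name projection over the identifier field ids. A stand-alone sketch of the same logic using only `java.util` collections, with a plain map standing in for `Schema.findField(id).name()` (the class and variable names here are illustrative, not from the Iceberg API):

```java
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class IdentifierFieldNamesSketch {
    // idToName stands in for Schema.findField(id).name(); the real method
    // resolves fields through the schema's lazily built index.
    static Set<String> identifierFieldNames(Set<Integer> identifierFieldIds,
                                            Map<Integer, String> idToName) {
        return identifierFieldIds.stream()
            .map(idToName::get)
            .collect(Collectors.toSet());
    }

    public static void main(String[] args) {
        // schema has fields "id" (1) and "data" (2); only "id" is an identifier
        Map<Integer, String> idToName = Map.of(1, "id", 2, "data");
        System.out.println(identifierFieldNames(Set.of(1), idToName)); // prints [id]
    }
}
```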
26 changes: 21 additions & 5 deletions api/src/main/java/org/apache/iceberg/types/TypeUtil.java
@@ -22,10 +22,12 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import org.apache.iceberg.Schema;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
@@ -181,10 +183,24 @@ public static Schema assignFreshIds(int schemaId, Schema schema, NextID nextId)
* @return a structurally identical schema with new ids assigned by the nextId function
*/
public static Schema assignFreshIds(Schema schema, Schema baseSchema, NextID nextId) {
return new Schema(TypeUtil
.visit(schema.asStruct(), new AssignFreshIds(schema, baseSchema, nextId))
.asNestedType()
.fields());
Types.StructType freshSchemaStruct = TypeUtil
Member:

Besides this assignFreshIds, the other methods with the same method name should also refresh their identifier field id lists, right? I also think we will need more unit tests to cover these changes.

Member:

Another thing (unrelated to this PR, but I think it's important): when we reconstruct the Schema in assignFreshIds, it looks like we've ignored the Map<String, Integer> aliases. That does not seem like the correct behavior, right? I mean we should use the existing aliases from the old schema to construct the refreshed new schema.

Contributor Author:

Sorry, I forgot those methods; updated. So far I don't see a place where those methods are called and need to rely on the aliases. I will continue to look; if there is an exception I will put it in another PR.

Member (@openinx, May 7, 2021):

In theory, a schema is composed of schemaId, fields, aliases, and identifierFieldIds. We will need to maintain all of those members when we refresh or reassign field ids based on the old schema; by default we should carry the old schema's info over to the fresh schema if people don't provide the necessary info to fill in.

Getting this work into a new PR looks good to me.

Contributor:

I think the aliases are a bit different here: they are used mostly for integration purposes, when converting a file schema to an Iceberg schema, for easier lookup (i.e. as a form of helper), and they aren't part of the Iceberg schema itself and aren't written to table metadata. So I think they may not be strictly required when constructing new schemas from this class.

Contributor Author:

Yes, practically speaking the aliases are not used in any related code path, which is why I did not handle them when making this change. But from a correctness perspective I agree with @openinx that if the aliases exist, we should do the conversion, in case they are somehow used for that purpose somewhere in the future.

What I am trying out is to change the AssignFreshIds visitor so that it can update the ids along the way for both the aliases and the identifier fields. I will update the PR after completing the implementation.

Contributor Author:

@openinx, after reading the code a bit more, I think it does not make sense to convert the aliases in these methods. The reason is that, as the documentation suggests:

"Alias maps are created when translating an external schema, like an Avro Schema, to this format. The original column names can be provided in a Map when constructing this Schema."

So this happens in methods such as AvroSchemaUtil.toIceberg, ParquetSchemaUtil.convert, etc. However, these aliases are never persisted in the actual table metadata. As proof, TypeUtil.assignFreshIds is called for every table metadata replacement, but the aliases are never passed in. So changing the method to pass in the aliases would be a change of behavior, and we should not do that. I think the current implementation should be good enough; I will add a few tests based on what you and Yan suggested.

.visit(schema.asStruct(), new AssignFreshIds(schema, baseSchema, nextId))
.asStructType();
return new Schema(freshSchemaStruct.fields(), refreshIdentifierFields(freshSchemaStruct, schema));
}

/**
 * Get the identifier fields in the fresh schema based on the identifier fields in the base schema.
 *
 * @param freshSchema fresh schema
 * @param baseSchema base schema
 * @return identifier fields in the fresh schema
 */
public static Set<Integer> refreshIdentifierFields(Types.StructType freshSchema, Schema baseSchema) {
Map<String, Integer> nameToId = TypeUtil.indexByName(freshSchema);
return baseSchema.identifierFieldNames().stream()
.map(nameToId::get)
.filter(Objects::nonNull)
.collect(Collectors.toSet());
}
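The helper above is purely name-based: each identifier field id in the base schema is resolved to its name, then looked up in the fresh schema's name-to-id index, and fields that no longer exist are silently dropped. A minimal stand-alone sketch of that logic using plain `java.util` maps in place of the Iceberg Schema and `TypeUtil.indexByName` (class and variable names here are illustrative, not from the Iceberg API):

```java
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

public class RefreshIdentifierFieldsSketch {
    // Simplified model: baseIdentifierNames plays the role of
    // baseSchema.identifierFieldNames(), and freshNameToId plays the role of
    // TypeUtil.indexByName(freshSchema). Fields missing from the fresh schema
    // map to null and are filtered out, mirroring filter(Objects::nonNull).
    static Set<Integer> refreshIdentifierFields(Set<String> baseIdentifierNames,
                                                Map<String, Integer> freshNameToId) {
        return baseIdentifierNames.stream()
            .map(freshNameToId::get)
            .filter(Objects::nonNull)
            .collect(Collectors.toSet());
    }

    public static void main(String[] args) {
        // the base schema declared "id" as its identifier field;
        // the fresh schema reassigned "id" -> 4 and "data" -> 5
        Map<String, Integer> freshNameToId = Map.of("id", 4, "data", 5);
        Set<Integer> refreshed = refreshIdentifierFields(Set.of("id"), freshNameToId);
        System.out.println(refreshed); // prints [4]
    }
}
```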

/**
@@ -213,7 +229,7 @@ public static Schema assignIncreasingFreshIds(Schema schema) {
*/
public static Schema reassignIds(Schema schema, Schema idSourceSchema) {
Types.StructType struct = visit(schema, new ReassignIds(idSourceSchema)).asStructType();
return new Schema(struct.fields());
return new Schema(struct.fields(), refreshIdentifierFields(struct, schema));
Contributor:

Since the schema passed in to this method is almost always one that was just constructed (and thus has to call this method to assign the right ids), I think refreshIdentifierFields(struct, schema) here will almost always be a no-op?

I wonder if we want to use idSourceSchema to get the identifier fields, although that has a different problem: the input schema could be a subset of idSourceSchema and thus not include all identifier fields. Though in that case the identifier fields won't be useful and we may skip them, and we can visit the two schemas and verify they have the same number of columns to detect this case. I'm not sure about the use case of this method and whether we really need to assign identifier fields here, though.

Member:

I would prefer to call refreshIdentifierFields here to get a consistent view of the identifier field id list for the schema, without caring about idSourceSchema's own identifier field id list. This makes the reassignIds method look more general.

}

public static Type find(Schema schema, Predicate<Type> predicate) {
21 changes: 21 additions & 0 deletions api/src/test/java/org/apache/iceberg/types/TestTypeUtil.java
@@ -21,6 +21,8 @@
package org.apache.iceberg.types;

import org.apache.iceberg.Schema;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.junit.Assert;
import org.junit.Test;

@@ -42,6 +44,25 @@ public void testReassignIdsDuplicateColumns() {
Assert.assertEquals(sourceSchema.asStruct(), actualSchema.asStruct());
}

@Test
public void testReassignIdsWithIdentifier() {
Schema schema = new Schema(
Lists.newArrayList(
required(0, "a", Types.IntegerType.get()),
required(1, "A", Types.IntegerType.get())),
Sets.newHashSet(0)
);
Schema sourceSchema = new Schema(
Lists.newArrayList(
required(1, "a", Types.IntegerType.get()),
required(2, "A", Types.IntegerType.get())),
Sets.newHashSet(1)
);
final Schema actualSchema = TypeUtil.reassignIds(schema, sourceSchema);
Assert.assertEquals(sourceSchema.asStruct(), actualSchema.asStruct());
Assert.assertEquals(sourceSchema.identifierFieldIds(), actualSchema.identifierFieldIds());
}

Contributor:

Do we want to add a case that calls assignFreshIds(Schema schema, Schema baseSchema, NextID nextId) where schema doesn't have identifier fields but baseSchema does, and verifies that the output schema has identifier fields? I think it's an existing use case we have in TableMetadata that may be worth explicit testing.

@Test(expected = IllegalArgumentException.class)
public void testReassignIdsIllegalArgumentException() {
Schema schema = new Schema(
5 changes: 1 addition & 4 deletions core/src/main/java/org/apache/iceberg/SchemaUpdate.java
@@ -25,7 +25,6 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.mapping.MappingUtil;
import org.apache.iceberg.mapping.NameMapping;
import org.apache.iceberg.mapping.NameMappingParser;
@@ -85,9 +84,7 @@ private SchemaUpdate(TableOperations ops, TableMetadata base, Schema schema, int
this.schema = schema;
this.lastColumnId = lastColumnId;
this.idToParent = Maps.newHashMap(TypeUtil.indexParents(schema.asStruct()));
this.identifierFieldNames = schema.identifierFieldIds().stream()
.map(id -> schema.findField(id).name())
.collect(Collectors.toSet());
this.identifierFieldNames = schema.identifierFieldNames();
}

@Override
@@ -158,6 +158,32 @@ public void testSchemaUpdateComplexType() throws Exception {
Assert.assertEquals("Should contain 0 Avro manifest files", 0, manifests.size());
}

@Test
public void testSchemaUpdateIdentifierFields() throws Exception {
Assert.assertTrue("Should create v1 metadata",
version(1).exists() && version(1).isFile());
Assert.assertFalse("Should not create v2 or newer versions",
version(2).exists());

Schema updatedSchema = new Schema(Lists.newArrayList(
required(1, "id", Types.IntegerType.get(), "unique ID"),
required(2, "data", Types.StringType.get())
), Sets.newHashSet(1));

table.updateSchema()
.setIdentifierFields("id")
.commit();

Assert.assertTrue("Should create v2 for the update",
version(2).exists() && version(2).isFile());
Assert.assertEquals("Should write the current version to the hint file",
2, readVersionHint());
Assert.assertEquals("Table schema should match schema with reassigned ids",
updatedSchema.asStruct(), table.schema().asStruct());
Assert.assertEquals("Identifier fields should match schema with reassigned ids",
updatedSchema.identifierFieldIds(), table.schema().identifierFieldIds());
}

@Test
public void testFailedCommit() throws Exception {
// apply the change to metadata without committing