apache · rdblue · Feb 23, 2021 · Jan 14, 2021 · Feb 2, 2021 · Feb 4, 2021
diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java
@@ -40,12 +40,16 @@
 
 /**
  * The schema of a data table.
+ * <p>
+ * The schema ID is populated only when the schema is read from or written to table metadata;
+ * otherwise it defaults to 0.
  */
 public class Schema implements Serializable {
   private static final Joiner NEWLINE = Joiner.on('\n');
   private static final String ALL_COLUMNS = "*";
 
   private final StructType struct;
+  private final int schemaId;
   private transient BiMap<String, Integer> aliasToId = null;
   private transient Map<Integer, NestedField> idToField = null;
   private transient Map<String, Integer> nameToId = null;
@@ -54,6 +58,7 @@ public class Schema implements Serializable {
   private transient Map<Integer, String> idToName = null;
 
   public Schema(List<NestedField> columns, Map<String, Integer> aliases) {
+    this.schemaId = 0;
     this.struct = StructType.of(columns);
     this.aliasToId = aliases != null ? ImmutableBiMap.copyOf(aliases) : null;
 
@@ -62,12 +67,21 @@ public Schema(List<NestedField> columns, Map<String, Integer> aliases) {
   }
 
   public Schema(List<NestedField> columns) {
+    this(0, columns); // no schema ID supplied, so fall back to the default of 0
+  }
+
+  public Schema(int schemaId, List<NestedField> columns) {
+    this.schemaId = schemaId;
     this.struct = StructType.of(columns);
     lazyIdToName();
   }
 
   public Schema(NestedField... columns) {
-    this(Arrays.asList(columns));
+    this(0, Arrays.asList(columns)); // no schema ID supplied, so fall back to the default of 0
+  }
+
+  public Schema(int schemaId, NestedField... columns) {
+    this(schemaId, Arrays.asList(columns));
   }
 
   private Map<Integer, NestedField> lazyIdToField() {
@@ -105,6 +119,16 @@ private Map<Integer, Accessor<StructLike>> lazyIdToAccessor() {
     return idToAccessor;
   }
 
+  /**
+   * Returns the schema ID for this schema.
+   * <p>
+   * Note that the schema ID is populated only when the schema is read from or written to table metadata;
+   * otherwise it defaults to 0.
+   */
+  public int schemaId() {
+    return this.schemaId;
+  }
+
   /**
    * Returns an alias map for this schema, if set.
    * <p>

diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java
@@ -157,6 +157,21 @@ public static Schema assignFreshIds(Schema schema, NextID nextId) {
         .fields());
   }
 
+  /**
+   * Assigns fresh ids from the {@link NextID nextId function} for all fields in a schema.
+   *
+   * @param schemaId the schema ID to set on the returned schema
+   * @param schema a schema
+   * @param nextId an id assignment function
+   * @return a structurally identical schema with the given schema ID and new ids assigned by the nextId function
+   */
+  public static Schema assignFreshIds(int schemaId, Schema schema, NextID nextId) {
+    return new Schema(schemaId, TypeUtil
+        .visit(schema.asStruct(), new AssignFreshIds(nextId))
+        .asNestedType()
+        .fields());
+  }
+
   /**
    * Assigns ids to match a given schema, and fresh ids from the {@link NextID nextId function} for all other fields.
    *

diff --git a/core/src/main/java/org/apache/iceberg/SchemaParser.java b/core/src/main/java/org/apache/iceberg/SchemaParser.java
@@ -39,6 +39,7 @@ public class SchemaParser {
   private SchemaParser() {
   }
 
+  private static final String SCHEMA_ID = "schema-id";
   private static final String TYPE = "type";
   private static final String STRUCT = "struct";
   private static final String LIST = "list";
@@ -58,9 +59,17 @@ private SchemaParser() {
   private static final String VALUE_REQUIRED = "value-required";
 
   static void toJson(Types.StructType struct, JsonGenerator generator) throws IOException {
+    toJson(struct, null, generator); // null schemaId: no "schema-id" field is written
+  }
+
+  static void toJson(Types.StructType struct, Integer schemaId, JsonGenerator generator) throws IOException {
     generator.writeStartObject();
 
     generator.writeStringField(TYPE, STRUCT);
+    if (schemaId != null) {
+      generator.writeNumberField(SCHEMA_ID, schemaId);
+    }
+
     generator.writeArrayFieldStart(FIELDS);
     for (Types.NestedField field : struct.fields()) {
       generator.writeStartObject();
@@ -134,6 +143,10 @@ static void toJson(Type type, JsonGenerator generator) throws IOException {
     }
   }
 
+  public static void toJsonWithId(Schema schema, JsonGenerator generator) throws IOException {
+    toJson(schema.asStruct(), schema.schemaId(), generator); // ID is non-null, so "schema-id" is written
+  }
+
   public static void toJson(Schema schema, JsonGenerator generator) throws IOException {
     toJson(schema.asStruct(), generator);
   }
@@ -253,4 +266,15 @@ public static Schema fromJson(String json) {
       }
     });
   }
+
+  public static Schema fromJsonWithId(int schemaId, JsonNode json) {
+    Type type = typeFromJson(json); // only a struct type is accepted as a schema (checked below)
+    Preconditions.checkArgument(type.isNestedType() && type.asNestedType().isStructType(),
+        "Cannot create schema, not a struct type: %s", type);
+    return new Schema(schemaId, type.asNestedType().asStructType().fields());
+  }
+
+  public static Schema fromJsonWithId(JsonNode json) {
+    return fromJsonWithId(JsonUtil.getInt(SCHEMA_ID, json), json); // ID is looked up under "schema-id"
+  }
 }