-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Core: update schema constructor callers to include fresh identifier #2556
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| import java.util.concurrent.atomic.AtomicInteger; | ||
| import java.util.function.Predicate; | ||
| import java.util.function.Supplier; | ||
| import java.util.stream.Collectors; | ||
| import org.apache.iceberg.Schema; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; | ||
|
|
@@ -151,10 +152,8 @@ public static Type assignFreshIds(Type type, NextID nextId) { | |
| * @return a structurally identical schema with new ids assigned by the nextId function | ||
| */ | ||
| public static Schema assignFreshIds(Schema schema, NextID nextId) { | ||
| return new Schema(TypeUtil | ||
| .visit(schema.asStruct(), new AssignFreshIds(nextId)) | ||
| .asNestedType() | ||
| .fields()); | ||
| Types.StructType struct = TypeUtil.visit(schema.asStruct(), new AssignFreshIds(nextId)).asStructType(); | ||
| return new Schema(struct.fields(), refreshIdentifierFields(struct, schema)); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -166,10 +165,8 @@ public static Schema assignFreshIds(Schema schema, NextID nextId) { | |
| * @return a structurally identical schema with new ids assigned by the nextId function | ||
| */ | ||
| public static Schema assignFreshIds(int schemaId, Schema schema, NextID nextId) { | ||
| return new Schema(schemaId, TypeUtil | ||
| .visit(schema.asStruct(), new AssignFreshIds(nextId)) | ||
| .asNestedType() | ||
| .fields()); | ||
| Types.StructType struct = TypeUtil.visit(schema.asStruct(), new AssignFreshIds(nextId)).asStructType(); | ||
| return new Schema(schemaId, struct.fields(), refreshIdentifierFields(struct, schema)); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -181,10 +178,24 @@ public static Schema assignFreshIds(int schemaId, Schema schema, NextID nextId) | |
| * @return a structurally identical schema with new ids assigned by the nextId function | ||
| */ | ||
| public static Schema assignFreshIds(Schema schema, Schema baseSchema, NextID nextId) { | ||
| return new Schema(TypeUtil | ||
| Types.StructType struct = TypeUtil | ||
| .visit(schema.asStruct(), new AssignFreshIds(schema, baseSchema, nextId)) | ||
| .asNestedType() | ||
| .fields()); | ||
| .asStructType(); | ||
| return new Schema(struct.fields(), refreshIdentifierFields(struct, schema)); | ||
| } | ||
|
|
||
| /** | ||
| * Get the identifier fields in the fresh schema based on the identifier fields in the base schema. | ||
| * @param freshSchema fresh schema | ||
| * @param baseSchema base schema | ||
| * @return identifier fields in the fresh schema | ||
| */ | ||
| public static Set<Integer> refreshIdentifierFields(Types.StructType freshSchema, Schema baseSchema) { | ||
| Map<String, Integer> nameToId = TypeUtil.indexByName(freshSchema); | ||
| Set<String> identifierFieldNames = baseSchema.identifierFieldNames(); | ||
| identifierFieldNames.forEach(name -> Preconditions.checkArgument(nameToId.containsKey(name), | ||
| "Cannot find ID for identifier field %s in schema %s", name, freshSchema)); | ||
| return identifierFieldNames.stream().map(nameToId::get).collect(Collectors.toSet()); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -213,7 +224,7 @@ public static Schema assignIncreasingFreshIds(Schema schema) { | |
| */ | ||
| public static Schema reassignIds(Schema schema, Schema idSourceSchema) { | ||
| Types.StructType struct = visit(schema, new ReassignIds(idSourceSchema)).asStructType(); | ||
| return new Schema(struct.fields()); | ||
| return new Schema(struct.fields(), refreshIdentifierFields(struct, schema)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. since I wonder if we want to use
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would prefer to call |
||
| } | ||
|
|
||
| public static Type find(Schema schema, Predicate<Type> predicate) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,8 @@ | |
| package org.apache.iceberg.types; | ||
|
|
||
| import org.apache.iceberg.Schema; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.Lists; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.Sets; | ||
| import org.junit.Assert; | ||
| import org.junit.Test; | ||
|
|
||
|
|
@@ -42,6 +44,65 @@ public void testReassignIdsDuplicateColumns() { | |
| Assert.assertEquals(sourceSchema.asStruct(), actualSchema.asStruct()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testReassignIdsWithIdentifier() { | ||
| Schema schema = new Schema( | ||
| Lists.newArrayList( | ||
| required(0, "a", Types.IntegerType.get()), | ||
| required(1, "A", Types.IntegerType.get())), | ||
| Sets.newHashSet(0) | ||
| ); | ||
| Schema sourceSchema = new Schema( | ||
| Lists.newArrayList( | ||
| required(1, "a", Types.IntegerType.get()), | ||
| required(2, "A", Types.IntegerType.get())), | ||
| Sets.newHashSet(1) | ||
| ); | ||
| final Schema actualSchema = TypeUtil.reassignIds(schema, sourceSchema); | ||
| Assert.assertEquals(sourceSchema.asStruct(), actualSchema.asStruct()); | ||
| Assert.assertEquals("identifier field ID should change based on source schema", | ||
| sourceSchema.identifierFieldIds(), actualSchema.identifierFieldIds()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testAssignIncreasingFreshIdWithIdentifier() { | ||
| Schema schema = new Schema( | ||
| Lists.newArrayList( | ||
| required(10, "a", Types.IntegerType.get()), | ||
| required(11, "A", Types.IntegerType.get())), | ||
| Sets.newHashSet(10) | ||
| ); | ||
| Schema expectedSchema = new Schema( | ||
| Lists.newArrayList( | ||
| required(1, "a", Types.IntegerType.get()), | ||
| required(2, "A", Types.IntegerType.get())), | ||
| Sets.newHashSet(1) | ||
| ); | ||
| final Schema actualSchema = TypeUtil.assignIncreasingFreshIds(schema); | ||
| Assert.assertEquals(expectedSchema.asStruct(), actualSchema.asStruct()); | ||
| Assert.assertEquals("identifier field ID should change based on source schema", | ||
| expectedSchema.identifierFieldIds(), actualSchema.identifierFieldIds()); | ||
| } | ||
|
|
||
| @Test | ||
| public void testAssignIncreasingFreshIdNewIdentifier() { | ||
| Schema schema = new Schema( | ||
| Lists.newArrayList( | ||
| required(10, "a", Types.IntegerType.get()), | ||
| required(11, "A", Types.IntegerType.get())), | ||
| Sets.newHashSet(10) | ||
| ); | ||
| Schema sourceSchema = new Schema( | ||
| Lists.newArrayList( | ||
| required(1, "a", Types.IntegerType.get()), | ||
| required(2, "A", Types.IntegerType.get())) | ||
| ); | ||
| final Schema actualSchema = TypeUtil.reassignIds(schema, sourceSchema); | ||
| Assert.assertEquals(sourceSchema.asStruct(), actualSchema.asStruct()); | ||
| Assert.assertEquals("source schema missing identifier should not impact refreshing new identifier", | ||
| Sets.newHashSet(sourceSchema.findField("a").fieldId()), actualSchema.identifierFieldIds()); | ||
| } | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. do we want to add a case to call |
||
| @Test(expected = IllegalArgumentException.class) | ||
| public void testReassignIdsIllegalArgumentException() { | ||
| Schema schema = new Schema( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.