Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions core/src/main/java/org/apache/iceberg/SchemaUpdate.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
Expand Down Expand Up @@ -164,7 +165,7 @@ private void internalAddColumn(
int newId = assignNewColumnId();

// update tracking for moves
addedNameToId.put(fullName, newId);
addedNameToId.put(caseSensitivityAwareName(fullName), newId);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

initially I was thinking that we should not store the name in a case insensitive way but only keep the lookup case insensitive (similar to how we do it when we lookup fields in Schema. However, Schema keeps two maps, one which is case sensitive and the other which is case insensitive. I was also thinking whether this distinction would make sense for SchemaUpdate but couldn't find a good enough argument to justify this, so I think the changes you have here make sense to me

if (parentId != TABLE_ROOT_ID) {
idToParent.put(newId, parentId);
}
Expand Down Expand Up @@ -391,7 +392,7 @@ public UpdateSchema caseSensitive(boolean caseSensitivity) {
}

private boolean isAdded(String name) {
return addedNameToId.containsKey(name);
return addedNameToId.containsKey(caseSensitivityAwareName(name));
}

private Types.NestedField findForUpdate(String name) {
Expand All @@ -405,7 +406,7 @@ private Types.NestedField findForUpdate(String name) {
return existing;
}

Integer addedId = addedNameToId.get(name);
Integer addedId = addedNameToId.get(caseSensitivityAwareName(name));
if (addedId != null) {
return updates.get(addedId);
}
Expand All @@ -414,7 +415,7 @@ private Types.NestedField findForUpdate(String name) {
}

private Integer findForMove(String name) {
Integer addedId = addedNameToId.get(name);
Integer addedId = addedNameToId.get(caseSensitivityAwareName(name));
if (addedId != null) {
return addedId;
}
Expand Down Expand Up @@ -870,4 +871,8 @@ public MoveType type() {
private Types.NestedField findField(String fieldName) {
return caseSensitive ? schema.findField(fieldName) : schema.caseInsensitiveFindField(fieldName);
}

private String caseSensitivityAwareName(String name) {
return caseSensitive ? name : name.toLowerCase(Locale.ROOT);
}
}
76 changes: 76 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java
Original file line number Diff line number Diff line change
Expand Up @@ -2503,4 +2503,80 @@ public void testAddRequiredUnknown() {
.isInstanceOf(IllegalArgumentException.class)
.hasMessage("Cannot create required field with unknown type: unk");
}

@Test
public void testCaseInsensitiveAddTopLevelAndMove() {
Schema schema = new Schema(required(1, "id", Types.LongType.get()));

Schema expected =
new Schema(
optional(2, "data", Types.StringType.get()), required(1, "id", Types.LongType.get()));

Schema actual =
new SchemaUpdate(schema, schema.highestFieldId())
.caseSensitive(false)
.addColumn("data", Types.StringType.get())
.moveFirst("dAtA")
.apply();

assertThat(actual.asStruct()).isEqualTo(expected.asStruct());
}

@Test
public void testCaseInsensitiveAddNestedAndMove() {
Schema schema =
new Schema(
required(1, "id", Types.LongType.get()),
optional(
2, "struct", Types.StructType.of(required(3, "field1", Types.StringType.get()))));

Schema expected =
new Schema(
required(1, "id", Types.LongType.get()),
optional(
2,
"struct",
Types.StructType.of(
optional(4, "field2", Types.IntegerType.get()),
required(3, "field1", Types.StringType.get()))));

Schema actual =
new SchemaUpdate(schema, schema.highestFieldId())
.caseSensitive(false)
.addColumn("STRUCT", "field2", Types.IntegerType.get())
.moveFirst("STRUCT.FIELD2")
.apply();

assertThat(actual.asStruct()).isEqualTo(expected.asStruct());
}

@Test
public void testCaseInsensitiveMoveAfterNewlyAddedField() {
Schema schema =
new Schema(
required(1, "id", Types.LongType.get()),
optional(
2, "struct", Types.StructType.of(required(3, "field1", Types.StringType.get()))));

Schema expected =
new Schema(
required(1, "id", Types.LongType.get()),
optional(
2,
"struct",
Types.StructType.of(
required(3, "field1", Types.StringType.get()),
optional(4, "field2", Types.IntegerType.get()),
optional(5, "field3", Types.DoubleType.get()))));

Schema actual =
new SchemaUpdate(schema, schema.highestFieldId())
.caseSensitive(false)
.addColumn("struct", "field2", Types.IntegerType.get())
.addColumn("STRUCT", "field3", Types.DoubleType.get())
.moveAfter("STRUCT.FIELD3", "struct.FIELD2")
.apply();

assertThat(actual.asStruct()).isEqualTo(expected.asStruct());
}
}