Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions api/src/main/java/org/apache/iceberg/Schema.java
Original file line number Diff line number Diff line change
Expand Up @@ -193,11 +193,11 @@ public List<NestedField> columns() {
* It consists of a unique set of primitive fields in the schema.
* An identifier field must be at root, or nested in a chain of structs (no maps or lists).
* A row should be unique in a table based on the values of the identifier fields.
* Optional, float and double columns cannot be used as identifier fields.
* However, Iceberg identifier differs from primary key in the following ways:
* <ul>
* <li>Iceberg does not enforce the uniqueness of a row based on this identifier information.
* It is used for operations like upsert to define the default upsert key.</li>
* <li>NULL can be used as value of an identifier field. Iceberg ensures null-safe equality check.</li>
* <li>A nested field in a struct can be used as an identifier. For example, if there is a "last_name" field
* inside a "user" struct in a schema, field "user.last_name" can be set as a part of the identifier field.</li>
* </ul>
Expand All @@ -215,7 +215,7 @@ public Set<Integer> identifierFieldIds() {
public Set<String> identifierFieldNames() {
return identifierFieldIds()
.stream()
.map(id -> findField(id).name())
.map(id -> lazyIdToName().get(id))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is another bug not really related to the discussion, trying to just merge it here instead of having another 1 line PR.

.collect(Collectors.toSet());
}

Expand Down
7 changes: 6 additions & 1 deletion core/src/main/java/org/apache/iceberg/SchemaUpdate.java
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ private static Schema applyChanges(Schema schema, List<Integer> deletes,
Set<Integer> freshIdentifierFieldIds = Sets.newHashSet();
for (String name : identifierFieldNames) {
Preconditions.checkArgument(nameToId.containsKey(name),
"Cannot add field %s as an identifier field: not found in current schema or added columns");
"Cannot add field %s as an identifier field: not found in current schema or added columns", name);
freshIdentifierFieldIds.add(nameToId.get(name));
}

Expand All @@ -474,6 +474,11 @@ private static void validateIdentifierField(int fieldId, Map<Integer, Types.Nest
Types.NestedField field = idToField.get(fieldId);
Preconditions.checkArgument(field.type().isPrimitiveType(),
"Cannot add field %s as an identifier field: not a primitive type field", field.name());
Preconditions.checkArgument(field.isRequired(),
"Cannot add field %s as an identifier field: not a required field", field.name());
Preconditions.checkArgument(!Types.DoubleType.get().equals(field.type()) &&
!Types.FloatType.get().equals(field.type()),
"Cannot add field %s as an identifier field: must not be float or double field", field.name());

// check whether the nested field is in a chain of struct fields
Integer parentId = idToParent.get(field.fieldId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import org.junit.runners.Parameterized;

import static org.apache.iceberg.PartitionSpec.unpartitioned;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;

@RunWith(Parameterized.class)
Expand Down Expand Up @@ -86,7 +85,8 @@ public void testCreateTransactionAndUpdateSchema() throws IOException {
TestTables.metadataVersion("test_create"));

txn.updateSchema()
.addColumn("col", Types.StringType.get())
.allowIncompatibleChanges()
.addRequiredColumn("col", Types.StringType.get())
.setIdentifierFields("id", "col")
.commit();

Expand All @@ -103,7 +103,7 @@ public void testCreateTransactionAndUpdateSchema() throws IOException {
Lists.newArrayList(
required(1, "id", Types.IntegerType.get()),
required(2, "data", Types.StringType.get()),
optional(3, "col", Types.StringType.get())),
required(3, "col", Types.StringType.get())),
Sets.newHashSet(1, 3)
);

Expand Down
79 changes: 59 additions & 20 deletions core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java
Original file line number Diff line number Diff line change
Expand Up @@ -1238,7 +1238,8 @@ public void testAddExistingIdentifierFields() {
@Test
public void testAddNewIdentifierFieldColumns() {
Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
.addColumn("new_field", Types.StringType.get())
.allowIncompatibleChanges()
.addRequiredColumn("new_field", Types.StringType.get())
.setIdentifierFields("id", "new_field")
.apply();

Expand All @@ -1247,8 +1248,9 @@ public void testAddNewIdentifierFieldColumns() {
newSchema.identifierFieldIds());

newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
.allowIncompatibleChanges()
.setIdentifierFields("id", "new_field")
.addColumn("new_field", Types.StringType.get())
.addRequiredColumn("new_field", Types.StringType.get())
.apply();

Assert.assertEquals("set identifier then add column should succeed",
Expand All @@ -1267,8 +1269,9 @@ public void testAddNestedIdentifierFieldColumns() {
newSchema.identifierFieldIds());

newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
.addColumn("new", Types.StructType.of(
Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 1, "field", Types.StringType.get())
.allowIncompatibleChanges()
.addRequiredColumn("new", Types.StructType.of(
Types.NestedField.required(SCHEMA_LAST_COLUMN_ID + 1, "field", Types.StringType.get())
))
.setIdentifierFields("new.field")
.apply();
Expand All @@ -1278,9 +1281,10 @@ public void testAddNestedIdentifierFieldColumns() {
newSchema.identifierFieldIds());

newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
.addColumn("new", Types.StructType.of(
Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 1, "field", Types.StructType.of(
Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 2, "nested", Types.StringType.get())))))
.allowIncompatibleChanges()
.addRequiredColumn("new", Types.StructType.of(
Types.NestedField.required(SCHEMA_LAST_COLUMN_ID + 1, "field", Types.StructType.of(
Types.NestedField.required(SCHEMA_LAST_COLUMN_ID + 2, "nested", Types.StringType.get())))))
.setIdentifierFields("new.field.nested")
.apply();

Expand All @@ -1292,7 +1296,8 @@ public void testAddNestedIdentifierFieldColumns() {
@Test
public void testAddDottedIdentifierFieldColumns() {
Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
.addColumn(null, "dot.field", Types.StringType.get())
.allowIncompatibleChanges()
.addRequiredColumn(null, "dot.field", Types.StringType.get())
.setIdentifierFields("id", "dot.field")
.apply();

Expand All @@ -1304,8 +1309,9 @@ public void testAddDottedIdentifierFieldColumns() {
@Test
public void testRemoveIdentifierFields() {
Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
.addColumn("new_field", Types.StringType.get())
.addColumn("new_field2", Types.StringType.get())
.allowIncompatibleChanges()
.addRequiredColumn("new_field", Types.StringType.get())
.addRequiredColumn("new_field2", Types.StringType.get())
.setIdentifierFields("id", "new_field", "new_field2")
.apply();

Expand Down Expand Up @@ -1342,18 +1348,18 @@ public void testSetIdentifierFieldsFails() {
.setIdentifierFields("locations")
.apply());

AssertHelpers.assertThrows("add a map key nested field should fail",
AssertHelpers.assertThrows("add an optional field should fail",
IllegalArgumentException.class,
"must not be nested in " + SCHEMA.findField("locations"),
"not a required field",
() -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
.setIdentifierFields("locations.key.zip")
.setIdentifierFields("data")
.apply());

AssertHelpers.assertThrows("add a map value nested field should fail",
AssertHelpers.assertThrows("add a map key nested field should fail",
IllegalArgumentException.class,
"must not be nested in " + SCHEMA.findField("locations"),
() -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
.setIdentifierFields("locations.value.lat")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was the test case using locations.value.lat removed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah because that will hit the double check before hitting the map check. I added another one later with column new_map.value.val_col

.setIdentifierFields("locations.key.zip")
.apply());

AssertHelpers.assertThrows("add a nested field in list should fail",
Expand All @@ -1364,19 +1370,52 @@ public void testSetIdentifierFieldsFails() {
.apply());

Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID)
.addColumn("new", Types.StructType.of(
Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 1, "fields", Types.ListType.ofOptional(
SCHEMA_LAST_COLUMN_ID + 2, Types.StructType.of(
Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 3, "nested", Types.StringType.get())
.allowIncompatibleChanges()
.addRequiredColumn("col_float", Types.FloatType.get())
.addRequiredColumn("col_double", Types.DoubleType.get())
.addRequiredColumn("new", Types.StructType.of(
Types.NestedField.required(SCHEMA_LAST_COLUMN_ID + 3, "fields", Types.ListType.ofOptional(
SCHEMA_LAST_COLUMN_ID + 4, Types.StructType.of(
Types.NestedField.required(SCHEMA_LAST_COLUMN_ID + 5, "nested", Types.StringType.get())
))
)
))
.addRequiredColumn("new_map", Types.MapType.ofRequired(SCHEMA_LAST_COLUMN_ID + 6, 11,
Types.StructType.of(
required(SCHEMA_LAST_COLUMN_ID + 7, "key_col", Types.StringType.get())
),
Types.StructType.of(
required(SCHEMA_LAST_COLUMN_ID + 8, "val_col", Types.StringType.get())
)), "map of address to coordinate")
.apply();

int lastColId = SCHEMA_LAST_COLUMN_ID + 8;

AssertHelpers.assertThrows("add a double field should fail",
IllegalArgumentException.class,
"must not be float or double field",
() -> new SchemaUpdate(newSchema, lastColId)
.setIdentifierFields("col_double")
.apply());

AssertHelpers.assertThrows("add a float field should fail",
IllegalArgumentException.class,
"must not be float or double field",
() -> new SchemaUpdate(newSchema, lastColId)
.setIdentifierFields("col_float")
.apply());

AssertHelpers.assertThrows("add a map value nested field should fail",
IllegalArgumentException.class,
"must not be nested in " + newSchema.findField("new_map"),
() -> new SchemaUpdate(newSchema, lastColId)
.setIdentifierFields("new_map.value.val_col")
.apply());

AssertHelpers.assertThrows("add a nested field in struct of a map should fail",
IllegalArgumentException.class,
"must not be nested in " + newSchema.findField("new.fields"),
() -> new SchemaUpdate(newSchema, SCHEMA_LAST_COLUMN_ID + 3)
() -> new SchemaUpdate(newSchema, lastColId)
.setIdentifierFields("new.fields.element.nested")
.apply());
}
Expand Down