diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index 06a48872edd0..dea54aab6279 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -193,11 +193,11 @@ public List columns() { * It consists of a unique set of primitive fields in the schema. * An identifier field must be at root, or nested in a chain of structs (no maps or lists). * A row should be unique in a table based on the values of the identifier fields. + * Optional, float and double columns cannot be used as identifier fields. * However, Iceberg identifier differs from primary key in the following ways: * @@ -215,7 +215,7 @@ public Set identifierFieldIds() { public Set identifierFieldNames() { return identifierFieldIds() .stream() - .map(id -> findField(id).name()) + .map(id -> lazyIdToName().get(id)) .collect(Collectors.toSet()); } diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java index c14398d17e1c..b3c0a1cf9f6e 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java +++ b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java @@ -458,7 +458,7 @@ private static Schema applyChanges(Schema schema, List deletes, Set freshIdentifierFieldIds = Sets.newHashSet(); for (String name : identifierFieldNames) { Preconditions.checkArgument(nameToId.containsKey(name), - "Cannot add field %s as an identifier field: not found in current schema or added columns"); + "Cannot add field %s as an identifier field: not found in current schema or added columns", name); freshIdentifierFieldIds.add(nameToId.get(name)); } @@ -474,6 +474,11 @@ private static void validateIdentifierField(int fieldId, Map new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) - .setIdentifierFields("locations.key.zip") + .setIdentifierFields("data") .apply()); - AssertHelpers.assertThrows("add a map value nested field should fail", + AssertHelpers.assertThrows("add a map key nested field should fail", IllegalArgumentException.class, "must not be nested in " + SCHEMA.findField("locations"), () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) - .setIdentifierFields("locations.value.lat") + .setIdentifierFields("locations.key.zip") .apply()); AssertHelpers.assertThrows("add a nested field in list should fail", @@ -1364,19 +1370,52 @@ public void testSetIdentifierFieldsFails() { .apply()); Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) - .addColumn("new", Types.StructType.of( - Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 1, "fields", Types.ListType.ofOptional( - SCHEMA_LAST_COLUMN_ID + 2, Types.StructType.of( - Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 3, "nested", Types.StringType.get()) + .allowIncompatibleChanges() + .addRequiredColumn("col_float", Types.FloatType.get()) + .addRequiredColumn("col_double", Types.DoubleType.get()) + .addRequiredColumn("new", Types.StructType.of( + Types.NestedField.required(SCHEMA_LAST_COLUMN_ID + 3, "fields", Types.ListType.ofOptional( + SCHEMA_LAST_COLUMN_ID + 4, Types.StructType.of( + Types.NestedField.required(SCHEMA_LAST_COLUMN_ID + 5, "nested", Types.StringType.get()) )) ) )) + .addRequiredColumn("new_map", Types.MapType.ofRequired(SCHEMA_LAST_COLUMN_ID + 6, 11, + Types.StructType.of( + required(SCHEMA_LAST_COLUMN_ID + 7, "key_col", Types.StringType.get()) + ), + Types.StructType.of( + required(SCHEMA_LAST_COLUMN_ID + 8, "val_col", Types.StringType.get()) + )), "map of address to coordinate") .apply(); + int lastColId = SCHEMA_LAST_COLUMN_ID + 8; + + AssertHelpers.assertThrows("add a double field should fail", + IllegalArgumentException.class, + "must not be float or double field", + () -> new SchemaUpdate(newSchema, lastColId) + .setIdentifierFields("col_double") + .apply()); + + AssertHelpers.assertThrows("add a float field should fail", + IllegalArgumentException.class, + "must not be float or double field", + () -> new SchemaUpdate(newSchema, lastColId) + .setIdentifierFields("col_float") + .apply()); + + AssertHelpers.assertThrows("add a map value nested field should fail", + IllegalArgumentException.class, + "must not be nested in " + newSchema.findField("new_map"), + () -> new SchemaUpdate(newSchema, lastColId) + .setIdentifierFields("new_map.value.val_col") + .apply()); + AssertHelpers.assertThrows("add a nested field in struct of a map should fail", IllegalArgumentException.class, "must not be nested in " + newSchema.findField("new.fields"), - () -> new SchemaUpdate(newSchema, SCHEMA_LAST_COLUMN_ID + 3) + () -> new SchemaUpdate(newSchema, lastColId) .setIdentifierFields("new.fields.element.nested") .apply()); }