From eaa6ff0ecfb66f64f0751bbe803b2bacee43e2de Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Wed, 16 Jun 2021 14:08:54 -0700 Subject: [PATCH 1/4] Core: do not allow optional, double and float identifier fields --- .../main/java/org/apache/iceberg/Schema.java | 4 +- .../java/org/apache/iceberg/SchemaUpdate.java | 5 ++ .../org/apache/iceberg/TestSchemaUpdate.java | 79 ++++++++++++++----- 3 files changed, 66 insertions(+), 22 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/Schema.java b/api/src/main/java/org/apache/iceberg/Schema.java index 06a48872edd0..dea54aab6279 100644 --- a/api/src/main/java/org/apache/iceberg/Schema.java +++ b/api/src/main/java/org/apache/iceberg/Schema.java @@ -193,11 +193,11 @@ public List columns() { * It consists of a unique set of primitive fields in the schema. * An identifier field must be at root, or nested in a chain of structs (no maps or lists). * A row should be unique in a table based on the values of the identifier fields. + * Optional, float and double columns cannot be used as identifier fields. * However, Iceberg identifier differs from primary key in the following ways: * @@ -215,7 +215,7 @@ public Set identifierFieldIds() { public Set identifierFieldNames() { return identifierFieldIds() .stream() - .map(id -> findField(id).name()) + .map(id -> lazyIdToName().get(id)) .collect(Collectors.toSet()); } diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java index c14398d17e1c..d4201fe0093b 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java +++ b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java @@ -474,6 +474,11 @@ private static void validateIdentifierField(int fieldId, Map new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) - .setIdentifierFields("locations.key.zip") + .setIdentifierFields("data") .apply()); - AssertHelpers.assertThrows("add a map value nested field should fail", + AssertHelpers.assertThrows("add a map key nested field should fail", IllegalArgumentException.class, "must not be nested in " + SCHEMA.findField("locations"), () -> new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) - .setIdentifierFields("locations.value.lat") + .setIdentifierFields("locations.key.zip") .apply()); AssertHelpers.assertThrows("add a nested field in list should fail", @@ -1364,19 +1370,52 @@ public void testSetIdentifierFieldsFails() { .apply()); Schema newSchema = new SchemaUpdate(SCHEMA, SCHEMA_LAST_COLUMN_ID) - .addColumn("new", Types.StructType.of( - Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 1, "fields", Types.ListType.ofOptional( - SCHEMA_LAST_COLUMN_ID + 2, Types.StructType.of( - Types.NestedField.optional(SCHEMA_LAST_COLUMN_ID + 3, "nested", Types.StringType.get()) + .allowIncompatibleChanges() + .addRequiredColumn("col_float", Types.FloatType.get()) + .addRequiredColumn("col_double", Types.DoubleType.get()) + .addRequiredColumn("new", Types.StructType.of( + Types.NestedField.required(SCHEMA_LAST_COLUMN_ID + 3, "fields", Types.ListType.ofOptional( + SCHEMA_LAST_COLUMN_ID + 4, Types.StructType.of( + Types.NestedField.required(SCHEMA_LAST_COLUMN_ID + 5, "nested", Types.StringType.get()) )) ) )) + .addRequiredColumn("new_map", Types.MapType.ofRequired(SCHEMA_LAST_COLUMN_ID + 6, 11, + Types.StructType.of( + required(SCHEMA_LAST_COLUMN_ID + 7, "key_col", Types.StringType.get()) + ), + Types.StructType.of( + required(SCHEMA_LAST_COLUMN_ID + 8, "val_col", Types.StringType.get()) + )), "map of address to coordinate") .apply(); + int lastColId = SCHEMA_LAST_COLUMN_ID + 8; + + AssertHelpers.assertThrows("add a double field should fail", + IllegalArgumentException.class, + "must not be float or double field", + () -> new SchemaUpdate(newSchema, lastColId) + .setIdentifierFields("col_double") + .apply()); + + AssertHelpers.assertThrows("add a float field should fail", + IllegalArgumentException.class, + "must not be float or double field", + () -> new SchemaUpdate(newSchema, lastColId) + .setIdentifierFields("col_float") + .apply()); + + AssertHelpers.assertThrows("add a map value nested field should fail", + IllegalArgumentException.class, + "must not be nested in " + newSchema.findField("new_map"), + () -> new SchemaUpdate(newSchema, lastColId) + .setIdentifierFields("new_map.value.val_col") + .apply()); + AssertHelpers.assertThrows("add a nested field in struct of a map should fail", IllegalArgumentException.class, "must not be nested in " + newSchema.findField("new.fields"), - () -> new SchemaUpdate(newSchema, SCHEMA_LAST_COLUMN_ID + 3) + () -> new SchemaUpdate(newSchema, lastColId) .setIdentifierFields("new.fields.element.nested") .apply()); } From 1753263a778cb0308713c134e2b7b17b9acfb6c6 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Wed, 16 Jun 2021 14:38:04 -0700 Subject: [PATCH 2/4] fix affected test --- .../test/java/org/apache/iceberg/TestCreateTransaction.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java b/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java index a693acfce79c..c7c8a7ca5742 100644 --- a/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java +++ b/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java @@ -86,7 +86,8 @@ public void testCreateTransactionAndUpdateSchema() throws IOException { TestTables.metadataVersion("test_create")); txn.updateSchema() - .addColumn("col", Types.StringType.get()) + .allowIncompatibleChanges() + .addRequiredColumn("col", Types.StringType.get()) .setIdentifierFields("id", "col") .commit(); @@ -103,7 +104,7 @@ public void testCreateTransactionAndUpdateSchema() throws IOException { Lists.newArrayList( required(1, "id", Types.IntegerType.get()), required(2, "data", Types.StringType.get()), - optional(3, "col", Types.StringType.get())), + required(3, "col", Types.StringType.get())), Sets.newHashSet(1, 3) ); From 7ec99468c9024a66a6cf2b635f1b01223aeb3b0e Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Wed, 16 Jun 2021 15:11:29 -0700 Subject: [PATCH 3/4] fix checkstyle --- core/src/test/java/org/apache/iceberg/TestCreateTransaction.java | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java b/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java index c7c8a7ca5742..283f85470723 100644 --- a/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java +++ b/core/src/test/java/org/apache/iceberg/TestCreateTransaction.java @@ -32,7 +32,6 @@ import org.junit.runners.Parameterized; import static org.apache.iceberg.PartitionSpec.unpartitioned; -import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; @RunWith(Parameterized.class) From b863c86e1fb99a7b6d01a0559a3742d321fb8042 Mon Sep 17 00:00:00 2001 From: Jack Ye Date: Thu, 17 Jun 2021 09:38:21 -0700 Subject: [PATCH 4/4] fix error message --- core/src/main/java/org/apache/iceberg/SchemaUpdate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java index d4201fe0093b..b3c0a1cf9f6e 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaUpdate.java +++ b/core/src/main/java/org/apache/iceberg/SchemaUpdate.java @@ -458,7 +458,7 @@ private static Schema applyChanges(Schema schema, List deletes, Set freshIdentifierFieldIds = Sets.newHashSet(); for (String name : identifierFieldNames) { Preconditions.checkArgument(nameToId.containsKey(name), - "Cannot add field %s as an identifier field: not found in current schema or added columns"); + "Cannot add field %s as an identifier field: not found in current schema or added columns", name); freshIdentifierFieldIds.add(nameToId.get(name)); }