From cfe64b84a9fbacd506f3e18e9db46299e807bc0f Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Mon, 17 Feb 2025 12:22:40 -0800 Subject: [PATCH 1/4] API: Reject unknown type for required fields, validate defaults. --- .../java/org/apache/iceberg/types/Types.java | 9 +++- .../org/apache/iceberg/TestSchemaUpdate.java | 43 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index d1b4241bcba6..691240dfd9f3 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -680,6 +680,10 @@ private NestedField( Literal writeDefault) { Preconditions.checkNotNull(name, "Name cannot be null"); Preconditions.checkNotNull(type, "Type cannot be null"); + Preconditions.checkArgument( + isOptional || !type.equals(UnknownType.get()), + "Cannot create required field with unknown type: %s", + name); this.isOptional = isOptional; this.id = id; this.name = name; @@ -694,7 +698,10 @@ private static Literal castDefault(Literal defaultValue, Type type) { throw new IllegalArgumentException( String.format("Invalid default value for %s: %s (must be null)", type, defaultValue)); } else if (defaultValue != null) { - return defaultValue.to(type); + Literal typedDefault = defaultValue.to(type); + Preconditions.checkArgument( + typedDefault != null, "Cannot cast default value to %s: %s", type, defaultValue); + return typedDefault; } return null; diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java index 38cf6da18a3e..d1591f80d836 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java @@ -2439,4 +2439,47 @@ public void testMoveDeletedNestedStructFieldToFirst() { assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); } + + @Test + public void testAddUnknown() { + Schema schema = new Schema(required(1, "id", Types.LongType.get())); + Schema expected = + new Schema( + required(1, "id", Types.LongType.get()), optional(2, "unk", Types.UnknownType.get())); + + Schema actual = + new SchemaUpdate(schema, schema.highestFieldId()) + .addColumn("unk", Types.UnknownType.get()) + .apply(); + + assertThat(actual.asStruct()).isEqualTo(expected.asStruct()); + } + + @Test + public void testAddUnknownNonNullDefault() { + Schema schema = new Schema(required(1, "id", Types.LongType.get())); + + assertThatThrownBy( + () -> + new SchemaUpdate(schema, schema.highestFieldId()) + .allowIncompatibleChanges() + .addColumn("unk", Types.UnknownType.get(), Literal.of("string!")) + .apply()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot cast default value to unknown: \"string!\""); + } + + @Test + public void testAddRequiredUnknown() { + Schema schema = new Schema(required(1, "id", Types.LongType.get())); + + assertThatThrownBy( + () -> + new SchemaUpdate(schema, schema.highestFieldId()) + .allowIncompatibleChanges() + .addRequiredColumn("unk", Types.UnknownType.get()) + .apply()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot create required field with unknown type: unk"); + } } From 1cb085e502bc66309698b1d1b465fd9b36030b09 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Mon, 17 Feb 2025 13:12:22 -0800 Subject: [PATCH 2/4] Fix TestSchema. --- .../java/org/apache/iceberg/TestSchema.java | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java index 46db60852b3f..b95e52de33ac 100644 --- a/api/src/test/java/org/apache/iceberg/TestSchema.java +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -30,6 +30,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.FieldSource; @@ -41,8 +42,7 @@ public class TestSchema { ImmutableList.of( Types.TimestampNanoType.withoutZone(), Types.TimestampNanoType.withZone(), - Types.VariantType.get(), - Types.UnknownType.get()); + Types.VariantType.get()); private static final Schema INITIAL_DEFAULT_SCHEMA = new Schema( @@ -122,6 +122,56 @@ private static Stream supportedTypes() { .mapToObj(supportedVersion -> Arguments.of(type, supportedVersion))); } + @Test + public void testUnknownSupport() { + // this needs a different schema becuase it cannot be used in required fields + Schema schemaWithUnknown = + new Schema( + Types.NestedField.required(1, "id", Types.LongType.get()), + Types.NestedField.optional(2, "top", Types.UnknownType.get()), + Types.NestedField.optional( + 3, "arr", Types.ListType.ofOptional(4, Types.UnknownType.get())), + Types.NestedField.required( + 5, + "struct", + Types.StructType.of( + Types.NestedField.optional(6, "inner_op", Types.UnknownType.get()), + Types.NestedField.optional( + 7, + "inner_map", + Types.MapType.ofOptional( + 8, 9, Types.StringType.get(), Types.UnknownType.get())), + Types.NestedField.optional( + 10, + "struct_arr", + Types.StructType.of( + Types.NestedField.optional(11, "deep", Types.UnknownType.get())))))); + + assertThatThrownBy(() -> Schema.checkCompatibility(schemaWithUnknown, 2)) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Invalid schema for v%s:\n" + + "- Invalid type for top: %s is not supported until v%s\n" + + "- Invalid type for arr.element: %s is not supported until v%s\n" + + "- Invalid type for struct.inner_op: %s is not supported until v%s\n" + + "- Invalid type for struct.inner_map.value: %s is not supported until v%s\n" + + "- Invalid type for struct.struct_arr.deep: %s is not supported until v%s", + 2, + Types.UnknownType.get(), + MIN_FORMAT_VERSIONS.get(Type.TypeID.UNKNOWN), + Types.UnknownType.get(), + MIN_FORMAT_VERSIONS.get(Type.TypeID.UNKNOWN), + Types.UnknownType.get(), + MIN_FORMAT_VERSIONS.get(Type.TypeID.UNKNOWN), + Types.UnknownType.get(), + MIN_FORMAT_VERSIONS.get(Type.TypeID.UNKNOWN), + Types.UnknownType.get(), + MIN_FORMAT_VERSIONS.get(Type.TypeID.UNKNOWN)); + + assertThatCode(() -> Schema.checkCompatibility(schemaWithUnknown, 3)) + .doesNotThrowAnyException(); + } + @ParameterizedTest @MethodSource("supportedTypes") public void testTypeSupported(Type type, int supportedVersion) { From 34846cf1d9d5151c8e0df7d7c0b979967cdb7f07 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Mon, 17 Feb 2025 13:13:16 -0800 Subject: [PATCH 3/4] Fix TestPartitionSpecValidation. --- .../java/org/apache/iceberg/TestPartitionSpecValidation.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java index ba7010f196a7..125b0b519fbc 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java @@ -37,7 +37,7 @@ public class TestPartitionSpecValidation { NestedField.required(5, "another_d", Types.TimestampType.withZone()), NestedField.required(6, "s", Types.StringType.get()), NestedField.required(7, "v", Types.VariantType.get()), - NestedField.required(8, "u", Types.UnknownType.get())); + NestedField.optional(8, "u", Types.UnknownType.get())); @Test public void testMultipleTimestampPartitions() { From 88d998a76343c3756787b3875520369b43b0d220 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Tue, 18 Feb 2025 08:20:49 -0800 Subject: [PATCH 4/4] Update api/src/test/java/org/apache/iceberg/TestSchema.java Co-authored-by: Eduard Tudenhoefner --- api/src/test/java/org/apache/iceberg/TestSchema.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/test/java/org/apache/iceberg/TestSchema.java b/api/src/test/java/org/apache/iceberg/TestSchema.java index b95e52de33ac..be0c7ec103ff 100644 --- a/api/src/test/java/org/apache/iceberg/TestSchema.java +++ b/api/src/test/java/org/apache/iceberg/TestSchema.java @@ -124,7 +124,7 @@ private static Stream supportedTypes() { @Test public void testUnknownSupport() { - // this needs a different schema becuase it cannot be used in required fields + // this needs a different schema because it cannot be used in required fields Schema schemaWithUnknown = new Schema( Types.NestedField.required(1, "id", Types.LongType.get()),