diff --git a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaVisitor.java b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaVisitor.java index e6515f1180..6673108e63 100644 --- a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaVisitor.java +++ b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaVisitor.java @@ -52,7 +52,7 @@ public static T visit(Schema schema, AvroSchemaVisitor visitor) { case UNION: List types = schema.getTypes(); List options = Lists.newArrayListWithExpectedSize(types.size()); - if (AvroSchemaUtil.isOptionSchema(schema)) { + if (AvroSchemaUtil.isOptionSchema(schema) || AvroSchemaUtil.isSingleTypeUnion(schema)) { for (Schema type : types) { options.add(visit(type, visitor)); } diff --git a/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroUnions.java b/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroUnions.java index eb17e45713..31c2b6b9cd 100644 --- a/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroUnions.java +++ b/spark/src/test/java/org/apache/iceberg/spark/data/TestSparkAvroUnions.java @@ -178,7 +178,7 @@ public void writeAndValidateSingleTypeUnion() throws IOException { } @Test - public void writeAndValidateNestedSingleTypeUnion() throws IOException { + public void writeAndValidateNestedSingleTypeUnion1() throws IOException { org.apache.avro.Schema avroSchema = SchemaBuilder.record("root") .fields() .name("col1") @@ -217,6 +217,57 @@ public void writeAndValidateNestedSingleTypeUnion() throws IOException { } } + @Test + public void writeAndValidateNestedSingleTypeUnion2() throws IOException { + org.apache.avro.Schema avroSchema = SchemaBuilder.record("root") + .fields() + .name("outerUnion") + .type() + .unionOf() + .record("r") + .fields() + .name("innerUnion") + .type() + .unionOf() + .stringType() + .endUnion() + .noDefault() + .endRecord() + .endUnion() + .noDefault() + .endRecord(); + + GenericData.Record unionRecord1 = new GenericData.Record(avroSchema); + GenericData.Record innerRecord1 = new GenericData.Record(avroSchema.getFields().get(0).schema().getTypes().get(0)); + innerRecord1.put("innerUnion", "foo"); + unionRecord1.put("outerUnion", innerRecord1); + + GenericData.Record unionRecord2 = new GenericData.Record(avroSchema); + GenericData.Record innerRecord2 = new GenericData.Record(avroSchema.getFields().get(0).schema().getTypes().get(0)); + innerRecord2.put("innerUnion", "bar"); + unionRecord2.put("outerUnion", innerRecord2); + + File testFile = temp.newFile(); + try (DataFileWriter writer = new DataFileWriter<>(new GenericDatumWriter<>())) { + writer.create(avroSchema, testFile); + writer.append(unionRecord1); + writer.append(unionRecord2); + } + + Schema expectedSchema = AvroSchemaUtil.toIceberg(avroSchema); + + List rows; + try (AvroIterable reader = Avro.read(Files.localInput(testFile)) + .createReaderFunc(SparkAvroReader::new) + .project(expectedSchema) + .build()) { + rows = Lists.newArrayList(reader); + + Assert.assertEquals("foo", rows.get(0).getStruct(0, 1).getUTF8String(0).toString()); + Assert.assertEquals("bar", rows.get(1).getStruct(0, 1).getUTF8String(0).toString()); + } + } + @Test public void writeAndValidateSingleTypeUnionOfComplexType() throws IOException { org.apache.avro.Schema avroSchema = SchemaBuilder.record("root")