-
Notifications
You must be signed in to change notification settings - Fork 36
Fix reordering avro nullable types in MergeHiveSchemaWithAvro #140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: li-0.11.x
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ | |
| import org.apache.avro.JsonProperties; | ||
| import org.apache.avro.LogicalTypes; | ||
| import org.apache.avro.Schema; | ||
| import org.apache.avro.Schema.Type; | ||
| import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; | ||
| import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; | ||
| import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; | ||
|
|
@@ -97,12 +98,12 @@ public Schema.Field field(String name, TypeInfo field, Schema.Field partner, Sch | |
| * If the schema is not an option schema or if there is no default value, schema is returned as-is | ||
| */ | ||
| private Schema reorderOptionIfRequired(Schema schema, Object defaultValue) { | ||
| if (AvroSchemaUtil.isOptionSchema(schema) && defaultValue != null) { | ||
| boolean isNullFirstOption = schema.getTypes().get(0).getType() == Schema.Type.NULL; | ||
| if (isNullFirstOption && defaultValue.equals(JsonProperties.NULL_VALUE)) { | ||
| return schema; | ||
| if (AvroSchemaUtil.isOptionSchema(schema)) { | ||
| int nullIndex = AvroSchemaUtil.getNullIndex(schema); | ||
| if (defaultValue != null && !defaultValue.equals(JsonProperties.NULL_VALUE)) { | ||
| return Schema.createUnion(schema.getTypes().get(1 - nullIndex), Schema.create(Type.NULL)); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hey @rzhang10, I am having a hard time reasoning about these changes. Can you please include a description of how these changes help fix the issue? Can you also update the Javadoc of this method to reflect the new changes?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ok, I understand why this is needed. Previously |
||
| } else { | ||
| return Schema.createUnion(schema.getTypes().get(1), schema.getTypes().get(0)); | ||
| return Schema.createUnion(Schema.create(Type.NULL), schema.getTypes().get(1 - nullIndex)); | ||
| } | ||
| } else { | ||
| return schema; | ||
|
|
@@ -143,10 +144,14 @@ public Schema union(UnionTypeInfo union, Schema partner, List<Schema> results) { | |
| @Override | ||
| public Schema primitive(PrimitiveTypeInfo primitive, Schema partner) { | ||
| boolean shouldResultBeOptional = partner == null || AvroSchemaUtil.isOptionSchema(partner); | ||
| int nullIndex = 0; | ||
| if (partner != null && AvroSchemaUtil.isOptionSchema(partner)) { | ||
| nullIndex = AvroSchemaUtil.getNullIndex(partner); | ||
| } | ||
| Schema hivePrimitive = hivePrimitiveToAvro(primitive); | ||
| // if there was no matching Avro primitive, use the Hive primitive | ||
| Schema result = partner == null ? hivePrimitive : checkCompatibilityAndPromote(hivePrimitive, partner); | ||
| return shouldResultBeOptional ? AvroSchemaUtil.toOption(result) : result; | ||
| return shouldResultBeOptional ? AvroSchemaUtil.toOption(result, nullIndex == 1) : result; | ||
|
Comment on lines
+147
to
+154
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This handling is incomplete: it only handles cases for primitives inside containers (e.g. list), but does not handle similar cases for nested lists, maps, structs, etc. E.g. this case will fail. I think we need similar handling in other places where we use the |
||
| } | ||
|
|
||
| private Schema checkCompatibilityAndPromote(Schema schema, Schema partner) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`nullIndexForOptionSchema`
Iceberg does not use `get` in method names. Nulls can also be present in complex schemas, so ensure that we add the `OptionSchema` constraint in the method name itself.