From 158d11ac809fa9ea6105e32fa8901aa6411ffb20 Mon Sep 17 00:00:00 2001 From: Raymond Zhang Date: Fri, 3 Jun 2022 16:05:01 -0700 Subject: [PATCH 1/4] Revert "Disable default value preserving (#106)" This reverts commit 6a8352567f0dad7b3fe8e225c0260bb6d756cc08. --- .../src/main/java/org/apache/iceberg/SchemaParser.java | 10 ++++------ .../iceberg/TestSchemaParserForDefaultValues.java | 3 +-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/SchemaParser.java b/core/src/main/java/org/apache/iceberg/SchemaParser.java index 3664e76ac1..549c64948a 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaParser.java +++ b/core/src/main/java/org/apache/iceberg/SchemaParser.java @@ -100,8 +100,7 @@ static void toJson(Types.StructType struct, JsonGenerator generator) throws IOEx generator.writeBooleanField(REQUIRED, field.isRequired()); generator.writeFieldName(TYPE); toJson(field.type(), generator); - // BDP-11826: Disable serializing default value -// writeDefaultValue(field.getDefaultValue(), field.type(), generator); + writeDefaultValue(field.getDefaultValue(), field.type(), generator); if (field.doc() != null) { generator.writeStringField(DOC, field.doc()); } @@ -249,14 +248,13 @@ private static Types.StructType structFromJson(JsonNode json) { int id = JsonUtil.getInt(ID, field); String name = JsonUtil.getString(NAME, field); Type type = typeFromJson(field.get(TYPE)); - // BDP-11826: Disable deserializing default value -// Object defaultValue = defaultValueFromJson(field, type); + Object defaultValue = defaultValueFromJson(field, type); String doc = JsonUtil.getStringOrNull(DOC, field); boolean isRequired = JsonUtil.getBool(REQUIRED, field); if (isRequired) { - fields.add(Types.NestedField.required(id, name, type, doc)); + fields.add(Types.NestedField.required(id, name, type, defaultValue, doc)); } else { - fields.add(Types.NestedField.optional(id, name, type, doc)); + fields.add(Types.NestedField.optional(id, name, type, defaultValue, doc)); } } diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaParserForDefaultValues.java b/core/src/test/java/org/apache/iceberg/TestSchemaParserForDefaultValues.java index 690c0dfe05..853b9ad2b4 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaParserForDefaultValues.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaParserForDefaultValues.java @@ -31,7 +31,6 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.types.Types.NestedField; import org.junit.Assert; -import org.junit.Ignore; import org.junit.Test; import static org.apache.avro.Schema.Type.BOOLEAN; @@ -43,7 +42,7 @@ import static org.apache.avro.Schema.Type.NULL; import static org.apache.avro.Schema.Type.STRING; -@Ignore("BDP-11826: Disable default value preserving in iceberg schema") + public class TestSchemaParserForDefaultValues { private void assertEqualStructs(org.apache.iceberg.Schema expected, org.apache.iceberg.Schema actual) { From 9cb7273dc39c454f0d71750a3bb18136b0f359fc Mon Sep 17 00:00:00 2001 From: Raymond Zhang Date: Fri, 3 Jun 2022 18:06:28 -0700 Subject: [PATCH 2/4] [Core] Fix/Refactor SchemaParser to fix multiple bugs --- .../java/org/apache/iceberg/SchemaParser.java | 162 ++---------------- .../apache/iceberg/avro/AvroSchemaUtil.java | 2 +- .../iceberg/avro/TestSchemaConversions.java | 84 +++++++++ 3 files changed, 98 insertions(+), 150 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/SchemaParser.java b/core/src/main/java/org/apache/iceberg/SchemaParser.java index 549c64948a..3c5ce348fc 100644 --- a/core/src/main/java/org/apache/iceberg/SchemaParser.java +++ b/core/src/main/java/org/apache/iceberg/SchemaParser.java @@ -19,8 +19,6 @@ package org.apache.iceberg; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; @@ -28,15 +26,11 @@ import com.github.benmanes.caffeine.cache.Caffeine; import java.io.IOException; import java.io.StringWriter; -import java.math.BigDecimal; import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; +import org.apache.avro.util.internal.JacksonUtils; +import org.apache.iceberg.avro.AvroSchemaUtil; import org.apache.iceberg.exceptions.RuntimeIOException; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -69,22 +63,15 @@ private SchemaParser() { private static final String VALUE_REQUIRED = "value-required"; private static final String DEFAULT = "default"; - private static final List primitiveClasses = Arrays.asList(Boolean.class, Integer.class, Long.class, - Float.class, Double.class, CharSequence.class, String.class, java.util.UUID.class, BigDecimal.class); - private static void writeDefaultValue(Object defaultValue, Type type, JsonGenerator generator) throws IOException { if (defaultValue == null) { return; } generator.writeFieldName(DEFAULT); - if (type.isListType()) { - generator.writeString(defaultValueToJsonString((List) defaultValue)); - } else if (type.isStructType() || type.isMapType()) { - generator.writeString(defaultValueToJsonString((Map) defaultValue)); - } else if (isFixedOrBinary(type)) { - generator.writeString(defaultValueToJsonString((byte[]) defaultValue)); + if (isFixedOrBinary(type)) { + generator.writeRawValue(defaultValueToJsonString((byte[]) defaultValue)); } else { - generator.writeString(defaultValueToJsonString(defaultValue)); + generator.writeRawValue(defaultValueToJsonString(defaultValue)); } } @@ -216,21 +203,16 @@ private static Object defaultValueFromJson(JsonNode field, Type type) { return null; } - String defaultValueString = field.get(DEFAULT).asText(); - if (isFixedOrBinary(type)) { - return defaultValueFromJsonBytesField(defaultValueString); - } - - if (type.isPrimitiveType()) { - return primitiveDefaultValueFromJsonString(defaultValueString, type); + try { + return field.get(DEFAULT).binaryValue(); + } catch (IOException e) { + throw new RuntimeException(e); + } } - try { - return defaultValueFromJsonString(defaultValueString, type); - } catch (IOException e) { - throw new RuntimeException(e); - } + return AvroSchemaUtil.convertToJavaDefaultValue(JacksonUtils.toObject(field.get(DEFAULT), + AvroSchemaUtil.convert(type))); } private static Types.StructType structFromJson(JsonNode json) { @@ -311,19 +293,6 @@ public static Schema fromJson(String json) { }); } - private static String defaultValueToJsonString(Map map) { - Map jsonStringElementsMap = new LinkedHashMap<>(); - map.entrySet().forEach( - entry -> jsonStringElementsMap.put(entry.getKey(), defaultValueToJsonString(entry.getValue()))); - return defaultValueToJsonString(jsonStringElementsMap); - } - - private static String defaultValueToJsonString(List list) { - List jsonStringItemsList = new ArrayList<>(); - list.forEach(item -> jsonStringItemsList.add(defaultValueToJsonString(item))); - return defaultValueToJsonString(jsonStringItemsList); - } - private static String defaultValueToJsonString(byte[] bytes) { try { return JsonUtil.mapper().writeValueAsString(ByteBuffer.wrap(bytes)); @@ -333,115 +302,10 @@ private static String defaultValueToJsonString(byte[] bytes) { } private static String defaultValueToJsonString(Object value) { - if (value == null) { - // Json string representation of null object is string "null" - return "null"; - } - if (isPrimitiveClass(value)) { - return value.toString(); - } - - try { - return JsonUtil.mapper().writeValueAsString(new SerDeValue(value)); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - } - - private static boolean isPrimitiveClass(Object value) { - return primitiveClasses.contains(value.getClass()); - } - - private static Object defaultValueFromJsonBytesField(String value) { try { - return JsonUtil.mapper().readValue(value, ByteBuffer.class).array(); + return JsonUtil.mapper().writeValueAsString(value); } catch (JsonProcessingException e) { throw new RuntimeException(e); } } - - private static Object defaultValueFromJsonString(String jsonString, Type type) throws IOException { - Preconditions.checkArgument(!type.isPrimitiveType(), "jsonString %s is for primitive type %s", jsonString, type); - Object jsonStringCollection = JsonUtil.mapper().readValue(jsonString, SerDeValue.class).getValue(); - - if (type.isListType()) { - Preconditions.checkArgument(jsonStringCollection instanceof List, - "deserialized Json object: (%s) is not List for List type", jsonStringCollection); - List list = new ArrayList<>(); - Type elementType = type.asListType().elementType(); - for (String item : (List) jsonStringCollection) { - list.add(elementType.isPrimitiveType() ? primitiveDefaultValueFromJsonString(item, elementType) : - JsonUtil.mapper().readValue(item, SerDeValue.class).getValue()); - } - return list; - } - - Preconditions.checkArgument((type.isMapType() || type.isStructType()) && jsonStringCollection instanceof Map, - "deserialized Json object: (%s) is not Map for type: %s", jsonStringCollection, type); - - // map (MapType or StructType) case - Map map = new HashMap<>(); - Map jsonStringMap = (HashMap) jsonStringCollection; - for (Map.Entry entry : jsonStringMap.entrySet()) { - String key = entry.getKey().toString(); - String valueString = entry.getValue().toString(); - Type elementType = type.isMapType() ? type.asMapType().valueType() : type.asStructType().field(key).type(); - Object value = elementType.isPrimitiveType() ? primitiveDefaultValueFromJsonString(valueString, elementType) - : JsonUtil.mapper().readValue(valueString, SerDeValue.class).getValue(); - map.put(key, value); - } - return map; - } - - private static Object primitiveDefaultValueFromJsonString(String jsonString, Type type) { - switch (type.typeId()) { - case BOOLEAN: - return Boolean.valueOf(jsonString); - case INTEGER: - case DATE: - return Integer.valueOf(jsonString); - case DECIMAL: - return BigDecimal.valueOf(Long.valueOf(jsonString)); - case LONG: - case TIME: - case TIMESTAMP: - return Long.valueOf(jsonString); - case FLOAT: - return Float.valueOf(jsonString); - case DOUBLE: - return Double.valueOf(jsonString); - case STRING: - return jsonString; - case UUID: - return java.util.UUID.fromString(jsonString); - case FIXED: - case BINARY: - return defaultValueFromJsonBytesField(jsonString); - default: - throw new RuntimeException("non-primitive type: " + type); - } - } - - /** - * SerDeValue class: - * This is used so that the value to serialize is specified - * as a property, so that the type information gets included in - * the serialized String. - */ - private static class SerDeValue { - // Name of the field used in the intermediate JSON representation - private static final String VALUE_FIELD = "__value__"; - - @JsonProperty(VALUE_FIELD) - private final Object value; - - @JsonCreator - private SerDeValue(@JsonProperty(VALUE_FIELD) Object value) { - this.value = value; - } - - private Object getValue() { - return value; - } - } } diff --git a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java index f6866088da..edc6583413 100644 --- a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java +++ b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java @@ -479,7 +479,7 @@ static boolean hasNonNullDefaultValue(Schema.Field field) { !(field.defaultVal() instanceof String && ((String) field.defaultVal()).equalsIgnoreCase("null")); } - static Object convertToJavaDefaultValue(Object defaultValue) { + public static Object convertToJavaDefaultValue(Object defaultValue) { if (defaultValue instanceof List) { return ((List) defaultValue).stream() .map(AvroSchemaUtil::convertToJavaDefaultValue) diff --git a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java index b3d1b769aa..c1366f2af2 100644 --- a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java +++ b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java @@ -20,9 +20,11 @@ package org.apache.iceberg.avro; import java.util.List; +import java.util.stream.IntStream; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; +import org.apache.iceberg.SchemaParser; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.types.Type; @@ -434,4 +436,86 @@ public void testConversionOfRecordDefaultWithOptionalNestedField2() { "default value: {mapField={foo=bar, x=y}, recordField=null}, \n" + "}", iSchema.toString()); } + + @Test + public void testVariousTypesDefaultValues() { + String schemaString = "{\n" + + " \"namespace\": \"com.razhang\",\n" + + " \"type\": \"record\",\n" + + " \"name\": \"RAZHANG\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\": \"f1\",\n" + + " \"type\": \"string\",\n" + + " \"default\": \"foo\"\n" + + " },\n" + + " {\n" + + " \"name\": \"f2\",\n" + + " \"type\": \"int\",\n" + + " \"default\": 1\n" + + " },\n" + + " {\n" + + " \"name\": \"f3\",\n" + + " \"type\": {\n" + + " \"type\": \"map\",\n" + + " \"values\" : \"int\"\n" + + " },\n" + + " \"default\": {\"a\": 1}\n" + + " },\n" + + " {\n" + + " \"name\": \"f4\",\n" + + " \"type\": {\n" + + " \"type\": \"array\",\n" + + " \"items\" : \"int\"\n" + + " },\n" + + " \"default\": [1, 2, 3]\n" + + " },\n" + + " {\n" + + " \"name\": \"f5\",\n" + + " \"type\": {\n" + + " \"type\": \"record\",\n" + + " \"name\": \"F5\",\n" + + " \"fields\" : [\n" + + " {\"name\": \"ff1\", \"type\": \"long\"},\n" + + " {\"name\": \"ff2\", \"type\": [\"null\", \"string\"]}\n" + + " ]\n" + + " },\n" + + " \"default\": {\n" + + " \"ff1\": 999,\n" + + " \"ff2\": null\n" + + " }\n" + + " },\n" + + " {\n" + + " \"name\": \"f6\",\n" + + " \"type\": {\n" + + " \"type\": \"map\",\n" + + " \"values\": {\n" + + " \"type\": \"array\",\n" + + " \"items\" : \"int\"\n" + + " }\n" + + " },\n" + + " \"default\": {\"key\": [1, 2, 3]}\n" + + " },\n" + + " {\n" + + " \"name\": \"f7\",\n" + + " \"type\": {\n" + + " \"type\": \"fixed\",\n" + + " \"name\": \"md5\",\n" + + " \"size\": 2\n" + + " },\n" + + " \"default\": \"fF\"\n" + + " }\n" + + " ]\n" + + "}"; + Schema schema = new Schema.Parser().parse(schemaString); + org.apache.iceberg.Schema iSchema = AvroSchemaUtil.toIceberg(schema); + + System.out.println(iSchema); + + String schemaJson = SchemaParser.toJson(iSchema); + org.apache.iceberg.Schema roundTripiSchema = SchemaParser.fromJson(schemaJson); + + Assert.assertTrue(IntStream.range(0, roundTripiSchema.columns().size()) + .allMatch(i -> roundTripiSchema.columns().get(i).equals(iSchema.columns().get(i)))); + } } From a2560ca00e9d873a281eb92196f56321b3d8dc27 Mon Sep 17 00:00:00 2001 From: Limian Zhang <75445426+rzhang10@users.noreply.github.com> Date: Fri, 3 Jun 2022 18:39:28 -0700 Subject: [PATCH 3/4] Fix NestedField equality on defaults --- api/src/main/java/org/apache/iceberg/types/Types.java | 3 ++- .../java/org/apache/iceberg/avro/TestSchemaConversions.java | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index 8218b83552..65e15e12a5 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -592,7 +592,8 @@ public boolean equals(Object o) { return false; } else if (!name.equals(that.name)) { return false; - } else if (!Objects.equals(defaultValue, that.defaultValue)) { + } else if (!Objects.equals(defaultValue, that.defaultValue) + && !Arrays.equals((byte[]) defaultValue, (byte[]) that.defaultValue)) { return false; } else if (!Objects.equals(doc, that.doc)) { return false; diff --git a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java index c1366f2af2..b1ebe5bd34 100644 --- a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java +++ b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java @@ -510,8 +510,6 @@ public void testVariousTypesDefaultValues() { Schema schema = new Schema.Parser().parse(schemaString); org.apache.iceberg.Schema iSchema = AvroSchemaUtil.toIceberg(schema); - System.out.println(iSchema); - String schemaJson = SchemaParser.toJson(iSchema); org.apache.iceberg.Schema roundTripiSchema = SchemaParser.fromJson(schemaJson); From 7299d5aa12c99fa24bf100ab1494975792ce7dbd Mon Sep 17 00:00:00 2001 From: Raymond Zhang Date: Mon, 6 Jun 2022 17:42:40 -0700 Subject: [PATCH 4/4] Fix checkstyle --- api/src/main/java/org/apache/iceberg/types/Types.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index 65e15e12a5..ef20a4e0c7 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -592,8 +592,8 @@ public boolean equals(Object o) { return false; } else if (!name.equals(that.name)) { return false; - } else if (!Objects.equals(defaultValue, that.defaultValue) - && !Arrays.equals((byte[]) defaultValue, (byte[]) that.defaultValue)) { + } else if (!Objects.equals(defaultValue, that.defaultValue) && + !Arrays.equals((byte[]) defaultValue, (byte[]) that.defaultValue)) { return false; } else if (!Objects.equals(doc, that.doc)) { return false;