From 8e6f8d1c31126599aa1ff2fe6c759e47186c3e41 Mon Sep 17 00:00:00 2001 From: Constantin Muraru Date: Mon, 28 Aug 2017 01:05:08 +0300 Subject: [PATCH] PARQUET-968 Implement feedback Update the proto to parquet schema converter for MAP fields so that it follows the spec: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists This came as feedback from the Amazon Athena team. --- .../org/apache/parquet/proto/ProtoMessageConverter.java | 4 ++-- .../java/org/apache/parquet/proto/ProtoSchemaConverter.java | 6 +++--- .../java/org/apache/parquet/proto/ProtoWriteSupport.java | 6 +++--- .../org/apache/parquet/proto/ProtoSchemaConverterTest.java | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java index e478652207..953994f1c1 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoMessageConverter.java @@ -447,8 +447,8 @@ public MapConverter(Message.Builder parentBuilder, Descriptors.FieldDescriptor f } Type parquetSchema; - if (parquetType.asGroupType().containsField("map")){ - parquetSchema = parquetType.asGroupType().getType("map"); + if (parquetType.asGroupType().containsField("key_value")){ + parquetSchema = parquetType.asGroupType().getType("key_value"); } else { throw new ParquetDecodingException("Expected map but got: " + parquetType); } diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java index a5b4edebe5..f3dd11db38 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoSchemaConverter.java @@ -137,12 +137,12 @@ private 
GroupBuilder> addMapField(Descriptors.FieldDescripto ParquetType mapKeyParquetType = getParquetType(fields.get(0)); GroupBuilder>> group = builder - .group(getRepetition(descriptor)).as(OriginalType.MAP) - .group(Type.Repetition.REPEATED).as(OriginalType.MAP_KEY_VALUE) + .group(Type.Repetition.REQUIRED).as(OriginalType.MAP) + .group(Type.Repetition.REPEATED) // key_value wrapper .primitive(mapKeyParquetType.primitiveType, Type.Repetition.REQUIRED).as(mapKeyParquetType.originalType).named("key"); return addField(fields.get(1), group).named("value") - .named("map"); + .named("key_value"); } private ParquetType getParquetType(Descriptors.FieldDescriptor fieldDescriptor) { diff --git a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java index 31e386daee..8e2b4aeb44 100644 --- a/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java +++ b/parquet-protobuf/src/main/java/org/apache/parquet/proto/ProtoWriteSupport.java @@ -207,7 +207,7 @@ private GroupType getGroupType(Type type) { } if (type.getOriginalType() == OriginalType.MAP) { - return type.asGroupType().getType("map").asGroupType().getType("value").asGroupType(); + return type.asGroupType().getType("key_value").asGroupType().getType("value").asGroupType(); } return type.asGroupType(); @@ -369,7 +369,7 @@ public MapWriter(FieldWriter keyWriter, FieldWriter valueWriter) { final void writeRawValue(Object value) { recordConsumer.startGroup(); - recordConsumer.startField("map", 0); // This is the wrapper group for the map field + recordConsumer.startField("key_value", 0); // This is the wrapper group for the map field for(MapEntry entry : (Collection>) value) { recordConsumer.startGroup(); keyWriter.writeField(entry.getKey()); @@ -377,7 +377,7 @@ final void writeRawValue(Object value) { recordConsumer.endGroup(); } - recordConsumer.endField("map", 0); + recordConsumer.endField("key_value", 
0); recordConsumer.endGroup(); } diff --git a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java index 34f2f23f2f..70bc1f79d9 100644 --- a/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java +++ b/parquet-protobuf/src/test/java/org/apache/parquet/proto/ProtoSchemaConverterTest.java @@ -103,8 +103,8 @@ public void testProto3ConvertAllDatatypes() throws Exception { " optional binary optionalEnum (ENUM) = 18;" + " optional int32 someInt32 = 19;" + " optional binary someString (UTF8) = 20;" + - " repeated group optionalMap (MAP) = 21 {\n" + - " repeated group map (MAP_KEY_VALUE) {\n" + + " required group optionalMap (MAP) = 21 {\n" + + " repeated group key_value {\n" + " required int64 key;\n" + " optional group value {\n" + " optional int32 someId = 3;\n" +