Skip to content

Commit

Permalink
PARQUET-968 Implement feedback
Browse files Browse the repository at this point in the history
Update the proto to parquet schema converter for MAP fields so that it follows the scec: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
This came as feedback from the Amazon Athena team.
  • Loading branch information
Constantin Muraru committed Aug 27, 2017
1 parent 121c0b7 commit 8e6f8d1
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,8 @@ public MapConverter(Message.Builder parentBuilder, Descriptors.FieldDescriptor f
}

Type parquetSchema;
if (parquetType.asGroupType().containsField("map")){
parquetSchema = parquetType.asGroupType().getType("map");
if (parquetType.asGroupType().containsField("key_value")){
parquetSchema = parquetType.asGroupType().getType("key_value");
} else {
throw new ParquetDecodingException("Expected map but got: " + parquetType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,12 @@ private <T> GroupBuilder<GroupBuilder<T>> addMapField(Descriptors.FieldDescripto
ParquetType mapKeyParquetType = getParquetType(fields.get(0));

GroupBuilder<GroupBuilder<GroupBuilder<T>>> group = builder
.group(getRepetition(descriptor)).as(OriginalType.MAP)
.group(Type.Repetition.REPEATED).as(OriginalType.MAP_KEY_VALUE)
.group(Type.Repetition.REQUIRED).as(OriginalType.MAP)
.group(Type.Repetition.REPEATED) // key_value wrapper
.primitive(mapKeyParquetType.primitiveType, Type.Repetition.REQUIRED).as(mapKeyParquetType.originalType).named("key");

return addField(fields.get(1), group).named("value")
.named("map");
.named("key_value");
}

private ParquetType getParquetType(Descriptors.FieldDescriptor fieldDescriptor) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ private GroupType getGroupType(Type type) {
}

if (type.getOriginalType() == OriginalType.MAP) {
return type.asGroupType().getType("map").asGroupType().getType("value").asGroupType();
return type.asGroupType().getType("key_value").asGroupType().getType("value").asGroupType();
}

return type.asGroupType();
Expand Down Expand Up @@ -369,15 +369,15 @@ public MapWriter(FieldWriter keyWriter, FieldWriter valueWriter) {
final void writeRawValue(Object value) {
recordConsumer.startGroup();

recordConsumer.startField("map", 0); // This is the wrapper group for the map field
recordConsumer.startField("key_value", 0); // This is the wrapper group for the map field
for(MapEntry<?, ?> entry : (Collection<MapEntry<?, ?>>) value) {
recordConsumer.startGroup();
keyWriter.writeField(entry.getKey());
valueWriter.writeField(entry.getValue());
recordConsumer.endGroup();
}

recordConsumer.endField("map", 0);
recordConsumer.endField("key_value", 0);

recordConsumer.endGroup();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ public void testProto3ConvertAllDatatypes() throws Exception {
" optional binary optionalEnum (ENUM) = 18;" +
" optional int32 someInt32 = 19;" +
" optional binary someString (UTF8) = 20;" +
" repeated group optionalMap (MAP) = 21 {\n" +
" repeated group map (MAP_KEY_VALUE) {\n" +
" required group optionalMap (MAP) = 21 {\n" +
" repeated group key_value {\n" +
" required int64 key;\n" +
" optional group value {\n" +
" optional int32 someId = 3;\n" +
Expand Down

0 comments on commit 8e6f8d1

Please sign in to comment.