Skip to content

Commit f3907c8

Browse files
infvgtdcmeehan
authored andcommitted
Resolve map(varchar, json) canonicalization bug
The map function will not sort a json object by its keys, despite the json_parse function sorting the same input. If implemented, this will sort json objects. Resolves prestodb#24207
1 parent 024d57a commit f3907c8

File tree

2 files changed

+18
-7
lines changed

2 files changed

+18
-7
lines changed

presto-main-base/src/main/java/com/facebook/presto/util/JsonUtil.java

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
*/
1414
package com.facebook.presto.util;
1515

16+
import com.facebook.airlift.json.JsonObjectMapperProvider;
1617
import com.facebook.presto.common.block.Block;
1718
import com.facebook.presto.common.block.BlockBuilder;
1819
import com.facebook.presto.common.block.SingleRowBlockWriter;
@@ -41,6 +42,7 @@
4142
import com.fasterxml.jackson.databind.ObjectMapper;
4243
import com.google.common.primitives.Shorts;
4344
import com.google.common.primitives.SignedBytes;
45+
import io.airlift.slice.DynamicSliceOutput;
4446
import io.airlift.slice.Slice;
4547
import io.airlift.slice.SliceOutput;
4648
import io.airlift.slice.Slices;
@@ -86,6 +88,7 @@
8688
import static com.fasterxml.jackson.core.JsonToken.FIELD_NAME;
8789
import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
8890
import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
91+
import static com.fasterxml.jackson.databind.SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS;
8992
import static com.google.common.base.Preconditions.checkArgument;
9093
import static com.google.common.base.Verify.verify;
9194
import static it.unimi.dsi.fastutil.HashCommon.arraySize;
@@ -106,6 +109,7 @@ public final class JsonUtil
106109
// `OBJECT_MAPPER.writeValueAsString(parser.readValueAsTree());` preserves input order.
107110
// Be aware. Using it arbitrarily can produce invalid json (ordered by key is required in Presto).
108111
private static final ObjectMapper OBJECT_MAPPED_UNORDERED = new ObjectMapper(JSON_FACTORY);
112+
private static final ObjectMapper OBJECT_MAPPED_SORTED = new JsonObjectMapperProvider().get().configure(ORDER_MAP_ENTRIES_BY_KEYS, true);
109113

110114
private static final int MAX_JSON_LENGTH_IN_ERROR_MESSAGE = 10_000;
111115

@@ -956,8 +960,18 @@ static BlockBuilderAppender createBlockBuilderAppender(Type type)
956960
return new VarcharBlockBuilderAppender(type);
957961
case StandardTypes.JSON:
958962
return (parser, blockBuilder, sqlFunctionProperties) -> {
959-
String json = OBJECT_MAPPED_UNORDERED.writeValueAsString(parser.readValueAsTree());
960-
JSON.writeSlice(blockBuilder, Slices.utf8Slice(json));
963+
Slice slice = Slices.utf8Slice(OBJECT_MAPPED_UNORDERED.writeValueAsString(parser.readValueAsTree()));
964+
try (JsonParser jsonParser = createJsonParser(JSON_FACTORY, slice)) {
965+
SliceOutput dynamicSliceOutput = new DynamicSliceOutput(slice.length());
966+
OBJECT_MAPPED_SORTED.writeValue((OutputStream) dynamicSliceOutput, OBJECT_MAPPED_SORTED.readValue(jsonParser, Object.class));
967+
// nextToken() returns null if the input is parsed correctly,
968+
// but will throw an exception if there are trailing characters.
969+
jsonParser.nextToken();
970+
JSON.writeSlice(blockBuilder, dynamicSliceOutput.slice());
971+
}
972+
catch (Exception e) {
973+
throw new PrestoException(INVALID_FUNCTION_ARGUMENT, format("Cannot convert '%s' to JSON", slice.toStringUtf8()));
974+
}
961975
};
962976
case StandardTypes.ARRAY:
963977
return new ArrayBlockBuilderAppender(createBlockBuilderAppender(((ArrayType) type).getElementType()));

presto-main-base/src/test/java/com/facebook/presto/type/TestMapOperators.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -527,16 +527,13 @@ public void testJsonToMap()
527527
.put("k8", "[null]")
528528
.build());
529529

530-
// These two tests verifies that partial json cast preserves input order
531-
// The second test should never happen in real life because valid json in presto requires natural key ordering.
532-
// However, it is added to make sure that the order in the first test is not a coincidence.
533530
assertFunction("CAST(JSON '{\"k1\": {\"1klmnopq\":1, \"2klmnopq\":2, \"3klmnopq\":3, \"4klmnopq\":4, \"5klmnopq\":5, \"6klmnopq\":6, \"7klmnopq\":7}}' AS MAP<VARCHAR, JSON>)",
534531
mapType(VARCHAR, JSON),
535532
ImmutableMap.of("k1", "{\"1klmnopq\":1,\"2klmnopq\":2,\"3klmnopq\":3,\"4klmnopq\":4,\"5klmnopq\":5,\"6klmnopq\":6,\"7klmnopq\":7}"));
533+
536534
assertFunction("CAST(unchecked_to_json('{\"k1\": {\"7klmnopq\":7, \"6klmnopq\":6, \"5klmnopq\":5, \"4klmnopq\":4, \"3klmnopq\":3, \"2klmnopq\":2, \"1klmnopq\":1}}') AS MAP<VARCHAR, JSON>)",
537535
mapType(VARCHAR, JSON),
538-
ImmutableMap.of("k1", "{\"7klmnopq\":7,\"6klmnopq\":6,\"5klmnopq\":5,\"4klmnopq\":4,\"3klmnopq\":3,\"2klmnopq\":2,\"1klmnopq\":1}"));
539-
536+
ImmutableMap.of("k1", "{\"1klmnopq\":1,\"2klmnopq\":2,\"3klmnopq\":3,\"4klmnopq\":4,\"5klmnopq\":5,\"6klmnopq\":6,\"7klmnopq\":7}"));
540537
// nested array/map
541538
assertFunction("CAST(JSON '{\"1\": [1, 2], \"2\": [3, null], \"3\": [], \"5\": [null, null], \"8\": null}' AS MAP<BIGINT, ARRAY<BIGINT>>)",
542539
mapType(BIGINT, new ArrayType(BIGINT)),

0 commit comments

Comments
 (0)