From 5b6e52e0e23e1dd771f7d9d533c46c17ffbaeb53 Mon Sep 17 00:00:00 2001 From: Dan Gudmundsson Date: Fri, 13 Sep 2024 11:32:43 +0200 Subject: [PATCH] Let `json:decode/3` keep whitespaces `json:decode/3` always stripped leading whitespaces in the `Rest` binary, which could be problematic if user expected them. E.g `json:decode(<<"foo\n bar">>, ok, #{})` returned: `{<<"foo">>, ok, <<"bar">>}` instead of `{<<"foo">>, ok, <<"\n bar">>}`. If `Rest` only contains whitespaces they are removed, so that the user can match on empty binary to know if they should continue the decoding loop. E.g `json:decode(<<"foo\n ">>, ok, #{})` still returns: `{<<"foo">>, ok, <<>>}` --- lib/stdlib/src/json.erl | 7 +++++-- lib/stdlib/test/json_SUITE.erl | 27 +++++++++++++++++++++------ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/lib/stdlib/src/json.erl b/lib/stdlib/src/json.erl index 04a20148576c..fc2dd04a4c77 100644 --- a/lib/stdlib/src/json.erl +++ b/lib/stdlib/src/json.erl @@ -1393,8 +1393,11 @@ continue(<>, Original, Skip, Acc, Stack0, Decode, Value) -> end. terminate(<>, Original, Skip, Acc, Value) when ?is_ws(Byte) -> - terminate(Rest, Original, Skip + 1, Acc, Value); -terminate(<>, _Original, _Skip, Acc, Value) -> + terminate(Rest, Original, Skip, Acc, Value); +terminate(<<>>, _, _Skip, Acc, Value) -> + {Value, Acc, <<>>}; +terminate(<<_/bits>>, Original, Skip, Acc, Value) -> + <<_:Skip/binary, Rest/binary>> = Original, {Value, Acc, Rest}. -spec unexpected_utf8(binary(), non_neg_integer()) -> no_return(). diff --git a/lib/stdlib/test/json_SUITE.erl b/lib/stdlib/test/json_SUITE.erl index c27b66792052..3ac56f1ed284 100644 --- a/lib/stdlib/test/json_SUITE.erl +++ b/lib/stdlib/test/json_SUITE.erl @@ -58,6 +58,9 @@ property_escape_all/1 ]). + +-define(is_ws(X), X =:= $\s; X =:= $\t; X =:= $\r; X =:= $\n). + suite() -> [ {ct_hooks, [ts_install_cth]}, @@ -646,7 +649,7 @@ test_decode_whitespace(_Config) -> %% add extra whitespace ews(Str) -> - unicode:characters_to_binary(string:replace(Str, <<" ">>, <<" \s\t\r\n">>)). + unicode:characters_to_binary(string:replace(Str, <<" ">>, <<" \s\t\r\n">>, all)). test_decode_api(_Config) -> put(history, []), @@ -757,10 +760,15 @@ test_decode_api_stream(_Config) -> "numbers": [1, -10, 0.0, -0.0, 2.0, -2.0, 31e2, 31e-2, 0.31e2, -0.31e2, 0.13e-2], "strings": ["three", "åäö", "mixed_Ω"], "escaped": ["\\n", "\\u2603", "\\ud834\\uDD1E", "\\n\xc3\xb1"] - }#, + } + #, ok = stream_decode(Types), - Multiple = ~#12345 1.30 "String1" -0.31e2\n["an array"]12345#, + {12345, ok, B1} = json:decode(ews(~# 12345 "foo" #), ok, #{}), + <<" \s\t\r\n", _/binary>> = B1, + {<<"foo">>, ok, <<>>} = json:decode(B1, ok, #{}), + + Multiple = ~#12345 1.30 "String1" -0.31e2\n["an array"]12345\n#, ok = multi_stream_decode(Multiple), ok. @@ -794,7 +802,7 @@ multi_stream_decode(Strs) -> {R1, [], ContBin} -> multi_stream_decode(ContBin); Other -> - io:format("~p '~ts'~n~p~n", [R1,ContBin, Other]), + io:format("~p '~tp'~n~p~n", [R1,ContBin, Other]), error end. @@ -802,14 +810,21 @@ byte_loop(Bin) -> {continue, State} = json:decode_start(<<>>, [], #{}), byte_loop(Bin, State, []). -byte_loop(<>, State0, Bytes) -> +byte_loop(<> = Orig, State0, Bytes) -> %% io:format("cont with '~s' ~p~n",[lists:reverse([Byte|Bytes]), State0]), case json:decode_continue(<>, State0) of {continue, State} -> byte_loop(Rest, State, [Byte|Bytes]); {Result, [], <<>>} -> %% trim to match the binary in return value - {Result, [], string:trim(Rest, leading)} + case string:trim(Rest, leading) of + <<>> -> + {Result, [], <<>>}; + _ when ?is_ws(Byte) -> + {Result, [], Orig}; + _ -> + {Result, [], Rest} + end end; byte_loop(<<>>, State, _Bytes) -> json:decode_continue(end_of_input, State).