From 6e67821c3e2910f537ef9aaeb82dd0df9e74fdd2 Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Thu, 14 Aug 2025 12:45:14 -0600 Subject: [PATCH 1/2] pack: fix token count regression in JSON tokenizer and remove parser reset This patch fixes a regression in flb_json_tokenise() that led to invalid or duplicated token counts on repeated parses. The jsmn_parser is no longer reset after the token array is reallocated, and tokens_count is now always taken from jsmn_parser.toknext instead of being accumulated from the return value of jsmn_parse(). Signed-off-by: Eduardo Silva --- src/flb_pack.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/flb_pack.c b/src/flb_pack.c index 2496ab66223..28be5551fcb 100644 --- a/src/flb_pack.c +++ b/src/flb_pack.c @@ -64,6 +64,7 @@ int flb_json_tokenise(const char *js, size_t len, ret = jsmn_parse(&state->parser, js, len, state->tokens, state->tokens_size); + while (ret == JSMN_ERROR_NOMEM) { /* Get current size of the array in bytes */ old_size = state->tokens_size * sizeof(jsmntok_t); @@ -79,13 +80,11 @@ int flb_json_tokenise(const char *js, size_t len, state->tokens = tmp; state->tokens_size += new_tokens; - /* Reset parser to reprocess the JSON data from the beginning */ - jsmn_init(&state->parser); - ret = jsmn_parse(&state->parser, js, len, state->tokens, state->tokens_size); } + if (ret == JSMN_ERROR_INVAL) { return FLB_ERR_JSON_INVAL; } @@ -96,7 +95,8 @@ int flb_json_tokenise(const char *js, size_t len, return FLB_ERR_JSON_PART; } - state->tokens_count += ret; + /* always use jsmn_parser.toknext to count tokens */ + state->tokens_count = state->parser.toknext; return 0; } From f07ad62200854c3d7b25d88a7ddda981bc43f63a Mon Sep 17 00:00:00 2001 From: Eduardo Silva Date: Thu, 14 Aug 2025 12:46:06 -0600 Subject: [PATCH 2/2] tests: internal: pack: add test_json_pack_token_count_overflow test Signed-off-by: Eduardo Silva --- tests/internal/pack.c | 53 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/tests/internal/pack.c b/tests/internal/pack.c index ae59b4d32f9..e7a88367ba3 100644 
--- a/tests/internal/pack.c +++ b/tests/internal/pack.c @@ -294,6 +294,7 @@ void test_json_dup_keys() flb_free(data_out); } +/* https://github.com/fluent/fluent-bit/issues/342 */ void test_json_pack_bug342() { int i = 0; @@ -989,7 +990,7 @@ void test_json_pack_empty_array() /* unpack just to validate the msgpack buffer */ msgpack_unpacked result; - msgpack_object obj; + size_t off = 0; msgpack_unpacked_init(&result); off = 0; @@ -1103,7 +1104,7 @@ void test_json_pack_large_uint64() }; for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { - p_in = test_cases[i].json_str; + p_in = (char *) test_cases[i].json_str; len_in = strlen(p_in); expected = test_cases[i].expected_val; @@ -1137,6 +1138,50 @@ void test_json_pack_large_uint64() } } +void test_json_pack_token_count_overflow() +{ + int i; + flb_sds_t json = NULL; + struct flb_pack_state state; + int ret; + + flb_pack_state_init(&state); + + /* Create a JSON array big enough to trigger realloc in flb_json_tokenise */ + json = flb_sds_create("["); + for (i = 0; i < 300; i++) { + if (i > 0) { + flb_sds_cat_safe(&json, ",", 1); + } + json = flb_sds_printf(&json, "%d", i); + } + flb_sds_cat_safe(&json, "]", 1); + + if (!TEST_CHECK(json != NULL)) { + TEST_MSG("Failed to allocate JSON string"); + exit(1); + } + + /* First parse: forces realloc at least once (because by default we have space for 256 tokens) */ + ret = flb_json_tokenise(json, flb_sds_len(json), &state); + TEST_CHECK(ret == 0); + printf("\nFirst parse: tokens_count=%d\n", state.tokens_count); + + /* Second parse with the same JSON and same state — should be ~301, but will be doubled if bug exists */ + ret = flb_json_tokenise(json, flb_sds_len(json), &state); + TEST_CHECK(ret == 0); + printf("Second parse: tokens_count=%d (BUG if > ~301)\n", state.tokens_count); + + TEST_CHECK(state.tokens_count == 301); + if (state.tokens_count != 301) { + TEST_MSG("tokens_count=%d (BUG if > ~301)\n", state.tokens_count); + exit(1); + } + + flb_sds_destroy(json); 
+ flb_pack_state_reset(&state); +} + TEST_LIST = { /* JSON maps iteration */ { "json_pack" , test_json_pack }, @@ -1161,7 +1206,7 @@ TEST_LIST = { /* Mixed bytes, check JSON encoding */ { "utf8_to_json", test_utf8_to_json}, { "json_pack_surrogate_pairs", test_json_pack_surrogate_pairs}, - { "json_pack_surrogate_pairs_with_replacement", - test_json_pack_surrogate_pairs_with_replacement}, + { "json_pack_surrogate_pairs_with_replacement", test_json_pack_surrogate_pairs_with_replacement}, + { "json_pack_token_count_overflow", test_json_pack_token_count_overflow}, { 0 } };