Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions cpp/src/arrow/json/chunker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,12 @@ namespace {
class ParsingBoundaryFinder : public BoundaryFinder {
public:
Status FindFirst(string_view partial, string_view block, int64_t* out_pos) override {
// NOTE: We could bubble up JSON parse errors here, but the actual parsing
// step will detect them later anyway.
auto length = ConsumeWholeObject(MultiStringStream({partial, block}));
if (length == string_view::npos) {
*out_pos = -1;
} else if (ARROW_PREDICT_FALSE(length < partial.size())) {
return Status::Invalid("JSON chunk error: invalid data at end of document");
} else {
DCHECK_GE(length, partial.size());
DCHECK_LE(length, partial.size() + block.size());
*out_pos = static_cast<int64_t>(length - partial.size());
}
Expand Down
15 changes: 15 additions & 0 deletions cpp/src/arrow/json/chunker_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <string_view>
#include <vector>

#include <gmock/gmock-matchers.h>
#include <gtest/gtest.h>

#include "arrow/buffer.h"
Expand Down Expand Up @@ -261,6 +262,20 @@ TEST(ChunkerTest, StraddlingSingleLine) {
AssertStraddledChunking(*chunker, join(lines(), ""));
}

TEST(ChunkerTest, Errors) {
  // Input layout: a complete object, a stray closing brace, then another
  // complete object. The stray brace must end up in the leftover of the first
  // block and make the follow-up call fail.
  const std::string first_object = R"({"a":0})";
  const std::string stray_brace = "}";
  const std::string second_object = R"({"a":1})";

  auto chunker = MakeChunker(true);
  std::shared_ptr<Buffer> whole;
  std::shared_ptr<Buffer> rest;
  std::shared_ptr<Buffer> completion;

  // First block: the complete object is returned as `whole`, the stray brace is
  // carried over as `rest`.
  auto first_block = Buffer::FromString(first_object + stray_brace);
  ASSERT_OK(chunker->Process(first_block, &whole, &rest));
  ASSERT_EQ(std::string_view(*whole), first_object);
  ASSERT_EQ(std::string_view(*rest), stray_brace);

  // Second block: completing the leftover brace with a new object is expected
  // to be rejected as Invalid with a chunker-specific message.
  auto second_block = Buffer::FromString(second_object);
  auto status = chunker->ProcessWithPartial(rest, second_block, &completion, &rest);
  ASSERT_RAISES(Invalid, status);
  EXPECT_THAT(status.message(),
              ::testing::StartsWith("JSON chunk error: invalid data at end of document"));
}

TEST_P(BaseChunkerTest, StraddlingEmpty) {
auto all = join(lines(), "\n");

Expand Down
7 changes: 5 additions & 2 deletions cpp/src/arrow/json/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ class HandlerBase : public BlockParser,

protected:
template <typename Handler, typename Stream>
Status DoParse(Handler& handler, Stream&& json) {
Status DoParse(Handler& handler, Stream&& json, size_t json_size) {
constexpr auto parse_flags = rj::kParseIterativeFlag | rj::kParseNanAndInfFlag |
rj::kParseStopWhenDoneFlag |
rj::kParseNumbersAsStringsFlag;
Expand All @@ -776,6 +776,9 @@ class HandlerBase : public BlockParser,
// parse the next object
continue;
case rj::kParseErrorDocumentEmpty:
if (json.Tell() < json_size) {
return ParseError(rj::GetParseError_En(ok.Code()));
}
// parsed all objects, finish
return Status::OK();
case rj::kParseErrorTermination:
Expand All @@ -794,7 +797,7 @@ class HandlerBase : public BlockParser,
RETURN_NOT_OK(ReserveScalarStorage(json->size()));
rj::MemoryStream ms(reinterpret_cast<const char*>(json->data()), json->size());
using InputStream = rj::EncodedInputStream<rj::UTF8<>, rj::MemoryStream>;
return DoParse(handler, InputStream(ms));
return DoParse(handler, InputStream(ms), static_cast<size_t>(json->size()));
}

/// \defgroup handlerbase-append-methods append non-nested values
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/json/parser_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,14 @@ TEST(BlockParser, InferNewFieldsInMiddle) {
}
}

TEST(BlockParser, FailOnInvalidEOF) {
  // Parsing a lone closing brace is expected to fail with Invalid, and the
  // error message should begin with the "document is empty" parse error text.
  auto options = ParseOptions::Defaults();
  const char* expected_prefix = "JSON parse error: The document is empty";

  std::shared_ptr<Array> parsed;
  auto status = ParseFromString(options, "}", &parsed);

  ASSERT_RAISES(Invalid, status);
  EXPECT_THAT(status.message(), ::testing::StartsWith(expected_prefix));
}

TEST(BlockParser, AdHoc) {
auto options = ParseOptions::Defaults();
options.unexpected_field_behavior = UnexpectedFieldBehavior::InferType;
Expand Down
15 changes: 15 additions & 0 deletions cpp/src/arrow/json/reader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,5 +305,20 @@ TEST(ReaderTest, ListArrayWithFewValues) {
AssertTablesEqual(*actual_table, *expected_table);
}

TEST(ReaderTest, FailOnInvalidEOF) {
  // Reading a document that consists of a lone closing brace is expected to
  // fail with Invalid, both with and without `newlines_in_values`.
  auto read_options = ReadOptions::Defaults();
  auto parse_options = ParseOptions::Defaults();
  read_options.use_threads = false;

  for (auto newlines_in_values : {false, true}) {
    parse_options.newlines_in_values = newlines_in_values;

    // Build a fresh stream for every iteration. Sharing one stream across
    // iterations would hand the second reader a stream the first reader has
    // already read from (presumably positioned at EOF), so the second pass
    // would no longer exercise the malformed input itself.
    std::shared_ptr<io::InputStream> input;
    ASSERT_OK(MakeStream("}", &input));

    ASSERT_OK_AND_ASSIGN(auto reader, TableReader::Make(default_memory_pool(), input,
                                                        read_options, parse_options));
    ASSERT_RAISES(Invalid, reader->Read());
  }
}

} // namespace json
} // namespace arrow