Skip to content

Commit

Permalink
Merge branch 'ndjson-nested-vector-bug'
Browse files Browse the repository at this point in the history
  • Loading branch information
mtmorgan committed Jun 28, 2024
2 parents 2889365 + 6ff41b8 commit 200955d
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 58 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rjsoncons
Title: Query, Pivot, Patch, and Validate 'JSON' and 'NDJSON'
Version: 1.3.0.9100
Version: 1.3.0.9200
Authors@R: c(
person(
"Martin", "Morgan", role = c("aut", "cre"),
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# rjsoncons 1.3.1

- (1.3.0.9200) bug fix: NDJSON `j_pivot('{"a": [1,2]}')` now pivots as
`'{"a": [[1,2]]}'`. NDJSON `j_pivot()` records must *always* be
objects.
- (1.3.0.9100) add JSONPath to examples article.

# rjsoncons 1.3.0
Expand Down
3 changes: 2 additions & 1 deletion R/rquerypivot.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ j_query <-
#'
#' ## use j_pivot() to filter 'PushEvent' for organizations
#' path <- "[{id: id, type: type, org: org}]
#' [?@.type == 'PushEvent' && @.org != null]"
#' [?@.type == 'PushEvent' && @.org != null] |
#' [0]"
#' j_pivot(ndjson_file, path, as = "data.frame")
#'
#' ## try also
Expand Down
22 changes: 22 additions & 0 deletions inst/tinytest/test_rquerypivot.R
Original file line number Diff line number Diff line change
Expand Up @@ -167,5 +167,27 @@ expect_identical(j_pivot(json), expected)
expect_identical(j_pivot(ndjson_vector), expected)
expect_identical(j_pivot(ndjson_con), expected)

json <- '[{"a": [1,2]}]' # nested vector
ndjson_vector <- '{"a": [1, 2]}'
writeLines(ndjson_vector, ndjson_con)
expected <- '{"a":[[1,2]]}'
expect_identical(j_pivot(json), expected)
expect_identical(
j_pivot(ndjson_vector, data_type = "ndjson"),
expected
)
expect_identical(
j_pivot(ndjson_con, data_type = c("ndjson", "file")),
expected
)

json <- '[{"a": [1, 2]}, {"a": [3, 4]}]'
ndjson_vector <- c('{"a": [1, 2]}', '{"a": [3, 4]}')
writeLines(ndjson_vector, ndjson_con)
expected <- '{"a":[[1,2],[3,4]]}'
expect_identical(j_pivot(json), expected)
expect_identical(j_pivot(ndjson_vector), expected)
expect_identical(j_pivot(ndjson_con), expected)

expect_identical(j_data_type(json), "json") # FIXME: can we be smarter
expect_identical(j_data_type(ndjson_vector), "ndjson")
3 changes: 2 additions & 1 deletion man/rquerypivot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

92 changes: 38 additions & 54 deletions src/rquerypivot.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class rquerypivot

// pivot implementation

std::vector<std::string> all_keys(const Json j)
std::vector<std::string> pivot_json_all_keys(const Json j)
{
// 'keys' returns keys in the order they are discovered, 'seen' is
// used as a filter to only insert unseen keys
Expand All @@ -73,10 +73,10 @@ class rquerypivot
return keys;
}

Json pivot_array_as_object(const Json j)
Json pivot_json_array(const Json j)
{
Json object(json_object_arg);
const std::vector<std::string> keys = all_keys(j);
const std::vector<std::string> keys = pivot_json_all_keys(j);

// initialize
for (const auto& key : keys) {
Expand All @@ -98,56 +98,53 @@ class rquerypivot
return object;
}

Json pivot(const Json j)
void pivot_json(Json j)
{
Json value;

switch(j.type()) {
case json_type::null_value:
// 'null' is treated as '{}'
j = Json(json_object_arg);
break;
case json_type::object_value: {
// 'object_value' assumes j is already object-of-array
value = j;
// all members of 'j' need to be JSON array
for (auto& member: j.object_range()) {
auto key = member.key();
if (member.value().type() != json_type::array_value) {
Json ja = Json::make_array(1, j[key]);
j[key].swap(ja);
}
}
break;
}
case json_type::array_value: {
value = pivot_array_as_object(j);
j = pivot_json_array(j);
break;
}
default: {
cpp11::stop("`j_pivot()` 'path' must yield an object or array");
}}

// a Json object-of-arrays
return value;
}

void pivot_append_result(Json j)
result_.push_back(j);
}

void pivot_ndjson(Json j)
{
if (j.type() == json_type::null_value) {
// 'null' is treated as '{}'
j = Json(json_object_arg);
// skip 'null' records
return;
}

// all members of 'j' need to be JSON array
for (auto& member: j.object_range()) {
auto key = member.key();
if (member.value().type() != json_type::array_value) {
if (result_.size() == 0) {
// result_.push_back(Json(json_object_arg));
for (auto& member: j.object_range()) {
// all members of 'j' need to be JSON arrays
auto key = member.key();
Json ja = Json::make_array(1, j[key]);
j[key].swap(ja);
}
}

if (result_.size() == 0) {
// first pivot - insert (even '{}') & exit
result_.push_back(j);
return;
} else if (result_.size() == 1 && result_[0].size() == 0) {
// first pivot was '{}' -- replace with current
result_[0] = j;
return;
} else if (j.size() == 0) {
// filter empty pivots
return;
}

// insert j.member after result_[0].member. three cases:
Expand All @@ -157,9 +154,7 @@ class rquerypivot
// use unordered_set to keep track of key status in result_[0], j
// fill j_keys; trim as each key from j is added to result_[0]
std::unordered_set<std::string> j_keys;
std::size_t n_j = 0;
for (const auto& j_elt : j.object_range()) {
n_j = std::max(n_j, j_elt.value().size());
j_keys.insert(j_elt.key());
}

Expand All @@ -175,26 +170,14 @@ class rquerypivot
}
// insert j[r_elt.key()] after r_elt.value()
const auto& j_elt = j[r_elt.key()];
r_elt.value().insert(
r_elt.value().array_range().end(),
j_elt.array_range().begin(),
j_elt.array_range().end());
r_elt.value().push_back(j_elt);
// remove key from 'j', leaving keys not in r
j_keys.erase(r_elt.key());
}

// key only in result_[0]
if (r_keys.size()) {
// construct array of n_j 'null' to pad each key
Json pad(json_array_arg);
pad.reserve(n_j);
for (std::size_t i = 0; i < n_j; ++i)
pad.push_back(Json::null());
for (auto& r_key : r_keys) {
result_[0][r_key].insert(
result_[0][r_key].array_range().end(),
pad.array_range().begin(), pad.array_range().end());
}
// key only in result_[0] -- insert 'null'
for (auto& r_key : r_keys) {
result_[0][r_key].push_back(Json::null());
}

// key only in j
Expand All @@ -205,11 +188,9 @@ class rquerypivot
for (std::size_t i = 0; i < n_r; ++i)
pad.push_back(Json::null());
for (auto& j_key : j_keys) {
// initialize key as empty array
result_[0][j_key] = pad;
result_[0][j_key].insert(
result_[0][j_key].array_range().end(),
j[j_key].array_range().begin(),
j[j_key].array_range().end());
result_[0][j_key].push_back(j[j_key]);
}
}
}
Expand Down Expand Up @@ -240,8 +221,11 @@ class rquerypivot
void pivot_transform(Json j)
{
Json q = query(j);
Json p = pivot(q);
pivot_append_result(p);
if (data_type_ == data_type::json_data_type) {
pivot_json(q);
} else {
pivot_ndjson(q);
}
}

void flatten_transform(Json j)
Expand Down
3 changes: 2 additions & 1 deletion vignettes/a_rjsoncons.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ compared to `as = "data.frame"`).
```{r ndjson_j_pivot_filter}
path <-
"[{id: id, type: type, org: org}]
[?@.type == 'PushEvent' && @.org != null]"
[?@.type == 'PushEvent' && @.org != null] |
[0]"
j_pivot(ndjson_file, path, as = "data.frame")
```

Expand Down

0 comments on commit 200955d

Please sign in to comment.