Skip to content

Commit

Permalink
add JSONpath support for j_flatten() and j_find_*()
Browse files Browse the repository at this point in the history
  • Loading branch information
mtmorgan committed Mar 23, 2024
1 parent 1a7fdf4 commit 0cc9603
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 41 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rjsoncons
Title: 'C++' Header-Only 'jsoncons' Library for 'JSON' Queries
Version: 1.2.0.9703
Version: 1.2.0.9704
Authors@R: c(
person(
"Martin", "Morgan", role = c("aut", "cre"),
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# rjsoncons 1.3.0

- (1.2.0.9703) add key and value search with `j_flatten()`, `j_find_*()`
- (1.2.0.9704) add key and value search with `j_flatten()`,
`j_find_*()` supporting JSONpointer and JSONpath.
- (1.2.0.9602) compile on Ubuntu 18.04
<https://github.com/mtmorgan/rjsoncons/issues/3>
- (1.2.0.9503) add JSON patch support with `j_patch_apply()`,
Expand Down
90 changes: 67 additions & 23 deletions R/flatten.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
## internal implementation of .j_flatten, always returns a list to
## simplify j_find_*() processing of both JSON & NDJSON
.j_flatten <-
function(data, object_names, as, ..., n_records, verbose, data_type)
function(
data, object_names, as, ..., n_records, verbose,
data_type, path_type)
{
## initialize constants to enable code re-use
path <- ""
path_type <- j_path_type(path)
path <- switch(
path_type, JSONpointer = "", JSONpath = "$",
stop("unsupported path_type '", path_type, "'", call. = FALSE)
)

## validity
.j_valid(data_type, object_names, path, path_type, n_records, verbose)
Expand Down Expand Up @@ -33,6 +37,27 @@
do.call(grepl, args)
}

## internal function to find keys from JSONpointer or JSONpath paths
.j_find_keys_from_path <-
function(path, path_type)
{
if (identical(path_type, "JSONpointer")) {
path <- sub("^/", "", path)
strsplit(path, "/")
} else if (identical(path_type, "JSONpath")) {
## FIXME: the path needs to be parsed propertly, not via
## regex. Also, paths contain enough information to
## distinguish between string and integer keys, but all keys
## are treated as integers
path <- sub("^\\$", "", path)
path <- sub("^\\[(.*)\\]$", "\\1", path)
keys0 <- strsplit(path, "][", fixed = TRUE)
lapply(keys0, sub, pattern = "^'(.*)'$", replacement = "\\1")
} else {
stop("unsupported path_type '", path_type, "'", call. = FALSE)
}
}

## internal function to format j_find_*() result
.j_find_format <-
function(flattened, as, data_type)
Expand Down Expand Up @@ -71,6 +96,9 @@
#' `j_flatten()`, either "string" or "R". For other functions on
#' this page, one of "R", "data.frame", or "tibble".
#'
#' @param path_type character(1) type of 'path' to be returned; one of
#' '"JSONpointer"', '"JSONpath"'; '"JMESpath"' is not supported.
#'
#' @details Functions documented on this page expand `data` into all
#' path / value pairs. This is not suitable for very large JSON
#' documents.
Expand Down Expand Up @@ -100,20 +128,26 @@
#' }
#' }'
#'
#' ## JSONpointer
#' j_flatten(json) |>
#' cat("\n")
#'
#' ## JSONpath
#' j_flatten(json, as = "R", path_type = "JSONpath") |>
#' str()
#'
#' @export
j_flatten <-
function(
data, object_names = "asis", as = "string", ...,
n_records = Inf, verbose = FALSE, data_type = j_data_type(data)
n_records = Inf, verbose = FALSE,
data_type = j_data_type(data), path_type = "JSONpointer"
)
{
stopifnot(.is_scalar_character(as), as %in% c("string", "R"))
result <- .j_flatten(
data, object_names, as, ...,
n_records = n_records, verbose = verbose, data_type = data_type
data, object_names, as, ..., n_records = n_records, verbose = verbose,
data_type = data_type, path_type = path_type
)
if (data_type[[1]] %in% c("json", "R"))
result <- result[[1]]
Expand Down Expand Up @@ -147,16 +181,17 @@ j_flatten <-
j_find_values <-
function(
data, values, object_names = "asis", as = "R", ...,
n_records = Inf, verbose = FALSE, data_type = j_data_type(data)
n_records = Inf, verbose = FALSE,
data_type = j_data_type(data), path_type = "JSONpointer"
)
{
stopifnot(
.is_scalar_character(as), as %in% c("R", "data.frame", "tibble")
)

result <- .j_flatten(
data, object_names, "R", ...,
n_records = n_records, verbose = verbose, data_type = data_type
data, object_names, "R", ..., n_records = n_records, verbose = verbose,
data_type = data_type, path_type = path_type
)
flattened <- lapply(result, function(json_record) {
Filter(\(x) x %in% values, json_record)
Expand All @@ -179,12 +214,15 @@ j_find_values <-
#' @examples
#' j_find_values_grep(json, "missing", as = "tibble")
#'
#' ## JSONpath
#' j_find_values_grep(json, "missing", as = "tibble", path_type = "JSONpath")
#'
#' @export
j_find_values_grep <-
function(
data, pattern, object_names = "asis", as = "R", ...,
n_records = Inf, verbose = FALSE, data_type = j_data_type(data),
grep_args = list()
grep_args = list(), n_records = Inf, verbose = FALSE,
data_type = j_data_type(data), path_type = "JSONpointer"
)
{
stopifnot(
Expand All @@ -194,8 +232,8 @@ j_find_values_grep <-
)

result <- .j_flatten(
data, object_names, "R", ...,
n_records = n_records, verbose = verbose, data_type = data_type
data, object_names, "R", ..., n_records = n_records, verbose = verbose,
data_type = data_type, path_type = path_type
)
flattened <- lapply(result, function(json_record, grep_args) {
values <- unlist(json_record, use.names = FALSE)
Expand Down Expand Up @@ -226,11 +264,15 @@ j_find_values_grep <-
#' j_find_keys(json, "1", as = "tibble")
#' j_find_keys(json, c("discards", "warnings"), as = "tibble")
#'
#' ## JSONpath
#' j_find_keys(json, "discards", as = "tibble", path_type = "JSONpath")
#'
#' @export
j_find_keys <-
function(
data, keys, object_names = "asis", as = "R", ...,
n_records = Inf, verbose = FALSE, data_type = j_data_type(data)
n_records = Inf, verbose = FALSE,
data_type = j_data_type(data), path_type = "JSONpointer"
)
{
stopifnot(
Expand All @@ -239,12 +281,11 @@ j_find_keys <-
)

result <- .j_flatten(
data, object_names, "R", ...,
n_records = n_records, verbose = verbose, data_type = data_type
data, object_names, "R", ..., n_records = n_records, verbose = verbose,
data_type = data_type, path_type = path_type
)
flattened <- lapply(result, function(json_record) {
paths <- names(json_record)
keys0 <- strsplit(paths, "/")
keys0 <- .j_find_keys_from_path(names(json_record), path_type)
idx0 <- unlist(keys0) %in% keys
idx <- unique(rep(seq_along(keys0), lengths(keys0))[idx0])
json_record[idx]
Expand All @@ -259,19 +300,22 @@ j_find_keys <-
#' regular expression.
#'
#' @details For `j_find_keys_grep()`, the `key` can define a pattern
#' that spans across JSONpointer path elements.
#' that spans across JSONpointer or JSONpath elements.
#'
#' @examples
#' j_find_keys_grep(json, "discard", as = "tibble")
#' j_find_keys_grep(json, "1", as = "tibble")
#' j_find_keys_grep(json, "car.*/101", as = "tibble")
#'
#' ## JSONpath
#' j_find_keys_grep(json, "car.*\\['101", as = "tibble", path_type = "JSONpath")
#'
#' @export
j_find_keys_grep <-
function(
data, pattern, object_names = "asis", as = "R", ...,
n_records = Inf, verbose = FALSE, data_type = j_data_type(data),
grep_args = list()
grep_args = list(), n_records = Inf, verbose = FALSE,
data_type = j_data_type(data), path_type = "JSONpointer"
)
{
stopifnot(
Expand All @@ -280,8 +324,8 @@ j_find_keys_grep <-
)

result <- .j_flatten(
data, object_names, "R", ...,
n_records = n_records, verbose = verbose, data_type = data_type
data, object_names, "R", ..., n_records = n_records, verbose = verbose,
data_type = data_type, path_type = path_type
)
flattened <- lapply(result, function(json_record, grep_args) {
idx <- .j_find_grepl(pattern, names(json_record), grep_args)
Expand Down
28 changes: 28 additions & 0 deletions inst/tinytest/test_flatten.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,34 @@ flat_r <- list(
)
named_list <- structure(list(), names = character(0))

## .j_find_keys_from_path

.j_find_keys_from_path <- rjsoncons:::.j_find_keys_from_path

expect_identical(.j_find_keys_from_path("/", "JSONpointer"), list(character()))
expect_identical(.j_find_keys_from_path("/a", "JSONpointer"), list("a"))
expect_identical(
.j_find_keys_from_path("/a/b", "JSONpointer"),
list(c("a", "b"))
)
expect_identical(
.j_find_keys_from_path("/a/1", "JSONpointer"),
list(c("a", "1"))
)

expect_identical(.j_find_keys_from_path("$", "JSONpath"), list(character()))
expect_identical(.j_find_keys_from_path("$['a']", "JSONpath"), list("a"))
expect_identical(
.j_find_keys_from_path("$['a']['b']", "JSONpath"),
list(c("a", "b"))
)
expect_identical(
.j_find_keys_from_path("$['a'][1]", "JSONpath"),
list(c("a", "1"))
)

expect_error(.j_find_keys_from_path("@", "JMESpath"))

## j_flatten

expect_identical(j_flatten(json), flat)
Expand Down
Loading

0 comments on commit 0cc9603

Please sign in to comment.