From 60b51dfe2f5692d8b7ee0207c29c0495ae4c2f6b Mon Sep 17 00:00:00 2001
From: Martin Morgan <Martin.Morgan@RoswellPark.org>
Date: Tue, 19 Mar 2024 01:23:19 -0400
Subject: [PATCH 1/7] add j_flatten(), j_find_*()

---
 NAMESPACE                      |   5 +
 R/cpp11.R                      |   8 ++
 R/flatten.R                    | 233 +++++++++++++++++++++++++++++++++
 R/rquerypivot.R                |   2 +-
 inst/extdata/flatten_data.json |  12 ++
 inst/tinytest/test_flatten.R   | 103 +++++++++++++++
 man/flatten.Rd                 | 162 +++++++++++++++++++++++
 src/cpp11.cpp                  |  16 +++
 src/flatten.cpp                |  63 +++++++++
 src/rquerypivot.h              |  18 +++
 10 files changed, 621 insertions(+), 1 deletion(-)
 create mode 100644 R/flatten.R
 create mode 100644 inst/extdata/flatten_data.json
 create mode 100644 inst/tinytest/test_flatten.R
 create mode 100644 man/flatten.Rd
 create mode 100644 src/flatten.cpp

diff --git a/NAMESPACE b/NAMESPACE
index 2c9a8e6..e7e9515 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,11 @@ S3method(j_patch_op,j_patch_op)
 S3method(print,j_patch_op)
 export(as_r)
 export(j_data_type)
+export(j_find_keys)
+export(j_find_keys_grep)
+export(j_find_values)
+export(j_find_values_grep)
+export(j_flatten)
 export(j_patch_apply)
 export(j_patch_from)
 export(j_patch_op)
diff --git a/R/cpp11.R b/R/cpp11.R
index a1c38ae..bbd8fb3 100644
--- a/R/cpp11.R
+++ b/R/cpp11.R
@@ -1,5 +1,13 @@
 # Generated by cpp11: do not edit by hand
 
+cpp_j_flatten <- function(data, data_type, object_names, as, path, path_type) {
+  .Call(`_rjsoncons_cpp_j_flatten`, data, data_type, object_names, as, path, path_type)
+}
+
+cpp_j_flatten_con <- function(con, data_type, object_names, as, path, path_type, n_records, verbose) {
+  .Call(`_rjsoncons_cpp_j_flatten_con`, con, data_type, object_names, as, path, path_type, n_records, verbose)
+}
+
 cpp_j_patch_apply <- function(data, data_type, patch, as) {
   .Call(`_rjsoncons_cpp_j_patch_apply`, data, data_type, patch, as)
 }
diff --git a/R/flatten.R b/R/flatten.R
new file mode 100644
index 0000000..6bf10d2
--- /dev/null
+++ b/R/flatten.R
@@ -0,0 +1,233 @@
+#' @rdname flatten
+#'
+#' @title Flatten and find keys or values
+#'
+#' @description `j_flatten()` transforms a JSON document into a list
+#'     where names are JSONpointer 'keys' and elements are the
+#'     corresponding 'values' from the JSON document.
+#'
+#' @inheritParams j_query
+#'
+#' @param as character(1) describing the return type.  For
+#'     `j_flatten()`, either "string" or "R". For other functions on
+#'     this page, one of "list", "data.frame", or "tibble".
+#'
+#' @details
+#'
+#' Functions documented on this page expand `data` into all key /
+#' value pairs. This is not suitable for very large JSON documents.
+#'
+#' @return `j_flatten()` returns a named list, where `names()` are the
+#'     JSONpointer paths to each element in the JSON document and list
+#'     elements are the corresponding values.
+#'
+#' @examples
+#' json <- '{
+#'     "discards": {
+#'         "1000": "Record does not exist",
+#'         "1004": "Queue limit exceeded",
+#'         "1010": "Discarding timed-out partial msg"
+#'     },
+#'     "warnings": {
+#'         "0": "Phone number missing country code",
+#'         "1": "State code missing",
+#'         "2": "Zip code missing"
+#'     }
+#' }'
+#'
+#' j_flatten(json) |>
+#'     str()
+#'
+#' @export
+j_flatten <-
+    function(
+        data, object_names = "asis", as = "string", ...,
+        n_records = Inf, verbose = FALSE,
+        data_type = j_data_type(data)
+    )
+{
+    ## initialize constants to enable code re-use
+    path <- ""
+    path_type <- j_path_type(path)
+
+    ## validity
+    .j_valid(data_type, object_names, path, path_type, n_records, verbose)
+    stopifnot(.is_scalar_character(as), as %in% c("string", "R"))
+
+    data <- .as_json_string(data, data_type, ...)
+    result <- do_cpp(
+        cpp_j_flatten, cpp_j_flatten_con,
+        data, data_type, object_names, as, path, path_type,
+        n_records = n_records, verbose = verbose
+    )
+
+    if (data_type[[1]] %in% c("json", "R"))
+        result <- result[[1]]
+
+    result
+}
+
+j_find_format <-
+    function(flattened, as)
+{
+    if (identical(as, "list")) {
+        flattened
+    } else {
+        keys <- names(flattened)
+        values <- unlist(flattened, use.names = FALSE)
+        switch(
+            as,
+            data.frame = data.frame(key = keys, value = values),
+            tibble = tibble::tibble(key = keys, value = values)
+        )
+    }
+}
+
+#' @rdname flatten
+#'
+#' @description `j_find_values()` finds paths to exactly matching
+#'     values.
+#'
+#' @param values vector of one or more values, all of the same type
+#'     (e.g., double, integer, character).
+#'
+#' @return `j_find_values()` and `j_find_values_grep()` return a list
+#'     with names as JSONpointer paths and list elements the matching
+#'     values, or a `data.frame` or `tibble` with columns `path` and
+#'     `value`. Values are coerced to a common type when `as` is
+#'     `data.frame` or `tibble`.
+#'
+#' @examples
+#' j_find_values(json, "Zip code missing", as = "tibble")
+#' j_find_values(
+#'     json,
+#'     c("Queue limit exceeded", "Zip code missing"),
+#'     as = "tibble"
+#' )
+#'
+#' @export
+j_find_values <-
+    function(
+        data, values, object_names = "asis", as = "list",
+        data_type = j_data_type(data)
+    )
+{
+    types <- unique(vapply(values, typeof, character(1)))
+    stopifnot(
+        length(types) == 1L,
+        .is_scalar_character(as), as %in% c("list", "data.frame", "tibble")
+    )
+
+    flattened0 <- j_flatten(data, object_names, "R")
+    flattened <- Filter(\(x) x %in% values, flattened0)
+
+    j_find_format(flattened, as)
+}
+
+#' @rdname flatten
+#'
+#' @description `j_find_values_grep()` finds paths to values matching
+#'     a regular expression.
+#'
+#' @param pattern character(1) regular expression to match values or
+#'     keys.
+#'
+#' @param ... for `j_find_values_grep()` and `j_find_keys_grep()`,
+#'     additional arguments passed to `grepl()`.
+#'
+#' @examples
+#' j_find_values_grep(json, "missing", as = "tibble")
+#'
+#' @export
+j_find_values_grep <-
+    function(
+        data, pattern, ..., object_names = "asis", as = "list",
+        data_type = j_data_type(data)
+    )
+{
+    stopifnot(
+        .is_scalar_character(pattern),
+        .is_scalar_character(as), as %in% c("list", "data.frame", "tibble")
+    )
+
+    flattened <- j_flatten(data, object_names, "R")
+    values <- unlist(flattened, use.names = FALSE)
+    idx <- grepl(pattern, values, ...)
+
+    j_find_format(flattened[idx], as)
+}
+
+#' @rdname flatten
+#'
+#' @description `j_find_keys()` finds paths to exactly matching keys.
+#'
+#' @param keys character() vector of one or more keys to be matched
+#'     exactly to path elements.
+#'
+#' @details
+#'
+#' For `j_find_keys()`, the `key` must exactly match one or more
+#' consecutive keys in the JSONpointer path returned by `j_flatten()`.
+#'
+#' @return `j_find_keys()` and `j_find_keys_grep()` returns a list,
+#'     data.frame, or tibble similar to `j_find_values()` and
+#'     `j_find_values_grep()`.
+#'
+#' @examples
+#' j_find_keys(json, "discards", as = "tibble")
+#' j_find_keys(json, "1", as = "tibble")
+#' j_find_keys(json, c("discards", "warnings"), as = "tibble")
+#'
+#' @export
+j_find_keys <-
+    function(
+        data, keys, object_names = "asis", as = "list",
+        data_type = j_data_type(data)
+    )
+{
+    stopifnot(
+        is.character(keys), !anyNA(keys),
+        .is_scalar_character(as), as %in% c("list", "data.frame", "tibble")
+    )
+
+    flattened <- j_flatten(data, object_names, "R")
+    keys0 <- names(flattened)
+    keys1 <- strsplit(keys0, "/")
+    idx1 <- unlist(keys1) %in% keys
+    idx <- unique(rep(seq_along(keys1), lengths(keys1))[idx1])
+
+    j_find_format(flattened[idx], as)
+}
+
+#' @rdname flatten
+#'
+#' @description `j_find_keys_grep()` finds paths to keys matching a
+#'     regular expression.
+#'
+#' @details
+#'
+#' For `j_find_keys_grep()`, the `key` can define a pattern that spans
+#' across JSONpointer path elements.
+#'
+#' @examples
+#' j_find_keys_grep(json, "discard", as = "tibble")
+#' j_find_keys_grep(json, "1", as = "tibble")
+#' j_find_keys_grep(json, "car.*/101", as = "tibble")
+#'
+#' @export
+j_find_keys_grep <-
+    function(
+        data, pattern, ..., object_names = "asis", as = "list",
+        data_type = j_data_type(data)
+    )
+{
+    stopifnot(
+        .is_scalar_character(pattern),
+        .is_scalar_character(as), as %in% c("list", "data.frame", "tibble")
+    )
+
+    flattened <- j_flatten(data, object_names, "R")
+    idx <- grepl(pattern, names(flattened), ...)
+
+    j_find_format(flattened[idx], as)
+}
diff --git a/R/rquerypivot.R b/R/rquerypivot.R
index 84aa005..001aea4 100644
--- a/R/rquerypivot.R
+++ b/R/rquerypivot.R
@@ -71,7 +71,7 @@ j_query <-
     )
 {
     .j_valid(data_type, object_names, path, path_type, n_records, verbose)
-    stopifnot(as %in% c("string", "R"))
+    stopifnot(.is_scalar_character(as), as %in% c("string", "R"))
 
     data <- .as_json_string(data, data_type, ...)
     result <- do_cpp(
diff --git a/inst/extdata/flatten_data.json b/inst/extdata/flatten_data.json
new file mode 100644
index 0000000..1640161
--- /dev/null
+++ b/inst/extdata/flatten_data.json
@@ -0,0 +1,12 @@
+{
+    "discards": {
+        "1000": "Record does not exist",
+        "1004": "Queue limit exceeded",
+        "1010": "Discarding timed-out partial msg"
+    },
+    "warnings": {
+        "0": "Phone number missing country code",
+        "1": "State code missing",
+        "2": "Zip code missing"
+    }
+}
diff --git a/inst/tinytest/test_flatten.R b/inst/tinytest/test_flatten.R
new file mode 100644
index 0000000..0ca2424
--- /dev/null
+++ b/inst/tinytest/test_flatten.R
@@ -0,0 +1,103 @@
+json_file <- system.file(package = "rjsoncons", "extdata", "flatten_data.json")
+json <- paste0(trimws(readLines(json_file, warn = FALSE)), collapse = "")
+ojson <- paste0(
+'{',
+    '"warnings":{',
+        '"0":"Phone number missing country code",',
+        '"1":"State code missing",',
+        '"2":"Zip code missing"',
+    '},',
+    '"discards":{',
+        '"1000":"Record does not exist",',
+        '"1004":"Queue limit exceeded",',
+        '"1010":"Discarding timed-out partial msg"',
+    '}',
+'}')
+flat <- paste0(
+    '{',
+    '"/discards/1000":"Record does not exist",',
+    '"/discards/1004":"Queue limit exceeded",',
+    '"/discards/1010":"Discarding timed-out partial msg",',
+    '"/warnings/0":"Phone number missing country code",',
+    '"/warnings/1":"State code missing",',
+    '"/warnings/2":"Zip code missing"',
+    '}'
+)
+oflat <- paste0(
+    '{',
+    '"/warnings/0":"Phone number missing country code",',
+    '"/warnings/1":"State code missing",',
+    '"/warnings/2":"Zip code missing",',
+    '"/discards/1000":"Record does not exist",',
+    '"/discards/1004":"Queue limit exceeded",',
+    '"/discards/1010":"Discarding timed-out partial msg"',
+    '}'
+)
+flat_r <- list(
+    `/discards/1000` = "Record does not exist",
+    `/discards/1004` = "Queue limit exceeded", 
+    `/discards/1010` = "Discarding timed-out partial msg",
+    `/warnings/0` = "Phone number missing country code", 
+    `/warnings/1` = "State code missing",
+    `/warnings/2` = "Zip code missing"
+)
+named_list <- structure(list(), names = character(0))
+
+## j_flatten
+
+expect_identical(j_flatten(json), flat)
+expect_identical(j_flatten(json, as = "R"), flat_r)
+
+expect_identical(j_flatten(json_file, "asis"), flat)
+expect_identical(j_flatten(json_file, "asis", as = "R"), flat_r)
+
+expect_identical(j_flatten(ojson), oflat)
+expect_identical(j_flatten(ojson, "sort"), flat)
+
+
+## j_find_values
+
+expect_identical(j_find_values(json, "State code missing"), flat_r[5])
+expect_identical(
+    j_find_values(json, c("State code missing", "Queue limit exceeded")),
+    flat_r[c(2, 5)]
+)
+
+expect_identical(
+    j_find_values(
+        json, c("State code missing", "Queue limit exceeded"),
+        as = "data.frame"
+    ),
+    data.frame(
+        key = names(flat_r[c(2, 5)]),
+        value = unlist(flat_r[c(2, 5)], use.names = FALSE)
+    ),
+    info = "as = 'data.frame'"
+)
+expect_identical( # as = "tibble"
+    j_find_values(
+        json, c("State code missing", "Queue limit exceeded"),
+        as = "tibble"
+    ),
+    tibble::tibble(
+        key = names(flat_r[c(2, 5)]),
+        value = unlist(flat_r[c(2, 5)], use.names = FALSE)
+    ),
+    info = "as = 'tibble'"
+)
+
+expect_identical(j_find_values(json, "foo"), named_list)
+
+## j_find_values_grep
+
+expect_identical(j_find_values_grep(json, "missing"), flat_r[4:6])
+
+## j_find_keys
+
+expect_identical(j_find_keys(json, "warnings"), flat_r[4:6])
+expect_identical(j_find_keys(json, c("1000", "1")), flat_r[c(1, 5)])
+
+## j_find_keys_grep
+
+expect_identical(j_find_keys_grep(json, "warn"), flat_r[4:6])
+expect_identical(j_find_keys_grep(json, "ard.*10$"), flat_r[3])
diff --git a/man/flatten.Rd b/man/flatten.Rd
new file mode 100644
index 0000000..ee5826c
--- /dev/null
+++ b/man/flatten.Rd
@@ -0,0 +1,162 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/flatten.R
+\name{j_flatten}
+\alias{j_flatten}
+\alias{j_find_values}
+\alias{j_find_values_grep}
+\alias{j_find_keys}
+\alias{j_find_keys_grep}
+\title{Flatten and find keys or values}
+\usage{
+j_flatten(
+  data,
+  object_names = "asis",
+  as = "string",
+  ...,
+  n_records = Inf,
+  verbose = FALSE,
+  data_type = j_data_type(data)
+)
+
+j_find_values(
+  data,
+  values,
+  object_names = "asis",
+  as = "list",
+  data_type = j_data_type(data)
+)
+
+j_find_values_grep(
+  data,
+  pattern,
+  ...,
+  object_names = "asis",
+  as = "list",
+  data_type = j_data_type(data)
+)
+
+j_find_keys(
+  data,
+  keys,
+  object_names = "asis",
+  as = "list",
+  data_type = j_data_type(data)
+)
+
+j_find_keys_grep(
+  data,
+  pattern,
+  ...,
+  object_names = "asis",
+  as = "list",
+  data_type = j_data_type(data)
+)
+}
+\arguments{
+\item{data}{a character() JSON string or NDJSON records, or the
+name of a file or URL containing JSON or NDJSON, or an \emph{R}
+object parsed to a JSON string using \code{jsonlite::toJSON()}.}
+
+\item{object_names}{character(1) order \code{data} object elements
+\code{"asis"} (default) or \code{"sort"} before filtering on \code{path}.}
+
+\item{as}{character(1) describing the return type.  For
+\code{j_flatten()}, either "string" or "R". For other functions on
+this page, one of "list", "data.frame", or "tibble".}
+
+\item{...}{for \code{j_find_values_grep()} and \code{j_find_keys_grep()},
+additional arguments passed to \code{grepl()}.}
+
+\item{n_records}{numeric(1) maximum number of NDJSON records parsed.}
+
+\item{verbose}{logical(1) report progress when parsing large NDJSON
+files.}
+
+\item{data_type}{character(1) type of \code{data}; one of \code{"json"},
+\code{"ndjson"}. Inferred from \code{data} using \code{j_data_type()}.}
+
+\item{values}{vector of one or more values, all of the same type
+(e.g., double, integer, character).}
+
+\item{pattern}{character(1) regular expression to match values or
+keys.}
+
+\item{keys}{character() vector of one or more keys to be matched
+exactly to path elements.}
+}
+\value{
+\code{j_flatten()} returns a named list, where \code{names()} are the
+JSONpointer paths to each element in the JSON document and list
+elements are the corresponding values.
+
+\code{j_find_values()} and \code{j_find_values_grep()} return a list
+with names as JSONpointer paths and list elements the matching
+values, or a \code{data.frame} or \code{tibble} with columns \code{path} and
+\code{value}. Values are coerced to a common type when \code{as} is
+\code{data.frame} or \code{tibble}.
+
+\code{j_find_keys()} and \code{j_find_keys_grep()} returns a list,
+data.frame, or tibble similar to \code{j_find_values()} and
+\code{j_find_values_grep()}.
+}
+\description{
+\code{j_flatten()} transforms a JSON document into a list
+where names are JSONpointer 'keys' and elements are the
+corresponding 'values' from the JSON document.
+
+\code{j_find_values()} finds paths to exactly matching
+values.
+
+\code{j_find_values_grep()} finds paths to values matching
+a regular expression.
+
+\code{j_find_keys()} finds paths to exactly matching keys.
+
+\code{j_find_keys_grep()} finds paths to keys matching a
+regular expression.
+}
+\details{
+Functions documented on this page expand \code{data} into all key /
+value pairs. This is not suitable for very large JSON documents.
+
+For \code{j_find_keys()}, the \code{key} must exactly match one or more
+consecutive keys in the JSONpointer path returned by \code{j_flatten()}.
+
+For \code{j_find_keys_grep()}, the \code{key} can define a pattern that spans
+across JSONpointer path elements.
+}
+\examples{
+json <- '{
+    "discards": {
+        "1000": "Record does not exist",
+        "1004": "Queue limit exceeded",
+        "1010": "Discarding timed-out partial msg"
+    },
+    "warnings": {
+        "0": "Phone number missing country code",
+        "1": "State code missing",
+        "2": "Zip code missing"
+    }
+}'
+
+j_flatten(json) |>
+    str()
+
+j_find_values(json, "Zip code missing", as = "tibble")
+j_find_values(
+    json,
+    c("Queue limit exceeded", "Zip code missing"),
+    as = "tibble"
+)
+
+j_find_values_grep(json, "missing", as = "tibble")
+
+j_find_keys(json, "discards", as = "tibble")
+j_find_keys(json, "1", as = "tibble")
+j_find_keys(json, c("discards", "warnings"), as = "tibble")
+
+j_find_keys_grep(json, "discard", as = "tibble")
+j_find_keys_grep(json, "1", as = "tibble")
+j_find_keys_grep(json, "car.*/101", as = "tibble")
+
+}
diff --git a/src/cpp11.cpp b/src/cpp11.cpp
index 6a3e736..e660900 100644
--- a/src/cpp11.cpp
+++ b/src/cpp11.cpp
@@ -5,6 +5,20 @@
 #include "cpp11/declarations.hpp"
 #include <R_ext/Visibility.h>
 
+// flatten.cpp
+sexp cpp_j_flatten(const std::vector<std::string>& data, const std::string& data_type, const std::string& object_names, const std::string& as, const std::string& path, const std::string& path_type);
+extern "C" SEXP _rjsoncons_cpp_j_flatten(SEXP data, SEXP data_type, SEXP object_names, SEXP as, SEXP path, SEXP path_type) {
+  BEGIN_CPP11
+    return cpp11::as_sexp(cpp_j_flatten(cpp11::as_cpp<cpp11::decay_t<const std::vector<std::string>&>>(data), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(data_type), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(object_names), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(as), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(path), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(path_type)));
+  END_CPP11
+}
+// flatten.cpp
+sexp cpp_j_flatten_con(const sexp& con, const std::string& data_type, const std::string& object_names, const std::string& as, const std::string& path, const std::string& path_type, const double n_records, const bool verbose);
+extern "C" SEXP _rjsoncons_cpp_j_flatten_con(SEXP con, SEXP data_type, SEXP object_names, SEXP as, SEXP path, SEXP path_type, SEXP n_records, SEXP verbose) {
+  BEGIN_CPP11
+    return cpp11::as_sexp(cpp_j_flatten_con(cpp11::as_cpp<cpp11::decay_t<const sexp&>>(con), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(data_type), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(object_names), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(as), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(path), cpp11::as_cpp<cpp11::decay_t<const std::string&>>(path_type), cpp11::as_cpp<cpp11::decay_t<const double>>(n_records), cpp11::as_cpp<cpp11::decay_t<const bool>>(verbose)));
+  END_CPP11
+}
 // patch.cpp
 sexp cpp_j_patch_apply(const std::string& data, const std::string& data_type, const std::string& patch, const std::string& as);
 extern "C" SEXP _rjsoncons_cpp_j_patch_apply(SEXP data, SEXP data_type, SEXP patch, SEXP as) {
@@ -80,6 +94,8 @@ extern "C" {
 static const R_CallMethodDef CallEntries[] = {
     {"_rjsoncons_cpp_as_r",          (DL_FUNC) &_rjsoncons_cpp_as_r,          3},
     {"_rjsoncons_cpp_as_r_con",      (DL_FUNC) &_rjsoncons_cpp_as_r_con,      5},
+    {"_rjsoncons_cpp_j_flatten",     (DL_FUNC) &_rjsoncons_cpp_j_flatten,     6},
+    {"_rjsoncons_cpp_j_flatten_con", (DL_FUNC) &_rjsoncons_cpp_j_flatten_con, 8},
     {"_rjsoncons_cpp_j_patch_apply", (DL_FUNC) &_rjsoncons_cpp_j_patch_apply, 4},
     {"_rjsoncons_cpp_j_patch_from",  (DL_FUNC) &_rjsoncons_cpp_j_patch_from,  5},
     {"_rjsoncons_cpp_j_patch_print", (DL_FUNC) &_rjsoncons_cpp_j_patch_print, 3},
diff --git a/src/flatten.cpp b/src/flatten.cpp
new file mode 100644
index 0000000..4a9e083
--- /dev/null
+++ b/src/flatten.cpp
@@ -0,0 +1,63 @@
+#include <jsoncons/json.hpp>
+#include <jsoncons_ext/jsonpointer/jsonpointer.hpp>
+
+#include <cpp11/declarations.hpp>
+#include "enum_index.h"
+#include "rquerypivot.h"
+
+using namespace jsoncons;
+
+[[cpp11::register]]
+sexp cpp_j_flatten(
+    const std::vector<std::string>& data, const std::string& data_type,
+    const std::string& object_names, const std::string& as,
+    const std::string& path, const std::string& path_type)
+{
+    sexp result;
+    switch(enum_index(object_names_map, object_names)) {
+    case object_names::asis: {
+        result =
+            rquerypivot<ojson>(path, as, data_type, path_type, false).
+            flatten(data);
+        break;
+    }
+    case object_names::sort: {
+        result =
+            rquerypivot<json>(path, as, data_type, path_type, false).
+            flatten(data);
+        break;
+    }
+    default: {
+        cpp11::stop("unknown `object_names = '" + object_names + "'`");
+    }}
+
+    return result;
+}
+
+[[cpp11::register]]
+sexp cpp_j_flatten_con(
+    const sexp& con, const std::string& data_type,
+    const std::string& object_names, const std::string& as,
+    const std::string& path, const std::string& path_type,
+    const double n_records, const bool verbose)
+{
+    sexp result;
+    switch(enum_index(object_names_map, object_names)) {
+    case object_names::asis: {
+        result =
+            rquerypivot<ojson>(path, as, data_type, path_type, verbose).
+            flatten(con, n_records);
+        break;
+    }
+    case object_names::sort: {
+        result =
+            rquerypivot<json>(path, as, data_type, path_type, verbose).
+            flatten(con, n_records);
+        break;
+    }
+    default: {
+        cpp11::stop("unknown `object_names = '" + object_names + "'`");
+    }}
+
+    return result;
+}
diff --git a/src/rquerypivot.h b/src/rquerypivot.h
index 6211fa6..3f80ca1 100644
--- a/src/rquerypivot.h
+++ b/src/rquerypivot.h
@@ -177,6 +177,11 @@ class rquerypivot
             pivot_append_result(p);
         }
 
+    void flatten_transform(Json j)
+        {
+            result_.push_back(jsonpointer::flatten(j));
+        }
+
     // do_strings() / do_connection()
     sexp do_strings(
         const std::vector<std::string>& data,
@@ -297,6 +302,19 @@ class rquerypivot
             return do_connection(con, n_records, &rquerypivot::pivot_transform);
         }
 
+    // flatten
+
+    sexp flatten(const std::vector<std::string>& data)
+        {
+            return do_strings(data, &rquerypivot::flatten_transform);
+        }
+
+    sexp flatten(const sexp& con, double n_records)
+        {
+            return
+                do_connection(con, n_records, &rquerypivot::flatten_transform);
+        }
+
     // as
 
     sexp as() const

From 62e8ed05dda5ecbeed39b38de8b92723e1b2c78a Mon Sep 17 00:00:00 2001
From: Martin Morgan <Martin.Morgan@RoswellPark.org>
Date: Thu, 21 Mar 2024 11:03:54 -0400
Subject: [PATCH 2/7] version bump and NEWS entry

---
 DESCRIPTION | 2 +-
 NEWS.md     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 569bfea..ff26239 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: rjsoncons
 Title: 'C++' Header-Only 'jsoncons' Library for 'JSON' Queries
-Version: 1.2.0.9602
+Version: 1.2.0.9700
 Authors@R: c(
     person(
         "Martin", "Morgan", role = c("aut", "cre"),
diff --git a/NEWS.md b/NEWS.md
index 42678d2..ac4d112 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,6 @@
 # rjsoncons 1.3.0
 
+- (1.2.0.9700) add key and value search with `j_flatten()`, `j_find_*()`
 - (1.2.0.9602) compile on Ubuntu 18.04
   <https://github.com/mtmorgan/rjsoncons/issues/3>
 - (1.2.0.9503) add JSON patch support with `j_patch_apply()`,

From 929115695f554f19a545f58c2afa0a9086f4e451 Mon Sep 17 00:00:00 2001
From: Martin Morgan <Martin.Morgan@RoswellPark.org>
Date: Tue, 19 Mar 2024 14:10:29 -0400
Subject: [PATCH 3/7] minor updates to `j_flatten()` / `j_find_*()`

- use `as = "R"` for default in `j_find_*()`, consistent with other
  functions
- add section to a_rjsoncons.Rmd vignette
- internal changes
---
 R/flatten.R                  | 32 +++++++-------
 inst/tinytest/test_flatten.R |  5 +++
 vignettes/a_rjsoncons.Rmd    | 81 ++++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+), 17 deletions(-)

diff --git a/R/flatten.R b/R/flatten.R
index 6bf10d2..4a27e0e 100644
--- a/R/flatten.R
+++ b/R/flatten.R
@@ -10,7 +10,7 @@
 #'
 #' @param as character(1) describing the return type.  For
 #'     `j_flatten()`, either "string" or "R". For other functions on
-#'     this page, one of "list", "data.frame", or "tibble".
+#'     this page, one of "R", "data.frame", or "tibble".
 #'
 #' @details
 #'
@@ -67,10 +67,10 @@ j_flatten <-
     result
 }
 
-j_find_format <-
+.j_find_format <-
     function(flattened, as)
 {
-    if (identical(as, "list")) {
+    if (identical(as, "R")) {
         flattened
     } else {
         keys <- names(flattened)
@@ -108,20 +108,18 @@ j_find_format <-
 #' @export
 j_find_values <-
     function(
-        data, values, object_names = "asis", as = "list",
+        data, values, object_names = "asis", as = "R",
         data_type = j_data_type(data)
     )
 {
-    types <- unique(vapply(values, typeof, character(1)))
     stopifnot(
-        length(types) == 1L,
-        .is_scalar_character(as), as %in% c("list", "data.frame", "tibble")
+        .is_scalar_character(as), as %in% c("R", "data.frame", "tibble")
     )
 
     flattened0 <- j_flatten(data, object_names, "R")
     flattened <- Filter(\(x) x %in% values, flattened0)
 
-    j_find_format(flattened, as)
+    .j_find_format(flattened, as)
 }
 
 #' @rdname flatten
@@ -141,20 +139,20 @@ j_find_values <-
 #' @export
 j_find_values_grep <-
     function(
-        data, pattern, ..., object_names = "asis", as = "list",
+        data, pattern, ..., object_names = "asis", as = "R",
         data_type = j_data_type(data)
     )
 {
     stopifnot(
         .is_scalar_character(pattern),
-        .is_scalar_character(as), as %in% c("list", "data.frame", "tibble")
+        .is_scalar_character(as), as %in% c("R", "data.frame", "tibble")
     )
 
     flattened <- j_flatten(data, object_names, "R")
     values <- unlist(flattened, use.names = FALSE)
     idx <- grepl(pattern, values, ...)
 
-    j_find_format(flattened[idx], as)
+    .j_find_format(flattened[idx], as)
 }
 
 #' @rdname flatten
@@ -181,13 +179,13 @@ j_find_values_grep <-
 #' @export
 j_find_keys <-
     function(
-        data, keys, object_names = "asis", as = "list",
+        data, keys, object_names = "asis", as = "R",
         data_type = j_data_type(data)
     )
 {
     stopifnot(
         is.character(keys), !anyNA(keys),
-        .is_scalar_character(as), as %in% c("list", "data.frame", "tibble")
+        .is_scalar_character(as), as %in% c("R", "data.frame", "tibble")
     )
 
     flattened <- j_flatten(data, object_names, "R")
@@ -196,7 +194,7 @@ j_find_keys <-
     idx1 <- unlist(keys1) %in% keys
     idx <- unique(rep(seq_along(keys1), lengths(keys1))[idx1])
 
-    j_find_format(flattened[idx], as)
+    .j_find_format(flattened[idx], as)
 }
 
 #' @rdname flatten
@@ -217,17 +215,17 @@ j_find_keys <-
 #' @export
 j_find_keys_grep <-
     function(
-        data, pattern, ..., object_names = "asis", as = "list",
+        data, pattern, ..., object_names = "asis", as = "R",
         data_type = j_data_type(data)
     )
 {
     stopifnot(
         .is_scalar_character(pattern),
-        .is_scalar_character(as), as %in% c("list", "data.frame", "tibble")
+        .is_scalar_character(as), as %in% c("R", "data.frame", "tibble")
     )
 
     flattened <- j_flatten(data, object_names, "R")
     idx <- grepl(pattern, names(flattened), ...)
 
-    j_find_format(flattened[idx], as)
+    .j_find_format(flattened[idx], as)
 }
diff --git a/inst/tinytest/test_flatten.R b/inst/tinytest/test_flatten.R
index 0ca2424..87d88ed 100644
--- a/inst/tinytest/test_flatten.R
+++ b/inst/tinytest/test_flatten.R
@@ -1,3 +1,4 @@
+ndjson_file <- system.file(package = "rjsoncons", "extdata", "example.ndjson")
 json_file <- system.file(package = "rjsoncons", "extdata", "flatten_data.json")
 json <- paste0(trimws(readLines(json_file, warn = FALSE)), collapse = "")
 ojson <- paste0(
@@ -54,6 +55,8 @@ expect_identical(j_flatten(json_file, "asis", as = "R"), flat_r)
 expect_identical(j_flatten(ojson), oflat)
 expect_identical(j_flatten(ojson, "sort"), flat)
 
+expect_identical(length(j_flatten(ndjson_file)), 4L)
+expect_identical(length(j_flatten(ndjson_file, n_records = 2)), 2L)
 
 ## j_find_values
 
@@ -86,6 +89,8 @@ expect_identical( # as = "tibble"
     info = "as = 'tibble'"
 )
 
+j_find_values(ndjson_file, "WA") |> str()
+
 expect_identical(j_find_values(json, "foo"), named_list)
 
 ## j_find_values_grep
diff --git a/vignettes/a_rjsoncons.Rmd b/vignettes/a_rjsoncons.Rmd
index 3b9cbb3..5a838cf 100644
--- a/vignettes/a_rjsoncons.Rmd
+++ b/vignettes/a_rjsoncons.Rmd
@@ -343,6 +343,87 @@ j_patch_from(j_patch_apply(json, patch), json)
 [JSON Patch]: https://jsonpatch.com/
 [RFC6902]: https://datatracker.ietf.org/doc/html/rfc6902/#section-4
 
+# Flatten and find
+
+It can sometimes be helpful to explore JSON documents by 'flattening'
+the JSON to an object of path / value pairs, where the path is the
+[JSONpointer][] path to the corresponding value. It is then
+straight-forward to search this flattened object for, e.g., the path
+to a known field or value. As an example, consider the object
+
+```{r}
+codes <- '{
+    "discards": {
+        "1000": "Record does not exist",
+        "1004": "Queue limit exceeded",
+        "1010": "Discarding timed-out partial msg"
+    },
+    "warnings": {
+        "0": "Phone number missing country code",
+        "1": "State code missing",
+        "2": "Zip code missing"
+    }
+}'
+```
+
+The 'flat' JSON of this can be represented as a named list (using
+`str()` to provide a compact visual representation)
+
+```{r}
+j_flatten(codes, as = "R") |>
+    str()
+```
+
+The names of the list are JSONpointer paths, so can be used in
+`j_query()` and `j_pivot()` as appropriate
+
+```{r}
+j_query(codes, "/discards/1010")
+```
+
+There are two ways to find known keys and values. The first is to use
+exact matching to one or more keys or values, e.g.,
+
+```{r}
+j_find_values(
+    codes, c("Record does not exist", "State code missing"),
+    as = "tibble"
+)
+j_find_keys(codes, "warnings", as = "tibble")
+```
+
+It is also possible to match using a regular expression.
+
+```{r}
+j_find_values_grep(codes, "missing", as = "tibble")
+j_find_keys_grep(codes, "card.*/100", as = "tibble") # span key delimiters
+```
+
+Keys are always character vectors, but values can be of different
+type; `j_find_values()` supports searches on these, provided the
+search values are of the same type.
+
+```{r}
+j <- '{"x":[1,[2, 3]],"y":{"a":4}}'
+j_flatten(j, as = "R") |> str()
+j_find_values(j, c(2, 4), as = "tibble")
+```
+
+A common operation might be to find the path to a known value, and then
+to query the original JSON to find the object in which the value is
+contained.
+
+```{r}
+j_find_values(j, 3, as = "tibble")
+## path to '3' is '/x/1/1', so containing object is at '/x/1'
+j_query(j, "/x/1")
+j_query(j, "/x/1", as = "R")
+```
+
+The first argument to `j_find_*()` can be a JSON object, file, or
+URL. NDJSON files are flattened into a character vector, with each
+element the flattened version of the corresponding NDJSON record.
+
 # The JSON parser
 
 The package includes a JSON parser, used with the argument `as = "R"`

From d03e808e0b96143fe4ca6c74628a94df4878e83f Mon Sep 17 00:00:00 2001
From: Martin Morgan <Martin.Morgan@RoswellPark.org>
Date: Wed, 20 Mar 2024 15:51:11 -0400
Subject: [PATCH 4/7] improve `j_flatten()` and `j_find_*()` support for NDJSON

---
 R/flatten.R                  | 232 ++++++++++++++++++++++++-----------
 inst/tinytest/test_flatten.R |  94 ++++++++++++--
 man/flatten.Rd               |  80 ++++++++----
 3 files changed, 303 insertions(+), 103 deletions(-)

diff --git a/R/flatten.R b/R/flatten.R
index 4a27e0e..e21336a 100644
--- a/R/flatten.R
+++ b/R/flatten.R
@@ -1,6 +1,65 @@
+## internal implementation of j_flatten(), always returns a list to
+## simplify j_find_*() processing of both JSON & NDJSON
+.j_flatten <-
+    function(data, object_names, as, ..., n_records, verbose, data_type)
+{
+    ## initialize constants to enable code re-use
+    path <- ""
+    path_type <- j_path_type(path)
+
+    ## validity
+    .j_valid(data_type, object_names, path, path_type, n_records, verbose)
+
+    data <- .as_json_string(data, data_type, ...)
+    result <- do_cpp(
+        cpp_j_flatten, cpp_j_flatten_con,
+        data, data_type, object_names, as, path, path_type,
+        n_records = n_records, verbose = verbose
+    )
+}
+
+## internal function calling grepl with argument list
+.j_find_grepl <-
+    function(pattern, x, grep_args)
+{
+    stopifnot(
+        is.list(grep_args),
+        all(
+            names(grep_args) %in%
+            setdiff(names(formals(grepl)), c("pattern", "x"))
+        )
+    )
+    args <- c(list(pattern = pattern, x = x), grep_args)
+    do.call(grepl, args)
+}
+
+## internal function to format j_find_*() result
+.j_find_format <-
+    function(flattened, as, data_type)
+{
+    result <- lapply(flattened, function(json_record, as) {
+        if (identical(as, "R")) {
+            json_record
+        } else {
+            keys <- names(json_record)
+            values <- unlist(json_record, use.names = FALSE)
+            switch(
+                as,
+                data.frame = data.frame(key = keys, value = values),
+                tibble = tibble::tibble(key = keys, value = values)
+            )
+        }
+    }, as)
+
+    if (data_type[[1]] %in% c("json", "R")) # not NDJSON
+        result <- result[[1]]
+
+    result
+}
+
 #' @rdname flatten
 #'
-#' @title Flatten and find keys or values
+#' @title Flatten and find keys or values in JSON or NDJSON documents
 #'
 #' @description `j_flatten()` transforms a JSON document into a list
 #'     where names are JSONpointer 'keys' and elements are the
@@ -12,14 +71,20 @@
 #'     `j_flatten()`, either "string" or "R". For other functions on
 #'     this page, one of "R", "data.frame", or "tibble".
 #'
-#' @details
+#' @details Functions documented on this page expand `data` into all
+#'     key / value pairs. This is not suitable for very large JSON
+#'     documents.
 #'
-#' Functions documented on this page expand `data` into all key /
-#' value pairs. This is not suitable for very large JSON documents.
+#' @return
 #'
-#' @return `j_flatten()` returns a named list, where `names()` are the
-#'     JSONpointer paths to each element in the JSON document and list
-#'     elements are the corresponding values.
+#' `j_flatten(as = "string")` (default) returns a JSON string
+#' representation of the flattened document, i.e., an object with keys
+#' the JSONpointer paths and values the values at the corresponding
+#' path in the original document.
+#'
+#' `j_flatten(as = "R")` returns a named list, where `names()` are the
+#' JSONpointer paths to each element in the JSON document and list
+#' elements are the corresponding values.
 #'
 #' @examples
 #' json <- '{
@@ -42,54 +107,27 @@
 j_flatten <-
     function(
         data, object_names = "asis", as = "string", ...,
-        n_records = Inf, verbose = FALSE,
-        data_type = j_data_type(data)
+        n_records = Inf, verbose = FALSE, data_type = j_data_type(data)
     )
 {
-    ## initialize constants to enable code re-use
-    path <- ""
-    path_type <- j_path_type(path)
-
-    ## validity
-    .j_valid(data_type, object_names, path, path_type, n_records, verbose)
     stopifnot(.is_scalar_character(as), as %in% c("string", "R"))
-
-    data <- .as_json_string(data, data_type, ...)
-    result <- do_cpp(
-        cpp_j_flatten, cpp_j_flatten_con,
-        data, data_type, object_names, as, path, path_type,
-        n_records = n_records, verbose = verbose
+    result <- .j_flatten(
+        data, object_names, as, ...,
+        n_records = n_records, verbose = verbose, data_type = data_type
     )
-
     if (data_type[[1]] %in% c("json", "R"))
         result <- result[[1]]
 
     result
 }
 
-.j_find_format <-
-    function(flattened, as)
-{
-    if (identical(as, "R")) {
-        flattened
-    } else {
-        keys <- names(flattened)
-        values <- unlist(flattened, use.names = FALSE)
-        switch(
-            as,
-            data.frame = data.frame(key = keys, value = values),
-            tibble = tibble::tibble(key = keys, value = values)
-        )
-    }
-}
-
 #' @rdname flatten
 #'
 #' @description `j_find_values()` finds paths to exactly matching
 #'     values.
 #'
-#' @param values vector of one or more values, all of the same type
-#'     (e.g., double, integer, character).
+#' @param values vector of one or more values to be matched exactly to
+#'     values in the JSON document.
 #'
 #' @return `j_find_values()` and `j_find_values_grep()` return a list
 #'     with names as JSONpointer paths and list elements the matching
@@ -108,18 +146,23 @@ j_flatten <-
 #' @export
 j_find_values <-
     function(
-        data, values, object_names = "asis", as = "R",
-        data_type = j_data_type(data)
+        data, values, object_names = "asis", as = "R", ...,
+        n_records = Inf, verbose = FALSE, data_type = j_data_type(data)
     )
 {
     stopifnot(
         .is_scalar_character(as), as %in% c("R", "data.frame", "tibble")
     )
 
-    flattened0 <- j_flatten(data, object_names, "R")
-    flattened <- Filter(\(x) x %in% values, flattened0)
+    result <- .j_flatten(
+        data, object_names, "R", ...,
+        n_records = n_records, verbose = verbose, data_type = data_type
+    )
+    flattened <- lapply(result, function(json_record) {
+        Filter(\(x) x %in% values, json_record)
+    })
 
-    .j_find_format(flattened, as)
+    .j_find_format(flattened, as, data_type)
 }
 
 #' @rdname flatten
@@ -130,8 +173,8 @@ j_find_values <-
 #' @param pattern character(1) regular expression to match values or
 #'     keys.
 #'
-#' @param ... for `j_find_values_grep()` and `j_find_keys_grep()`,
-#'     additional arguments passed to `grepl()`.
+#' @param grep_args list() additional arguments passed to `grepl()`
+#'     when searching on values or paths.
 #'
 #' @examples
 #' j_find_values_grep(json, "missing", as = "tibble")
@@ -139,20 +182,28 @@ j_find_values <-
 #' @export
 j_find_values_grep <-
     function(
-        data, pattern, ..., object_names = "asis", as = "R",
-        data_type = j_data_type(data)
+        data, pattern, object_names = "asis", as = "R", ...,
+        n_records = Inf, verbose = FALSE, data_type = j_data_type(data),
+        grep_args = list()
     )
 {
     stopifnot(
         .is_scalar_character(pattern),
         .is_scalar_character(as), as %in% c("R", "data.frame", "tibble")
+        ## FIXME: validate grep_args
     )
 
-    flattened <- j_flatten(data, object_names, "R")
-    values <- unlist(flattened, use.names = FALSE)
-    idx <- grepl(pattern, values, ...)
+    result <- .j_flatten(
+        data, object_names, "R", ...,
+        n_records = n_records, verbose = verbose, data_type = data_type
+    )
+    flattened <- lapply(result, function(json_record, grep_args) {
+        values <- unlist(json_record, use.names = FALSE)
+        idx <- .j_find_grepl(pattern, values, grep_args)
+        json_record[idx]
+    }, grep_args)
 
-    .j_find_format(flattened[idx], as)
+    .j_find_format(flattened, as, data_type)
 }
 
 #' @rdname flatten
@@ -162,10 +213,9 @@ j_find_values_grep <-
 #' @param keys character() vector of one or more keys to be matched
 #'     exactly to path elements.
 #'
-#' @details
-#'
-#' For `j_find_keys()`, the `key` must exactly match one or more
-#' consecutive keys in the JSONpointer path returned by `j_flatten()`.
+#' @details For `j_find_keys()`, the `key` must exactly match one or
+#'     more consecutive keys in the JSONpointer path returned by
+#'     `j_flatten()`.
 #'
 #' @return `j_find_keys()` and `j_find_keys_grep()` returns a list,
 #'     data.frame, or tibble similar to `j_find_values()` and
@@ -179,8 +229,8 @@ j_find_values_grep <-
 #' @export
 j_find_keys <-
     function(
-        data, keys, object_names = "asis", as = "R",
-        data_type = j_data_type(data)
+        data, keys, object_names = "asis", as = "R", ...,
+        n_records = Inf, verbose = FALSE, data_type = j_data_type(data)
     )
 {
     stopifnot(
@@ -188,13 +238,19 @@ j_find_keys <-
         .is_scalar_character(as), as %in% c("R", "data.frame", "tibble")
     )
 
-    flattened <- j_flatten(data, object_names, "R")
-    keys0 <- names(flattened)
-    keys1 <- strsplit(keys0, "/")
-    idx1 <- unlist(keys1) %in% keys
-    idx <- unique(rep(seq_along(keys1), lengths(keys1))[idx1])
+    result <- .j_flatten(
+        data, object_names, "R", ...,
+        n_records = n_records, verbose = verbose, data_type = data_type
+    )
+    flattened <- lapply(result, function(json_record) {
+        keys0 <- names(json_record)
+        keys1 <- strsplit(keys0, "/")
+        idx1 <- unlist(keys1) %in% keys
+        idx <- unique(rep(seq_along(keys1), lengths(keys1))[idx1])
+        json_record[idx]
+    })
 
-    .j_find_format(flattened[idx], as)
+    .j_find_format(flattened, as, data_type)
 }
 
 #' @rdname flatten
@@ -202,10 +258,8 @@ j_find_keys <-
 #' @description `j_find_keys_grep()` finds paths to keys matching a
 #'     regular expression.
 #'
-#' @details
-#'
-#' For `j_find_keys_grep()`, the `key` can define a pattern that spans
-#' across JSONpointer path elements.
+#' @details For `j_find_keys_grep()`, the `key` can define a pattern
+#'     that spans across JSONpointer path elements.
 #'
 #' @examples
 #' j_find_keys_grep(json, "discard", as = "tibble")
@@ -215,8 +269,9 @@ j_find_keys <-
 #' @export
 j_find_keys_grep <-
     function(
-        data, pattern, ..., object_names = "asis", as = "R",
-        data_type = j_data_type(data)
+        data, pattern, object_names = "asis", as = "R", ...,
+        n_records = Inf, verbose = FALSE, data_type = j_data_type(data),
+        grep_args = list()
     )
 {
     stopifnot(
@@ -224,8 +279,37 @@ j_find_keys_grep <-
         .is_scalar_character(as), as %in% c("R", "data.frame", "tibble")
     )
 
-    flattened <- j_flatten(data, object_names, "R")
-    idx <- grepl(pattern, names(flattened), ...)
+    result <- .j_flatten(
+        data, object_names, "R", ...,
+        n_records = n_records, verbose = verbose, data_type = data_type
+    )
+    flattened <- lapply(result, function(json_record, grep_args) {
+        idx <- .j_find_grepl(pattern, names(json_record), grep_args)
+        json_record[idx]
+    }, grep_args)
 
-    .j_find_format(flattened[idx], as)
+    .j_find_format(flattened, as, data_type)
 }
+
+#' @rdname flatten
+#'
+#' @name flatten_NDJSON
+#'
+#' @description For NDJSON documents, the result is either a character
+#'     vector (for `as = "string"`) or list of *R* objects, one
+#'     element for each NDJSON record.
+#'
+#' @return For NDJSON documents, the result is a vector paralleling
+#'     the NDJSON document, with `j_flatten()` applied to each element
+#'     of the NDJSON document.
+#'
+#' @examples
+#' ## NDJSON
+#'
+#' ndjson_file <-
+#'     system.file(package = "rjsoncons", "extdata", "example.ndjson")
+#' j_flatten(ndjson_file) |>
+#'     noquote()
+#' j_find_values_grep(ndjson_file, "e") |>
+#'     str()
+NULL
diff --git a/inst/tinytest/test_flatten.R b/inst/tinytest/test_flatten.R
index 87d88ed..f11a25e 100644
--- a/inst/tinytest/test_flatten.R
+++ b/inst/tinytest/test_flatten.R
@@ -1,4 +1,7 @@
-ndjson_file <- system.file(package = "rjsoncons", "extdata", "example.ndjson")
+##
+## JSON
+##
+
 json_file <- system.file(package = "rjsoncons", "extdata", "flatten_data.json")
 json <- paste0(trimws(readLines(json_file, warn = FALSE)), collapse = "")
 ojson <- paste0(
@@ -36,9 +39,9 @@ oflat <- paste0(
 )
 flat_r <- list(
     `/discards/1000` = "Record does not exist",
-    `/discards/1004` = "Queue limit exceeded", 
+    `/discards/1004` = "Queue limit exceeded",
     `/discards/1010` = "Discarding timed-out partial msg",
-    `/warnings/0` = "Phone number missing country code", 
+    `/warnings/0` = "Phone number missing country code",
     `/warnings/1` = "State code missing",
     `/warnings/2` = "Zip code missing"
 )
@@ -55,9 +58,6 @@ expect_identical(j_flatten(json_file, "asis", as = "R"), flat_r)
 expect_identical(j_flatten(ojson), oflat)
 expect_identical(j_flatten(ojson, "sort"), flat)
 
-expect_identical(length(j_flatten(ndjson_file)), 4L)
-expect_identical(length(j_flatten(ndjson_file, n_records = 2)), 2L)
-
 ## j_find_values
 
 expect_identical(j_find_values(json, "State code missing"), flat_r[5])
@@ -89,8 +89,6 @@ expect_identical( # as = "tibble"
     info = "as = 'tibble'"
 )
 
-j_find_values(ndjson_file, "WA") |> str()
-
 expect_identical(j_find_values(json, "foo"), named_list)
 
 ## j_find_values_grep
@@ -106,3 +104,83 @@ expect_identical(j_find_keys(json, c("1000", "1")), flat_r[c(1, 5)])
 
 expect_identical(j_find_keys_grep(json, "warn"), flat_r[4:6])
 expect_identical(j_find_keys_grep(json, "ard.*10$"), flat_r[3])
+
+##
+## NDJSON
+##
+
+ndjson_file <- system.file(package = "rjsoncons", "extdata", "example.ndjson")
+flat_ndjson <- c(
+    '{"/name":"Seattle","/state":"WA"}', '{"/name":"New York","/state":"NY"}',
+    '{"/name":"Bellevue","/state":"WA"}', '{"/name":"Olympia","/state":"WA"}'
+)
+
+## j_flatten
+
+expect_identical(j_flatten(ndjson_file), flat_ndjson)
+expect_identical(j_flatten(ndjson_file, n_records = 2), flat_ndjson[1:2])
+
+## j_find_values*()
+
+expect_identical(
+    j_find_values(ndjson_file, "WA"),
+    list(
+        list(`/state` = "WA"), named_list,
+        list(`/state` = "WA"), list(`/state` = "WA")
+    )
+)
+expect_identical(
+    j_find_values(ndjson_file, "WA", n_records = 2),
+    list(list(`/state` = "WA"), named_list)
+)
+expect_identical(
+    j_find_values_grep(ndjson_file, "e"),
+    list(
+        list(`/name` = "Seattle"), list(`/name` = "New York"),
+        list(`/name` = "Bellevue"), named_list
+    )
+)
+
+expect_identical(
+    j_find_values(ndjson_file, "WA"),
+    list(
+        list(`/state` = "WA"), named_list,
+        list(`/state` = "WA"), list(`/state` = "WA")
+    )
+)
+expect_identical(
+    j_find_values(ndjson_file, "WA", n_records = 2),
+    list(
+        list(`/state` = "WA"), named_list
+    )
+)
+
+expect_identical(
+    j_find_values_grep(ndjson_file, "e", n_records = 2),
+    list(
+        list(`/name` = "Seattle"), list(`/name` = "New York")
+    )
+)
+
+## j_find_keys*()
+
+expect_identical(
+    j_find_keys(ndjson_file, "name"),
+    list(
+        list(`/name` = "Seattle"), list(`/name` = "New York"),
+        list(`/name` = "Bellevue"), list(`/name` = "Olympia")
+    )
+)
+expect_identical(
+    j_find_keys(ndjson_file, "name", n_records = 2),
+    list(
+        list(`/name` = "Seattle"), list(`/name` = "New York")
+    )
+)
+
+expect_identical(
+    j_find_keys_grep(ndjson_file, "ame", n_records = 2),
+    list(
+        list(`/name` = "Seattle"), list(`/name` = "New York")
+    )
+)
diff --git a/man/flatten.Rd b/man/flatten.Rd
index ee5826c..be7fed4 100644
--- a/man/flatten.Rd
+++ b/man/flatten.Rd
@@ -6,7 +6,8 @@
 \alias{j_find_values_grep}
 \alias{j_find_keys}
 \alias{j_find_keys_grep}
-\title{Flatten and find keys or values}
+\alias{flatten_NDJSON}
+\title{Flatten and find keys or values in JSON or NDJSON documents}
 \usage{
 j_flatten(
   data,
@@ -22,34 +23,46 @@ j_find_values(
   data,
   values,
   object_names = "asis",
-  as = "list",
+  as = "R",
+  ...,
+  n_records = Inf,
+  verbose = FALSE,
   data_type = j_data_type(data)
 )
 
 j_find_values_grep(
   data,
   pattern,
-  ...,
   object_names = "asis",
-  as = "list",
-  data_type = j_data_type(data)
+  as = "R",
+  ...,
+  n_records = Inf,
+  verbose = FALSE,
+  data_type = j_data_type(data),
+  grep_args = list()
 )
 
 j_find_keys(
   data,
   keys,
   object_names = "asis",
-  as = "list",
+  as = "R",
+  ...,
+  n_records = Inf,
+  verbose = FALSE,
   data_type = j_data_type(data)
 )
 
 j_find_keys_grep(
   data,
   pattern,
-  ...,
   object_names = "asis",
-  as = "list",
-  data_type = j_data_type(data)
+  as = "R",
+  ...,
+  n_records = Inf,
+  verbose = FALSE,
+  data_type = j_data_type(data),
+  grep_args = list()
 )
 }
 \arguments{
@@ -62,10 +75,9 @@ object parsed to a JSON string using \code{jsonlite::toJSON()}.}
 
 \item{as}{character(1) describing the return type.  For
 \code{j_flatten()}, either "string" or "R". For other functions on
-this page, one of "list", "data.frame", or "tibble".}
+this page, one of "R", "data.frame", or "tibble".}
 
-\item{...}{for \code{j_find_values_grep()} and \code{j_find_keys_grep()},
-additional arguments passed to \code{grepl()}.}
+\item{...}{passed to \code{jsonlite::toJSON} when \code{data} is an \emph{R} object.}
 
 \item{n_records}{numeric(1) maximum number of NDJSON records parsed.}
 
@@ -75,17 +87,25 @@ files.}
 \item{data_type}{character(1) type of \code{data}; one of \code{"json"},
 \code{"ndjson"}. Inferred from \code{data} using \code{j_data_type()}.}
 
-\item{values}{vector of one or more values, all of the same type
-(e.g., double, integer, character).}
+\item{values}{vector of one or more values to be matched exactly to
+values in the JSON document.}
 
 \item{pattern}{character(1) regular expression to match values or
 keys.}
 
+\item{grep_args}{list() additional arguments passed to \code{grepl()}
+when searching on values or paths.}
+
 \item{keys}{character() vector of one or more keys to be matched
 exactly to path elements.}
 }
 \value{
-\code{j_flatten()} returns a named list, where \code{names()} are the
+\code{j_flatten(as = "string")} (default) returns a JSON string
+representation of the flattened document, i.e., an object with keys
+the JSONpointer paths and values the values at the corresponding
+path in the original document.
+
+\code{j_flatten(as = "R")} returns a named list, where \code{names()} are the
 JSONpointer paths to each element in the JSON document and list
 elements are the corresponding values.
 
@@ -98,6 +118,10 @@ values, or a \code{data.frame} or \code{tibble} with columns \code{path} and
 \code{j_find_keys()} and \code{j_find_keys_grep()} returns a list,
 data.frame, or tibble similar to \code{j_find_values()} and
 \code{j_find_values_grep()}.
+
+For NDJSON documents, the result is a vector paralleling
+the NDJSON document, with \code{j_flatten()} applied to each element
+of the NDJSON document.
 }
 \description{
 \code{j_flatten()} transforms a JSON document into a list
@@ -114,16 +138,22 @@ a regular expression.
 
 \code{j_find_keys_grep()} finds paths to keys matching a
 regular expression.
+
+For NDJSON documents, the result is either a character
+vector (for \code{as = "string"}) or list of \emph{R} objects, one
+element for each NDJSON record.
 }
 \details{
-Functions documented on this page expand \code{data} into all key /
-value pairs. This is not suitable for very large JSON documents.
+Functions documented on this page expand \code{data} into all
+key / value pairs. This is not suitable for very large JSON
+documents.
 
-For \code{j_find_keys()}, the \code{key} must exactly match one or more
-consecutive keys in the JSONpointer path returned by \code{j_flatten()}.
+For \code{j_find_keys()}, the \code{key} must exactly match one or
+more consecutive keys in the JSONpointer path returned by
+\code{j_flatten()}.
 
-For \code{j_find_keys_grep()}, the \code{key} can define a pattern that spans
-across JSONpointer path elements.
+For \code{j_find_keys_grep()}, the \code{key} can define a pattern
+that spans across JSONpointer path elements.
 }
 \examples{
 json <- '{
@@ -159,4 +189,12 @@ j_find_keys_grep(json, "discard", as = "tibble")
 j_find_keys_grep(json, "1", as = "tibble")
 j_find_keys_grep(json, "car.*/101", as = "tibble")
 
+## NDJSON
+
+ndjson_file <-
+    system.file(package = "rjsoncons", "extdata", "example.ndjson")
+j_flatten(ndjson_file) |>
+    noquote()
+j_find_values_grep(ndjson_file, "e") |>
+    str()
 }

From a08d5f1fe0c8074b903a04af526c97223e7e0165 Mon Sep 17 00:00:00 2001
From: Martin Morgan <Martin.Morgan@RoswellPark.org>
Date: Thu, 21 Mar 2024 11:04:23 -0400
Subject: [PATCH 5/7] version bump and NEWS update

---
 DESCRIPTION | 2 +-
 NEWS.md     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ff26239..32c862e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: rjsoncons
 Title: 'C++' Header-Only 'jsoncons' Library for 'JSON' Queries
-Version: 1.2.0.9700
+Version: 1.2.0.9701
 Authors@R: c(
     person(
         "Martin", "Morgan", role = c("aut", "cre"),
diff --git a/NEWS.md b/NEWS.md
index ac4d112..d008726 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,6 @@
 # rjsoncons 1.3.0
 
-- (1.2.0.9700) add key and value search with `j_flatten()`, `j_find_*()`
+- (1.2.0.9701) add key and value search with `j_flatten()`, `j_find_*()`
 - (1.2.0.9602) compile on Ubuntu 18.04
   <https://github.com/mtmorgan/rjsoncons/issues/3>
 - (1.2.0.9503) add JSON patch support with `j_patch_apply()`,

From cc217fb0a880969ea6d6a91e0319276b2c7d41b7 Mon Sep 17 00:00:00 2001
From: Martin Morgan <Martin.Morgan@RoswellPark.org>
Date: Thu, 21 Mar 2024 11:04:43 -0400
Subject: [PATCH 6/7] rename `j_find_*()` column 'key' as 'path'

---
 DESCRIPTION                  |  2 +-
 NEWS.md                      |  2 +-
 R/flatten.R                  | 22 +++++++++++-----------
 inst/tinytest/test_flatten.R |  4 ++--
 vignettes/a_rjsoncons.Rmd    |  5 ++---
 5 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 32c862e..8e965a6 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: rjsoncons
 Title: 'C++' Header-Only 'jsoncons' Library for 'JSON' Queries
-Version: 1.2.0.9701
+Version: 1.2.0.9702
 Authors@R: c(
     person(
         "Martin", "Morgan", role = c("aut", "cre"),
diff --git a/NEWS.md b/NEWS.md
index d008726..74ed63f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,6 @@
 # rjsoncons 1.3.0
 
-- (1.2.0.9701) add key and value search with `j_flatten()`, `j_find_*()`
+- (1.2.0.9702) add key and value search with `j_flatten()`, `j_find_*()`
 - (1.2.0.9602) compile on Ubuntu 18.04
   <https://github.com/mtmorgan/rjsoncons/issues/3>
 - (1.2.0.9503) add JSON patch support with `j_patch_apply()`,
diff --git a/R/flatten.R b/R/flatten.R
index e21336a..73038b1 100644
--- a/R/flatten.R
+++ b/R/flatten.R
@@ -41,12 +41,12 @@
         if (identical(as, "R")) {
             json_record
         } else {
-            keys <- names(json_record)
+            paths <- names(json_record)
             values <- unlist(json_record, use.names = FALSE)
             switch(
                 as,
-                data.frame = data.frame(key = keys, value = values),
-                tibble = tibble::tibble(key = keys, value = values)
+                data.frame = data.frame(path = paths, value = values),
+                tibble = tibble::tibble(path = paths, value = values)
             )
         }
     }, as)
@@ -62,7 +62,7 @@
 #' @title Flatten and find keys or values in JSON or NDJSON documents
 #'
 #' @description `j_flatten()` transforms a JSON document into a list
-#'     where names are JSONpointer 'keys' and elements are the
+#'     where names are JSONpointer 'paths' and elements are the
 #'     corresponding 'values' from the JSON document.
 #'
 #' @inheritParams j_query
@@ -72,14 +72,14 @@
 #'     this page, one of "R", "data.frame", or "tibble".
 #'
 #' @details Functions documented on this page expand `data` into all
-#'     key / value pairs. This is not suitable for very large JSON
+#'     path / value pairs. This is not suitable for very large JSON
 #'     documents.
 #'
 #' @return
 #'
 #' `j_flatten(as = "string")` (default) returns a JSON string
 #' representation of the flattened document, i.e., an object with keys
-#' the JSONpointer paths and values the values at the corresponding
+#' the JSONpointer paths and values the value at the corresponding
 #' path in the original document.
 #'
 #' `j_flatten(as = "R")` returns a named list, where `names()` are the
@@ -171,7 +171,7 @@ j_find_values <-
 #'     a regular expression.
 #'
 #' @param pattern character(1) regular expression to match values or
-#'     keys.
+#'     paths.
 #'
 #' @param grep_args list() additional arguments passed to `grepl()`
 #'     when searching on values or paths.
@@ -243,10 +243,10 @@ j_find_keys <-
         n_records = n_records, verbose = verbose, data_type = data_type
     )
     flattened <- lapply(result, function(json_record) {
-        keys0 <- names(json_record)
-        keys1 <- strsplit(keys0, "/")
-        idx1 <- unlist(keys1) %in% keys
-        idx <- unique(rep(seq_along(keys1), lengths(keys1))[idx1])
+        paths <- names(json_record)
+        keys0 <- strsplit(paths, "/")
+        idx0 <- unlist(keys0) %in% keys
+        idx <- unique(rep(seq_along(keys0), lengths(keys0))[idx0])
         json_record[idx]
     })
 
diff --git a/inst/tinytest/test_flatten.R b/inst/tinytest/test_flatten.R
index f11a25e..4784dc4 100644
--- a/inst/tinytest/test_flatten.R
+++ b/inst/tinytest/test_flatten.R
@@ -72,7 +72,7 @@ expect_identical(
         as = "data.frame"
     ),
     data.frame(
-        key = names(flat_r[c(2, 5)]),
+        path = names(flat_r[c(2, 5)]),
         value = unlist(flat_r[c(2, 5)], use.names = FALSE)
     ),
     info = "as = 'data.frame'"
@@ -83,7 +83,7 @@ expect_identical( # as = "tibble"
         as = "tibble"
     ),
     tibble::tibble(
-        key = names(flat_r[c(2, 5)]),
+        path = names(flat_r[c(2, 5)]),
         value = unlist(flat_r[c(2, 5)], use.names = FALSE)
     ),
     info = "as = 'tibble'"
diff --git a/vignettes/a_rjsoncons.Rmd b/vignettes/a_rjsoncons.Rmd
index 5a838cf..6210bb9 100644
--- a/vignettes/a_rjsoncons.Rmd
+++ b/vignettes/a_rjsoncons.Rmd
@@ -400,8 +400,7 @@ j_find_keys_grep(codes, "card.*/100", as = "tibble") # span key delimiters
 ```
 
 Keys are always character vectors, but values can be of different
-type; `j_find_values()` supports searches on these, provided the
-search values are of the same type.
+type; `j_find_values()` supports searches on these.
 
 ```{r}
 j <- '{"x":[1,[2, 3]],"y":{"a":4}}'
@@ -415,7 +414,7 @@ contained.
 
 ```{r}
 j_find_values(j, 3, as = "tibble")
-## path to '3' is '/x/1/1', so containing object is at 'x/1'
+## path to '3' is '/x/1/1', so containing object is at '/x/1'
 j_query(j, "/x/1")
 j_query(j, "/x/1", as = "R")
 ```

From 322da040b4b67aced21376e9d76798607db8940d Mon Sep 17 00:00:00 2001
From: Martin Morgan <Martin.Morgan@RoswellPark.org>
Date: Thu, 21 Mar 2024 11:07:04 -0400
Subject: [PATCH 7/7] version bump and NEWS update

- update src/flatten to avoid cpp11/declarations.hpp (issue 3)
---
 DESCRIPTION     | 2 +-
 NEWS.md         | 2 +-
 src/flatten.cpp | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8e965a6..f9692c8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: rjsoncons
 Title: 'C++' Header-Only 'jsoncons' Library for 'JSON' Queries
-Version: 1.2.0.9702
+Version: 1.2.0.9703
 Authors@R: c(
     person(
         "Martin", "Morgan", role = c("aut", "cre"),
diff --git a/NEWS.md b/NEWS.md
index 74ed63f..f637c22 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,6 @@
 # rjsoncons 1.3.0
 
-- (1.2.0.9702) add key and value search with `j_flatten()`, `j_find_*()`
+- (1.2.0.9703) add key and value search with `j_flatten()`, `j_find_*()`
 - (1.2.0.9602) compile on Ubuntu 18.04
   <https://github.com/mtmorgan/rjsoncons/issues/3>
 - (1.2.0.9503) add JSON patch support with `j_patch_apply()`,
diff --git a/src/flatten.cpp b/src/flatten.cpp
index 4a9e083..392cf3b 100644
--- a/src/flatten.cpp
+++ b/src/flatten.cpp
@@ -1,10 +1,12 @@
 #include <jsoncons/json.hpp>
 #include <jsoncons_ext/jsonpointer/jsonpointer.hpp>
 
-#include <cpp11/declarations.hpp>
 #include "enum_index.h"
 #include "rquerypivot.h"
 
+#include <cpp11/sexp.hpp>
+#include <cpp11/protect.hpp> // 'stop'
+
 using namespace jsoncons;
 
 [[cpp11::register]]