Skip to content

Commit

Permalink
Merge branch 'ndjson-refactor'
Browse files Browse the repository at this point in the history
  • Loading branch information
mtmorgan committed Jan 25, 2024
2 parents e933c99 + 9b2c28d commit 96898a3
Show file tree
Hide file tree
Showing 13 changed files with 362 additions and 182 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rjsoncons
Title: 'C++' Header-Only 'jsoncons' Library for 'JSON' Queries
Version: 1.1.0.9400
Version: 1.1.0.9401
Authors@R: c(
person(
"Martin", "Morgan", role = c("aut", "cre"),
Expand Down
12 changes: 10 additions & 2 deletions R/cpp11.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ cpp_version <- function() {
.Call(`_rjsoncons_cpp_version`)
}

cpp_as_r <- function(data, jtype) {
.Call(`_rjsoncons_cpp_as_r`, data, jtype)
cpp_as_r <- function(data, object_names) {
.Call(`_rjsoncons_cpp_as_r`, data, object_names)
}

cpp_r_json_init <- function(object_names, path, as, data_type, path_type) {
Expand All @@ -20,6 +20,14 @@ cpp_r_json_pivot <- function(ext, data, object_names) {
invisible(.Call(`_rjsoncons_cpp_r_json_pivot`, ext, data, object_names))
}

# Thin R-level wrapper: hands its five arguments, unchanged and in order,
# to the native routine `_rjsoncons_cpp_r_json_query_raw` and returns
# whatever that routine returns.
cpp_r_json_query_raw <- function(ext, prefix, bin, n_records, object_names) {
  .Call(
    `_rjsoncons_cpp_r_json_query_raw`,
    ext, prefix, bin, n_records, object_names
  )
}

# Thin R-level wrapper: hands its five arguments, unchanged and in order,
# to the native routine `_rjsoncons_cpp_r_json_pivot_raw` and returns
# whatever that routine returns.
cpp_r_json_pivot_raw <- function(ext, prefix, bin, n_records, object_names) {
  .Call(
    `_rjsoncons_cpp_r_json_pivot_raw`,
    ext, prefix, bin, n_records, object_names
  )
}

# Thin R-level wrapper: passes `ext` and `object_names` straight through to
# the native routine `_rjsoncons_cpp_r_json_finish` and returns its result.
cpp_r_json_finish <- function(ext, object_names) {
  .Call(
    `_rjsoncons_cpp_r_json_finish`,
    ext, object_names
  )
}
4 changes: 4 additions & 0 deletions R/json.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ json_query <-
if (any(c("file", "url") %in% data_type))
data <- readLines(data, warn = FALSE)
data <- .as_json_string(data, ..., data_type = data_type[[1]])
if (identical(data_type, "R"))
data_type <- "json"

data_type <- head(data_type, 1L)
ex <- cpp_r_json_init(object_names, path, as, data_type, path_type)
Expand All @@ -27,6 +29,8 @@ json_pivot <-
if (any(c("file", "url") %in% data_type))
data <- readLines(data, warn = FALSE)
data <- .as_json_string(data, ..., data_type = data_type[[1]])
if (identical(data_type, "R"))
data_type <- "json"

as0 <- ifelse(identical(as, "string"), as, "R")
ex <- cpp_r_json_init(object_names, path, as0, data_type, path_type)
Expand Down
38 changes: 22 additions & 16 deletions R/ndjson.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,25 @@ ndjson_connection <-
on.exit(close(fl))
}
data_type <- head(data_type, 1L)
chunk_size <- 1024L * 8L

chunk_size <- as.integer(2^20) # 1 Mb chunks
ex <- cpp_r_json_init(object_names, path, as, data_type, path_type)
i <- lines <- 0L
n_lines <- 0L
prefix <- raw()
if (verbose)
cli::cli_progress_message("{lines} ndjson records processed")
cli::cli_progress_message("{n_lines} records processed")
repeat {
chunk_size <- min(chunk_size, n_records)
ndjson <- readLines(fl, chunk_size)
if (!length(ndjson))
if (n_records <= 0L)
break
i <- i + 1L
lines <- lines + length(ndjson)
n_records <- max(n_records - chunk_size, 0L)
if (verbose)
cli::cli_progress_update()
cpp_function(ex, ndjson, object_names)

bin <- readBin(fl, raw(), chunk_size)
if (!length(bin))
break
result <- cpp_function(ex, prefix, bin, n_records, object_names)
prefix <- result$prefix
n_lines <- n_lines + result$n_lines
n_records <- n_records - result$n_lines
}
if (verbose)
cli::cli_progress_done()
Expand All @@ -64,14 +66,17 @@ ndjson_query <-
.is_scalar_logical(verbose)
)

n_records <- as.integer(min(n_records, .Machine$integer.max))
if (.is_j_data_type_connection(data_type)) {
r_function <- ndjson_connection
cpp_function <- cpp_r_json_query_raw
} else {
r_function <- ndjson_character
cpp_function <- cpp_r_json_query
}

r_function(
cpp_r_json_query,
cpp_function,
data, path, object_names, as, n_records, verbose,
path_type, data_type
)
Expand All @@ -90,15 +95,18 @@ ndjson_pivot <-
.is_scalar_logical(verbose)
)

n_records <- as.integer(min(n_records, .Machine$integer.max))
if (.is_j_data_type_connection(data_type)) {
r_function <- ndjson_connection
cpp_function <- cpp_r_json_pivot_raw
} else {
r_function <- ndjson_character
cpp_function <- cpp_r_json_pivot
}

as0 <- ifelse(identical(as, "string"), "string", "R")
pivot <- r_function(
cpp_r_json_pivot,
cpp_function,
data, path, object_names, as0, n_records, verbose,
path_type, data_type
)
Expand All @@ -122,8 +130,6 @@ ndjson_pivot <-
string = result,
R = result,
data.frame = as.data.frame(result),
tibble =
as.data.frame(result) |>
tibble::as_tibble()
tibble = tibble::as_tibble(result)
)
}
9 changes: 9 additions & 0 deletions inst/tinytest/test_utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,12 @@ expect_false(.is_scalar_character(c("a", "b")))
expect_false(.is_scalar_character(NA_character_))
expect_false(.is_scalar_character(""))
expect_true(.is_scalar_character("", z.ok = TRUE))

## C++ utilities.h
## 'object_names' should be in c("asis", "sort")
## Fixture: keys are deliberately not in alphabetical order (a, c, b).
json <- '{"a": 1, "c": 3, "b": 2}'
## With "asis" the result is expected to be a named list of integers whose
## names appear in the same order as the keys of the JSON text.
expect_identical(
rjsoncons:::cpp_as_r(json, "asis"),
list(a = 1L, c = 3L, b = 2L)
)
## A value outside the allowed set must signal an error whose message
## matches the pattern "'foo' unknown".
expect_error(rjsoncons:::cpp_as_r(json, "foo"), "'foo' unknown")
34 changes: 25 additions & 9 deletions src/cpp11.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ extern "C" SEXP _rjsoncons_cpp_version() {
END_CPP11
}
// rjsoncons.cpp
sexp cpp_as_r(std::string data, std::string jtype);
extern "C" SEXP _rjsoncons_cpp_as_r(SEXP data, SEXP jtype) {
sexp cpp_as_r(std::string data, const std::string object_names);
extern "C" SEXP _rjsoncons_cpp_as_r(SEXP data, SEXP object_names) {
BEGIN_CPP11
return cpp11::as_sexp(cpp_as_r(cpp11::as_cpp<cpp11::decay_t<std::string>>(data), cpp11::as_cpp<cpp11::decay_t<std::string>>(jtype)));
return cpp11::as_sexp(cpp_as_r(cpp11::as_cpp<cpp11::decay_t<std::string>>(data), cpp11::as_cpp<cpp11::decay_t<const std::string>>(object_names)));
END_CPP11
}
// rjsoncons.cpp
Expand All @@ -43,6 +43,20 @@ extern "C" SEXP _rjsoncons_cpp_r_json_pivot(SEXP ext, SEXP data, SEXP object_nam
END_CPP11
}
// rjsoncons.cpp
// Forward declaration of the C++ implementation that the C entry point below
// forwards to.
cpp11::list cpp_r_json_query_raw(sexp ext, raws prefix, raws bin, int n_records, const std::string object_names);
// C-linkage entry point taking and returning SEXP. Each SEXP argument is
// converted to the C++ parameter type with cpp11::as_cpp, the implementation
// is called, and its cpp11::list result is converted back with cpp11::as_sexp.
extern "C" SEXP _rjsoncons_cpp_r_json_query_raw(SEXP ext, SEXP prefix, SEXP bin, SEXP n_records, SEXP object_names) {
// NOTE(review): BEGIN_CPP11 / END_CPP11 are cpp11 macros whose expansion is
// not visible here; presumably they translate C++ exceptions into R errors.
BEGIN_CPP11
return cpp11::as_sexp(cpp_r_json_query_raw(cpp11::as_cpp<cpp11::decay_t<sexp>>(ext), cpp11::as_cpp<cpp11::decay_t<raws>>(prefix), cpp11::as_cpp<cpp11::decay_t<raws>>(bin), cpp11::as_cpp<cpp11::decay_t<int>>(n_records), cpp11::as_cpp<cpp11::decay_t<const std::string>>(object_names)));
END_CPP11
}
// rjsoncons.cpp
// Forward declaration of the C++ implementation that the C entry point below
// forwards to. `prefix` and `bin` are taken as const raw vectors.
cpp11::list cpp_r_json_pivot_raw(sexp ext, const raws prefix, const raws bin, int n_records, const std::string object_names);
// C-linkage entry point taking and returning SEXP. Each SEXP argument is
// converted to the C++ parameter type with cpp11::as_cpp, the implementation
// is called, and its cpp11::list result is converted back with cpp11::as_sexp.
extern "C" SEXP _rjsoncons_cpp_r_json_pivot_raw(SEXP ext, SEXP prefix, SEXP bin, SEXP n_records, SEXP object_names) {
// NOTE(review): BEGIN_CPP11 / END_CPP11 are cpp11 macros whose expansion is
// not visible here; presumably they translate C++ exceptions into R errors.
BEGIN_CPP11
return cpp11::as_sexp(cpp_r_json_pivot_raw(cpp11::as_cpp<cpp11::decay_t<sexp>>(ext), cpp11::as_cpp<cpp11::decay_t<const raws>>(prefix), cpp11::as_cpp<cpp11::decay_t<const raws>>(bin), cpp11::as_cpp<cpp11::decay_t<int>>(n_records), cpp11::as_cpp<cpp11::decay_t<const std::string>>(object_names)));
END_CPP11
}
// rjsoncons.cpp
cpp11::sexp cpp_r_json_finish(sexp ext, const std::string object_names);
extern "C" SEXP _rjsoncons_cpp_r_json_finish(SEXP ext, SEXP object_names) {
BEGIN_CPP11
Expand All @@ -52,12 +66,14 @@ extern "C" SEXP _rjsoncons_cpp_r_json_finish(SEXP ext, SEXP object_names) {

extern "C" {
static const R_CallMethodDef CallEntries[] = {
{"_rjsoncons_cpp_as_r", (DL_FUNC) &_rjsoncons_cpp_as_r, 2},
{"_rjsoncons_cpp_r_json_finish", (DL_FUNC) &_rjsoncons_cpp_r_json_finish, 2},
{"_rjsoncons_cpp_r_json_init", (DL_FUNC) &_rjsoncons_cpp_r_json_init, 5},
{"_rjsoncons_cpp_r_json_pivot", (DL_FUNC) &_rjsoncons_cpp_r_json_pivot, 3},
{"_rjsoncons_cpp_r_json_query", (DL_FUNC) &_rjsoncons_cpp_r_json_query, 3},
{"_rjsoncons_cpp_version", (DL_FUNC) &_rjsoncons_cpp_version, 0},
{"_rjsoncons_cpp_as_r", (DL_FUNC) &_rjsoncons_cpp_as_r, 2},
{"_rjsoncons_cpp_r_json_finish", (DL_FUNC) &_rjsoncons_cpp_r_json_finish, 2},
{"_rjsoncons_cpp_r_json_init", (DL_FUNC) &_rjsoncons_cpp_r_json_init, 5},
{"_rjsoncons_cpp_r_json_pivot", (DL_FUNC) &_rjsoncons_cpp_r_json_pivot, 3},
{"_rjsoncons_cpp_r_json_pivot_raw", (DL_FUNC) &_rjsoncons_cpp_r_json_pivot_raw, 5},
{"_rjsoncons_cpp_r_json_query", (DL_FUNC) &_rjsoncons_cpp_r_json_query, 3},
{"_rjsoncons_cpp_r_json_query_raw", (DL_FUNC) &_rjsoncons_cpp_r_json_query_raw, 5},
{"_rjsoncons_cpp_version", (DL_FUNC) &_rjsoncons_cpp_version, 0},
{NULL, NULL, 0}
};
}
Expand Down
16 changes: 11 additions & 5 deletions src/j_as.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "utilities.h"

using namespace jsoncons;
using namespace rjsoncons;

enum class r_type : uint8_t
{
Expand Down Expand Up @@ -213,15 +214,20 @@ sexp as_r(const Json j)
// json to R

template<class Json>
cpp11::sexp j_as(Json j, std::string as)
cpp11::sexp j_as(Json j, rjsoncons::as as)
{
switch(hash(as.c_str())) {
case hash("string"): return as_sexp( j.template as<std::string>() );
case hash("R"): return as_r<Json>(j);
default: cpp11::stop("unknown `as = '" + as + "'`");
switch(as) {
case as::string: return as_sexp( j.template as<std::string>() );
case as::R: return as_r<Json>(j);
}
}

// Convenience overload of j_as() taking the output representation as text.
//
// `j`  : the JSON value to convert.
// `as` : textual name of the desired representation; it is looked up via
//        enum_index(as_map, as) (presumably yielding the matching
//        rjsoncons::as enumerator -- confirm in utilities.h).
//
// Returns the cpp11::sexp produced by the enum-based j_as() overload. The
// delegated result must be returned explicitly: flowing off the end of a
// function with a non-void return type is undefined behaviour.
template<class Json>
cpp11::sexp j_as(Json j, std::string as)
{
    return j_as(j, enum_index(as_map, as));
}

template<class Json>
sexp as_r_impl(const std::string data)
{
Expand Down
79 changes: 0 additions & 79 deletions src/j_pivot.h

This file was deleted.

21 changes: 0 additions & 21 deletions src/j_query.h

This file was deleted.

Loading

0 comments on commit 96898a3

Please sign in to comment.