From 4540dff849ce723016d0129f1ba85524faf5952b Mon Sep 17 00:00:00 2001 From: "Jennifer (Jenny) Bryan" Date: Mon, 3 Jan 2022 08:42:07 -0800 Subject: [PATCH] Push re-encoding of xls path into XlsWorkBook constructor (#674) --- R/excel-sheets.R | 2 +- R/read_excel.R | 2 +- R/utils.R | 4 ---- src/XlsWorkBook.h | 8 +++++++- src/XlsWorkSheet.cpp | 5 ++--- src/cpp11.cpp | 4 ++-- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/R/excel-sheets.R b/R/excel-sheets.R index 25cc0a0f..ec9afeb3 100644 --- a/R/excel-sheets.R +++ b/R/excel-sheets.R @@ -18,7 +18,7 @@ excel_sheets <- function(path) { path <- check_file(path) format <- check_format(path) - path <- normalize_path(path) + path <- normalizePath(path) switch(format, xls = xls_sheets(path), diff --git a/R/read_excel.R b/R/read_excel.R index 43f4fea8..57cb4ba4 100644 --- a/R/read_excel.R +++ b/R/read_excel.R @@ -194,7 +194,7 @@ read_excel_ <- function(path, sheet = NULL, range = NULL, sheets_fun <- xlsx_sheets read_fun <- read_xlsx_ } - path <- normalize_path(path) + path <- normalizePath(path) sheet <- standardise_sheet(sheet, range, sheets_fun(path)) shim <- !is.null(range) limits <- standardise_limits( diff --git a/R/utils.R b/R/utils.R index 9f8ccf26..fa7e1992 100644 --- a/R/utils.R +++ b/R/utils.R @@ -9,10 +9,6 @@ check_file <- function(path) { path } -normalize_path <- function(path) { - enc2native(normalizePath(path)) -} - is_integerish <- function(x) { floor(x) == x } diff --git a/src/XlsWorkBook.h b/src/XlsWorkBook.h index 31f7f8d6..4dfd37fc 100644 --- a/src/XlsWorkBook.h +++ b/src/XlsWorkBook.h @@ -4,6 +4,7 @@ #include #include "ColSpec.h" #include "libxls/xls.h" +#include "cpp11/r_string.hpp" class XlsWorkBook { @@ -19,7 +20,12 @@ class XlsWorkBook { public: XlsWorkBook(const std::string& path) { - path_ = path; + // the user's path has probably been translated to UTF-8 by + // normalizePath() on the R side + // even if that were not true, cpp11 does this automatically when + // constructing a std::string + // but we need to pass the path to libxls in the native encoding + path_ = std::string(Rf_translateChar(cpp11::r_string(path))); xls::xls_error_t error = xls::LIBXLS_OK; xls::xlsWorkBook* pWB_ = xls::xls_open_file(path_.c_str(), "UTF-8", &error); diff --git a/src/XlsWorkSheet.cpp b/src/XlsWorkSheet.cpp index 3296a43b..d837e4da 100644 --- a/src/XlsWorkSheet.cpp +++ b/src/XlsWorkSheet.cpp @@ -5,15 +5,14 @@ #include "libxls/xls.h" [[cpp11::register]] -cpp11::list read_xls_(cpp11::strings path, int sheet_i, +cpp11::list read_xls_(std::string path, int sheet_i, cpp11::integers limits, bool shim, cpp11::sexp col_names, cpp11::strings col_types, std::vector na, bool trim_ws, int guess_max = 1000, bool progress = true) { - std::string filename(Rf_translateChar(STRING_ELT(path, 0))); // Construct worksheet ---------------------------------------------- - XlsWorkSheet ws(filename, sheet_i, limits, shim, progress); + XlsWorkSheet ws(path, sheet_i, limits, shim, progress); // catches empty sheets and sheets where requested rectangle contains no data if (ws.nrow() == 0 && ws.ncol() == 0) { diff --git a/src/cpp11.cpp b/src/cpp11.cpp index f8a630b1..8c8d991a 100644 --- a/src/cpp11.cpp +++ b/src/cpp11.cpp @@ -20,10 +20,10 @@ extern "C" SEXP _readxl_xls_date_formats(SEXP path) { END_CPP11 } // XlsWorkSheet.cpp -cpp11::list read_xls_(cpp11::strings path, int sheet_i, cpp11::integers limits, bool shim, cpp11::sexp col_names, cpp11::strings col_types, std::vector na, bool trim_ws, int guess_max, bool progress); +cpp11::list read_xls_(std::string path, int sheet_i, cpp11::integers limits, bool shim, cpp11::sexp col_names, cpp11::strings col_types, std::vector na, bool trim_ws, int guess_max, bool progress); extern "C" SEXP _readxl_read_xls_(SEXP path, SEXP sheet_i, SEXP limits, SEXP shim, SEXP col_names, SEXP col_types, SEXP na, SEXP trim_ws, SEXP guess_max, SEXP progress) { BEGIN_CPP11 - return cpp11::as_sexp(read_xls_(cpp11::as_cpp>(path), cpp11::as_cpp>(sheet_i), cpp11::as_cpp>(limits), cpp11::as_cpp>(shim), cpp11::as_cpp>(col_names), cpp11::as_cpp>(col_types), cpp11::as_cpp>>(na), cpp11::as_cpp>(trim_ws), cpp11::as_cpp>(guess_max), cpp11::as_cpp>(progress))); + return cpp11::as_sexp(read_xls_(cpp11::as_cpp>(path), cpp11::as_cpp>(sheet_i), cpp11::as_cpp>(limits), cpp11::as_cpp>(shim), cpp11::as_cpp>(col_names), cpp11::as_cpp>(col_types), cpp11::as_cpp>>(na), cpp11::as_cpp>(trim_ws), cpp11::as_cpp>(guess_max), cpp11::as_cpp>(progress))); END_CPP11 } // XlsxWorkBook.cpp