diff --git a/NEWS.md b/NEWS.md index 7a2227f5..7fb6926c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,19 @@ # readxl 1.1.0.9000 +## Breaking change + * Missing or duplicated column names are now repaired with `tibble::set_tidy_names()` in `read_excel()` and friends. `set_tidy_names()` is intended to encourage name repair that is more principled and consistent, across multiple tidyverse packages. Its design is discussed in [tidyverse/tibble#217](https://github.com/tidyverse/tibble/issues/217). (#357, #453) - Example of change a user will see: consider a spreadsheet with three columns, one unnamed and two named `x`. - Column names in readxl > 1.1.0: `..1`, `x..2`, `x..3` - Column names in readxl <= 1.1.0: `X__1`, `x`, `x__1` + +## Other changes + +* Path handling (#477): + + - `.xls` paths are no longer normalized. (#476 xls) + - All paths are explicitly converted to the native encoding via `enc2native()` (#370) # readxl 1.1.0 diff --git a/R/read_excel.R b/R/read_excel.R index a6de3db2..29ead9ee 100644 --- a/R/read_excel.R +++ b/R/read_excel.R @@ -152,7 +152,7 @@ read_excel_ <- function(path, sheet = NULL, range = NULL, tibble::set_tidy_names( tibble::as_tibble( read_fun( - path = path, sheet_i = sheet, + path = enc2native(path), sheet_i = sheet, limits = limits, shim = shim, col_names = col_names, col_types = col_types, na = na, trim_ws = trim_ws, guess_max = guess_max diff --git a/src/XlsWorkBook.h b/src/XlsWorkBook.h index a02b4c60..ae756418 100644 --- a/src/XlsWorkBook.h +++ b/src/XlsWorkBook.h @@ -6,12 +6,6 @@ #include "ColSpec.h" #include "utils.h" -inline std::string normalizePath(std::string path) { - Rcpp::Environment baseEnv = Rcpp::Environment::base_env(); - Rcpp::Function normalizePath = baseEnv["normalizePath"]; - return Rcpp::as(normalizePath(path, "/", true)); -} - class XlsWorkBook { // common to Xls[x]WorkBook @@ -26,7 +20,7 @@ class XlsWorkBook { public: XlsWorkBook(const std::string& path) { - path_ = normalizePath(path); + path_ = path; xls::xlsWorkBook* pWB_ = xls::xls_open(path_.c_str(), "UTF-8"); if (pWB_ == NULL) { diff --git a/tests/testthat/test-read-excel.R b/tests/testthat/test-read-excel.R index b7268986..fefbd8be 100644 --- a/tests/testthat/test-read-excel.R +++ b/tests/testthat/test-read-excel.R @@ -93,3 +93,25 @@ test_that("trim_ws must be a logical", { "`trim_ws` must be either TRUE or FALSE" ) }) + +## xlsx: https://github.com/tidyverse/readxl/issues/370 +## xls: https://github.com/tidyverse/readxl/issues/476 +test_that("non-ASCII filenames can be read", { + skip_on_cran() + ## chosen to be non-ASCII but + ## [1] representable in Windows-1252 and + ## [2] not any of the few differences between Windows-1252 and ISO-8859-1 + ## a-grave + e-diaeresis + Eth + '.xls[x]' + xls_filename <- "\u00C0\u00CB\u00D0\u002E\u0078\u006C\u0073" + xlsx_filename <- "\u00C0\u00CB\u00D0\u002E\u0078\u006C\u0073\u0078" + tricky_xls_file <- file.path(tempdir(), xls_filename) + tricky_xlsx_file <- file.path(tempdir(), xlsx_filename) + file.copy(test_sheet("list_type.xls"), tricky_xls_file) + file.copy(test_sheet("list_type.xlsx"), tricky_xlsx_file) + expect_true(file.exists(tricky_xls_file)) + expect_true(file.exists(tricky_xlsx_file)) + on.exit(file.remove(tricky_xls_file)) + on.exit(file.remove(tricky_xlsx_file)) + expect_error_free(read_xls(tricky_xls_file)) + expect_error_free(read_xlsx(tricky_xlsx_file)) +})