From 76b93e673185525a42882be8e5909e94fea9bed2 Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Sun, 8 May 2022 15:18:05 -0700 Subject: [PATCH 01/16] Revert filepath reencoding --- R/path.R | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/R/path.R b/R/path.R index 2c3cfb6c..8863e4ed 100644 --- a/R/path.R +++ b/R/path.R @@ -20,14 +20,6 @@ reencode_file <- function(path, encoding) { return(list(out_file)) } -reencode_filepath <- function(path) { - if (is_windows()) { - enc2utf8(path) - } else { - enc2native(path) - } -} - # These functions adapted from https://github.com/tidyverse/readr/blob/192cb1ca5c445e359f153d2259391e6d324fd0a2/R/source.R standardise_path <- function(path) { if (is.raw(path)) { @@ -68,7 +60,7 @@ standardise_path <- function(path) { } } - as.list(reencode_filepath(path)) + as.list(path) } standardise_one_path <- function (path, write = FALSE) { From 8e821c07af2594c83b18dd6f9cd3c2771d44eafe Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Sun, 8 May 2022 15:19:27 -0700 Subject: [PATCH 02/16] Unskip this test --- tests/testthat/test-path.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R index 01b5234c..2c1589ec 100644 --- a/tests/testthat/test-path.R +++ b/tests/testthat/test-path.R @@ -110,11 +110,6 @@ test_that("can read file w/o final newline, w/ multi-byte characters in path", { # for completeness, w.r.t. 
test above test_that("can read file w/ final newline, w/ multi-byte characters in path", { - # (our usage of) mio seems to fail for a non-ascii path, on linux, in a - # non-UTF-8 local - # I'm not convinced it's worth troubleshooting at this point - skip_if(!is_windows() && isTRUE(l10n_info()$`Latin-1`)) - pattern <- "yes-trailing-n\u00e8wline-m\u00fblti-byt\u00e9-path-" tfile <- withr::local_tempfile(pattern = pattern, fileext = ".csv") writeLines(c("a,b", "A,B"), tfile) From 4317c8a285ecf8d9cd17a6285301b9ea2613c00b Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Sun, 8 May 2022 15:35:48 -0700 Subject: [PATCH 03/16] Re-encode to native just prior to fopen() or mio::make_mmap_source() --- src/unicode_fopen.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/unicode_fopen.h b/src/unicode_fopen.h index b3a735d0..049dbd20 100644 --- a/src/unicode_fopen.h +++ b/src/unicode_fopen.h @@ -12,9 +12,12 @@ #endif // clang-format on -#ifdef _WIN32 #include + +#ifdef _WIN32 #include +#else +#include "cpp11/r_string.hpp" #endif // This is needed to support wide character paths on windows @@ -40,7 +43,10 @@ inline FILE* unicode_fopen(const char* path, const char* mode) { MultiByteToWideChar(CP_UTF8, 0, path, -1, buf, len); out = _wfopen(buf, mode_w); #else - out = fopen(path, mode); + // cpp11 will have converted the user's path to UTF-8 by now + // but we need to pass the path to fopen() in the native encoding + const char* native_path = Rf_translateChar(cpp11::r_string(path)); + out = fopen(native_path, mode); #endif return out; @@ -64,6 +70,9 @@ make_mmap_source(const char* file, std::error_code& error) { free(buf); return out; #else - return mio::make_mmap_source(file, error); + // cpp11 will have converted the user's path to UTF-8 by now + // but we need to pass the path to mio in the native encoding + const char* native_path = Rf_translateChar(cpp11::r_string(file)); + return mio::make_mmap_source(native_path, error); #endif } From 
000790d75db279adc80c1816d129128cb90db622 Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Sun, 8 May 2022 16:11:49 -0700 Subject: [PATCH 04/16] Expose the bytes --- src/unicode_fopen.h | 19 +++++++++++++++++++ src/vroom.cc | 3 +++ 2 files changed, 22 insertions(+) diff --git a/src/unicode_fopen.h b/src/unicode_fopen.h index 049dbd20..292de57b 100644 --- a/src/unicode_fopen.h +++ b/src/unicode_fopen.h @@ -20,6 +20,17 @@ #include "cpp11/r_string.hpp" #endif +inline void print_hex(const char* string) { + unsigned char* p = (unsigned char*) string; + for (int i = 0; i < 300 ; i++) { + if (p[i] == '\0') break; + Rprintf("%c 0x%02x ", p[i], p[i]); + if ((i%16 == 0) && i) + Rprintf("\n"); + } + Rprintf("\n"); +} + // This is needed to support wide character paths on windows inline FILE* unicode_fopen(const char* path, const char* mode) { FILE* out; @@ -45,7 +56,11 @@ inline FILE* unicode_fopen(const char* path, const char* mode) { #else // cpp11 will have converted the user's path to UTF-8 by now // but we need to pass the path to fopen() in the native encoding + Rprintf("unicode_fopen() received path: %s\n", path); + print_hex(path); const char* native_path = Rf_translateChar(cpp11::r_string(path)); + Rprintf("Calling fopen() on native path: %s\n", native_path); + print_hex(native_path); out = fopen(native_path, mode); #endif @@ -72,7 +87,11 @@ make_mmap_source(const char* file, std::error_code& error) { #else // cpp11 will have converted the user's path to UTF-8 by now // but we need to pass the path to mio in the native encoding + Rprintf("make_mmap_source() received path: %s\n", file); + print_hex(file); const char* native_path = Rf_translateChar(cpp11::r_string(file)); + Rprintf("Calling mio::make_mmap_source() on native path: %s\n", native_path); + print_hex(native_path); return mio::make_mmap_source(native_path, error); #endif } diff --git a/src/vroom.cc b/src/vroom.cc index 6fe129c6..08f30780 100644 --- a/src/vroom.cc +++ b/src/vroom.cc @@ -95,9 +95,12 @@ 
[[cpp11::register]] bool has_trailing_newline(const cpp11::strings& filename) { std::FILE* f = unicode_fopen(CHAR(filename[0]), "rb"); + Rprintf("In has_trailing_newline(): "); if (!f) { + Rprintf("no file\n"); return true; } + Rprintf("yes file\n"); std::setvbuf(f, nullptr, _IONBF, 0); From bdecf47fb36cffea2632a31cd2eeff805d75840d Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Sun, 8 May 2022 16:25:15 -0700 Subject: [PATCH 05/16] Unconditionally encode paths as UTF-8 --- R/path.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/path.R b/R/path.R index 8863e4ed..1cf33d65 100644 --- a/R/path.R +++ b/R/path.R @@ -60,7 +60,7 @@ standardise_path <- function(path) { } } - as.list(path) + as.list(enc2utf8(path)) } standardise_one_path <- function (path, write = FALSE) { @@ -140,6 +140,8 @@ standardise_one_path <- function (path, write = FALSE) { stop("Can only read from, not write to, .zip", call. = FALSE) } + path <- enc2utf8(path) + switch(compression, gz = gzfile(path, ""), bz2 = bzfile(path, ""), From 1023b1c59472e80f56a882d164fbcc88aa64e613 Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Sun, 8 May 2022 17:26:41 -0700 Subject: [PATCH 06/16] Clean up comments, remove print debugging --- src/unicode_fopen.h | 41 ++++++++++++++++++----------------------- src/vroom.cc | 3 --- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/src/unicode_fopen.h b/src/unicode_fopen.h index 292de57b..eac96dc7 100644 --- a/src/unicode_fopen.h +++ b/src/unicode_fopen.h @@ -20,22 +20,23 @@ #include "cpp11/r_string.hpp" #endif -inline void print_hex(const char* string) { - unsigned char* p = (unsigned char*) string; - for (int i = 0; i < 300 ; i++) { - if (p[i] == '\0') break; - Rprintf("%c 0x%02x ", p[i], p[i]); - if ((i%16 == 0) && i) - Rprintf("\n"); - } - Rprintf("\n"); -} +// useful for print debugging file path encoding +// inline void print_hex(const char* string) { +// unsigned char* p = (unsigned char*) string; +// for (int i = 0; i < 300 ; i++) 
{ +// if (p[i] == '\0') break; +// Rprintf("%c 0x%02x ", p[i], p[i]); +// if ((i%16 == 0) && i) +// Rprintf("\n"); +// } +// Rprintf("\n"); +// } // This is needed to support wide character paths on windows inline FILE* unicode_fopen(const char* path, const char* mode) { FILE* out; #ifdef _WIN32 - // First conver the mode to the wide equivalent + // First convert the mode to the wide equivalent // Only usage is 2 characters so max 8 bytes + 2 byte null. wchar_t mode_w[10]; MultiByteToWideChar(CP_UTF8, 0, mode, -1, mode_w, 9); @@ -54,13 +55,10 @@ inline FILE* unicode_fopen(const char* path, const char* mode) { MultiByteToWideChar(CP_UTF8, 0, path, -1, buf, len); out = _wfopen(buf, mode_w); #else - // cpp11 will have converted the user's path to UTF-8 by now - // but we need to pass the path to fopen() in the native encoding - Rprintf("unicode_fopen() received path: %s\n", path); - print_hex(path); + // the path has UTF-8 encoding, because we do that unconditionally on the R + // side (but also because cpp11 is eager to use UTF-8) + // however, we need to pass the path to fopen() in the native encoding const char* native_path = Rf_translateChar(cpp11::r_string(path)); - Rprintf("Calling fopen() on native path: %s\n", native_path); - print_hex(native_path); out = fopen(native_path, mode); #endif @@ -85,13 +83,10 @@ make_mmap_source(const char* file, std::error_code& error) { free(buf); return out; #else - // cpp11 will have converted the user's path to UTF-8 by now - // but we need to pass the path to mio in the native encoding - Rprintf("make_mmap_source() received path: %s\n", file); - print_hex(file); + // the path has UTF-8 encoding, because we do that unconditionally on the R + // side (but also because cpp11 is eager to use UTF-8) + // however, we need to pass the path to fopen() in the native encoding const char* native_path = Rf_translateChar(cpp11::r_string(file)); - Rprintf("Calling mio::make_mmap_source() on native path: %s\n", native_path); - 
print_hex(native_path); return mio::make_mmap_source(native_path, error); #endif } diff --git a/src/vroom.cc b/src/vroom.cc index 08f30780..6fe129c6 100644 --- a/src/vroom.cc +++ b/src/vroom.cc @@ -95,12 +95,9 @@ [[cpp11::register]] bool has_trailing_newline(const cpp11::strings& filename) { std::FILE* f = unicode_fopen(CHAR(filename[0]), "rb"); - Rprintf("In has_trailing_newline(): "); if (!f) { - Rprintf("no file\n"); return true; } - Rprintf("yes file\n"); std::setvbuf(f, nullptr, _IONBF, 0); From 2f314b362632fcbc804c7752e4a8ba661c6278b7 Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Wed, 11 May 2022 15:10:32 -0700 Subject: [PATCH 07/16] Ensure path returned by `chr_to_file()` is encoded as UTF-8 This guards against the scenario where the tempdir's path has non-ascii characters in it. Presumably that could arise on, say, Windows if the user name has non-ascii characters: > tempdir() [1] "C:\\Users\\jenny\\AppData\\Local\\Temp\\Rtmpg30qBQ" --- R/path.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/path.R b/R/path.R index 1cf33d65..a8b94089 100644 --- a/R/path.R +++ b/R/path.R @@ -259,7 +259,7 @@ chr_to_file <- function(x, envir = parent.frame()) { withr::defer(unlink(out), envir = envir) - normalizePath(out) + enc2utf8(normalizePath(out)) } detect_compression <- function(path) { From 579265391a211b9a51c9158ebff9d74229306105 Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Wed, 11 May 2022 15:25:41 -0700 Subject: [PATCH 08/16] Add more `enc2utf8()` Every time we use a base R path-handling function, explicitly re-encode the result as UTF-8. 
--- R/path.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/path.R b/R/path.R index a8b94089..132ea7b9 100644 --- a/R/path.R +++ b/R/path.R @@ -96,10 +96,12 @@ standardise_one_path <- function (path, write = FALSE) { ) } - p <- split_path_ext(basename(path)) + path <- enc2utf8(path) + + p <- split_path_ext(enc2utf8(basename(path))) if (write) { - path <- normalizePath(path, mustWork = FALSE) + path <- enc2utf8(normalizePath(path, mustWork = FALSE)) } else { path <- check_path(path) } @@ -140,8 +142,6 @@ standardise_one_path <- function (path, write = FALSE) { stop("Can only read from, not write to, .zip", call. = FALSE) } - path <- enc2utf8(path) - switch(compression, gz = gzfile(path, ""), bz2 = bzfile(path, ""), @@ -222,8 +222,9 @@ is_url <- function(path) { } check_path <- function(path) { - if (file.exists(path)) - return(normalizePath(path, "/", mustWork = FALSE)) + if (file.exists(path)) { + return(enc2utf8(normalizePath(path, "/", mustWork = FALSE))) + } stop("'", path, "' does not exist", if (!is_absolute_path(path)) { From 114c6717d23a31f956eabe958de49ea8850d6b34 Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Wed, 11 May 2022 15:59:21 -0700 Subject: [PATCH 09/16] Inline the reference object --- tests/testthat/test-path.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R index 2c1589ec..511c1868 100644 --- a/tests/testthat/test-path.R +++ b/tests/testthat/test-path.R @@ -1,13 +1,10 @@ -mt <- vroom(vroom_example("mtcars.csv"), col_types = list()) - test_that("vroom errors if the file does not exist", { - tf <- tempfile() - expect_error(vroom(tf, col_types = list()), "does not exist") }) test_that("vroom works with compressed files", { + mt <- vroom(vroom_example("mtcars.csv"), col_types = list()) expect_equal(vroom(vroom_example("mtcars.csv.gz"), col_types = list()), mt) expect_equal(vroom(vroom_example("mtcars.csv.bz2"), col_types = list()), 
mt) expect_equal(vroom(vroom_example("mtcars.csv.xz"), col_types = list()), mt) @@ -17,6 +14,7 @@ test_that("vroom works with compressed files", { test_that("read_file works via https", { skip_on_cran() + mt <- vroom(vroom_example("mtcars.csv"), col_types = list()) url <- "https://raw.githubusercontent.com/r-lib/vroom/main/inst/extdata/mtcars.csv" expect_equal(vroom(url, col_types = list()), mt) }) @@ -24,6 +22,7 @@ test_that("read_file works via https", { test_that("vroom works via https on gz file", { skip_on_cran() + mt <- vroom(vroom_example("mtcars.csv"), col_types = list()) url <- "https://raw.githubusercontent.com/r-lib/vroom/main/inst/extdata/mtcars.csv.gz" expect_equal(vroom(url, col_types = list()), mt) }) From 8f180e06fa54f48ab5dc6132b28f14d960e47bf1 Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Wed, 11 May 2022 15:59:42 -0700 Subject: [PATCH 10/16] Test writing to a non-ascii path --- tests/testthat/test-path.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R index 511c1868..afa89169 100644 --- a/tests/testthat/test-path.R +++ b/tests/testthat/test-path.R @@ -118,3 +118,11 @@ test_that("can read file w/ final newline, w/ multi-byte characters in path", { tibble::tibble(a = "A", b = "B") ) }) + +test_that("can write to path with non-ascii characters", { + pattern <- "cr\u00E8me-br\u00FBl\u00E9e-" + tfile <- withr::local_tempfile(pattern = pattern, fileext = ".csv") + dat <- tibble::tibble(a = "A", b = "B") + vroom_write(dat, tfile, delim = ",") + expect_equal(readLines(tfile), c("a,b", "A,B")) +}) From 004f42c79b1a171bdaaec514e82b97abde7a1ad3 Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Wed, 11 May 2022 16:42:23 -0700 Subject: [PATCH 11/16] Test read/write of .zip with non-ascii characters in path --- tests/testthat/test-path.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R index afa89169..59fb4fac 100644 
--- a/tests/testthat/test-path.R +++ b/tests/testthat/test-path.R @@ -126,3 +126,21 @@ test_that("can write to path with non-ascii characters", { vroom_write(dat, tfile, delim = ",") expect_equal(readLines(tfile), c("a,b", "A,B")) }) + +test_that("can read/write .zip with non-ascii characters in path", { + skip_on_cran() + skip_if_not(rlang::is_installed("archive")) + + tfile <- file.path(tempdir(), "d\u00E4t.zip") + on.exit(unlink(tfile)) + dat <- tibble::tibble(a = "A", b = "B") + vroom_write(dat, tfile) + + # PK is the zip magic number + expect_equal( + readBin(tfile, raw(), n = 2), + as.raw(c(0x50, 0x4b)) + ) + + expect_equal(vroom(tfile), dat) +}) From 9777c2e7692fba4115fda98cd5c457efd4ed3b8d Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Thu, 12 May 2022 17:09:26 -0700 Subject: [PATCH 12/16] Rewrite this test --- tests/testthat/test-path.R | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R index 59fb4fac..830931d2 100644 --- a/tests/testthat/test-path.R +++ b/tests/testthat/test-path.R @@ -127,20 +127,33 @@ test_that("can write to path with non-ascii characters", { expect_equal(readLines(tfile), c("a,b", "A,B")) }) -test_that("can read/write .zip with non-ascii characters in path", { +test_that("can read/write a compressed file with non-ascii characters in path", { skip_on_cran() skip_if_not(rlang::is_installed("archive")) + # https://github.com/r-lib/archive/issues/75 + skip_if(is_windows() && l10n_info()$`Latin-1`) + + make_temp_path <- function(ext) file.path(tempdir(), paste0("d\u00E4t", ext)) + + gzfile <- withr::local_file(make_temp_path(".tar.gz")) + bz2file <- withr::local_file(make_temp_path(".tar.bz2")) + xzfile <- withr::local_file(make_temp_path(".tar.xz")) + zipfile <- withr::local_file(make_temp_path(".zip")) - tfile <- file.path(tempdir(), "d\u00E4t.zip") - on.exit(unlink(tfile)) dat <- tibble::tibble(a = "A", b = "B") - vroom_write(dat, 
tfile) - # PK is the zip magic number - expect_equal( - readBin(tfile, raw(), n = 2), - as.raw(c(0x50, 0x4b)) - ) + vroom_write(dat, gzfile) + vroom_write(dat, bz2file) + vroom_write(dat, xzfile) + vroom_write(dat, zipfile) + + expect_equal(detect_compression(gzfile), "gz") + expect_equal(detect_compression(bz2file), "bz2") + expect_equal(detect_compression(xzfile), "xz") + expect_equal(detect_compression(zipfile), "zip") - expect_equal(vroom(tfile), dat) + expect_equal(vroom(gzfile, show_col_types = FALSE), dat) + expect_equal(vroom(bz2file, show_col_types = FALSE), dat) + expect_equal(vroom(xzfile, show_col_types = FALSE), dat) + expect_equal(vroom(zipfile, show_col_types = FALSE), dat) }) From a9436d4b2fe5118b8597d1cf78f506115c23566f Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Sat, 14 May 2022 19:01:12 -0700 Subject: [PATCH 13/16] I now think the problem IS in archive, for Windows and unix Skip this test in non-UTF-8 locales for now --- tests/testthat/test-path.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R index 830931d2..702bfcc4 100644 --- a/tests/testthat/test-path.R +++ b/tests/testthat/test-path.R @@ -131,7 +131,7 @@ test_that("can read/write a compressed file with non-ascii characters in path", skip_on_cran() skip_if_not(rlang::is_installed("archive")) # https://github.com/r-lib/archive/issues/75 - skip_if(is_windows() && l10n_info()$`Latin-1`) + skip_if(l10n_info()$`Latin-1`) make_temp_path <- function(ext) file.path(tempdir(), paste0("d\u00E4t", ext)) From 4be132cd17010116a1ff737e2dd20cfa5819dcdc Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Mon, 16 May 2022 19:53:53 -0700 Subject: [PATCH 14/16] Add a test re: reading fwf from non-ascii filepath --- tests/testthat/test-path.R | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R index 702bfcc4..e1e61659 100644 --- a/tests/testthat/test-path.R +++ 
b/tests/testthat/test-path.R @@ -157,3 +157,18 @@ test_that("can read/write a compressed file with non-ascii characters in path", expect_equal(vroom(xzfile, show_col_types = FALSE), dat) expect_equal(vroom(zipfile, show_col_types = FALSE), dat) }) + +test_that("can read fwf file w/ non-ascii characters in path", { + tfile <- withr::local_tempfile(pattern = "fwf-y\u00F6-", fileext = ".txt") + writeLines(c("A B", "C D"), tfile) + + expect_equal( + spec <- fwf_empty(tfile, col_names = c("a", "b")), + list(begin = c(0L, 2L), end = c(1L, NA), col_names = c("a", "b")) + ) + + expect_equal( + vroom_fwf(tfile, spec, show_col_types = FALSE), + tibble::tibble(a = c("A", "C"), b = c("B", "D")) + ) +}) From e4a6321675fdfbbade442c6891026a22dbc9933d Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Mon, 16 May 2022 21:35:13 -0700 Subject: [PATCH 15/16] Tweaks NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index c6f3cd42..461d32d9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # vroom (development version) -* `vroom()` reads more reliably from filepaths containing non-ascii characters (#394). +* `vroom()` reads more reliably from filepaths containing non-ascii characters, in a non-UTF-8 locale (#394, #438). * Fixed segfault when reading in multiple files and the first file is header-only but subsequent files have at least one row (#430). 
From 432b918116329239224921b4cb881911d8ad3f33 Mon Sep 17 00:00:00 2001 From: Jenny Bryan Date: Mon, 16 May 2022 21:37:11 -0700 Subject: [PATCH 16/16] Make versions of basename() and normalizePath() that uphold UTF-8 everywhere --- R/path.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/R/path.R b/R/path.R index 132ea7b9..baa2d778 100644 --- a/R/path.R +++ b/R/path.R @@ -98,10 +98,10 @@ standardise_one_path <- function (path, write = FALSE) { path <- enc2utf8(path) - p <- split_path_ext(enc2utf8(basename(path))) + p <- split_path_ext(basename_utf8(path)) if (write) { - path <- enc2utf8(normalizePath(path, mustWork = FALSE)) + path <- normalizePath_utf8(path, mustWork = FALSE) } else { path <- check_path(path) } @@ -223,7 +223,7 @@ is_url <- function(path) { check_path <- function(path) { if (file.exists(path)) { - return(enc2utf8(normalizePath(path, "/", mustWork = FALSE))) + return(normalizePath_utf8(path, mustWork = FALSE)) } stop("'", path, "' does not exist", @@ -260,7 +260,7 @@ chr_to_file <- function(x, envir = parent.frame()) { withr::defer(unlink(out), envir = envir) - enc2utf8(normalizePath(out)) + normalizePath_utf8(out) } detect_compression <- function(path) { @@ -314,3 +314,11 @@ detect_compression <- function(path) { NA_character_ } + +basename_utf8 <- function(path) { + enc2utf8(basename(path)) +} + +normalizePath_utf8 <- function(path, winslash = "/", mustWork = NA) { + enc2utf8(normalizePath(path, winslash = winslash, mustWork = mustWork)) +}