From 76b93e673185525a42882be8e5909e94fea9bed2 Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Sun, 8 May 2022 15:18:05 -0700
Subject: [PATCH 01/16] Revert filepath reencoding

---
 R/path.R | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/R/path.R b/R/path.R
index 2c3cfb6c..8863e4ed 100644
--- a/R/path.R
+++ b/R/path.R
@@ -20,14 +20,6 @@ reencode_file <- function(path, encoding) {
   return(list(out_file))
 }
 
-reencode_filepath <- function(path) {
-  if (is_windows()) {
-    enc2utf8(path)
-  } else {
-    enc2native(path)
-  }
-}
-
 # These functions adapted from https://github.com/tidyverse/readr/blob/192cb1ca5c445e359f153d2259391e6d324fd0a2/R/source.R
 standardise_path <- function(path) {
   if (is.raw(path)) {
@@ -68,7 +60,7 @@ standardise_path <- function(path) {
     }
   }
 
-  as.list(reencode_filepath(path))
+  as.list(path)
 }
 
 standardise_one_path <- function (path, write = FALSE) {

From 8e821c07af2594c83b18dd6f9cd3c2771d44eafe Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Sun, 8 May 2022 15:19:27 -0700
Subject: [PATCH 02/16] Unskip this test

---
 tests/testthat/test-path.R | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R
index 01b5234c..2c1589ec 100644
--- a/tests/testthat/test-path.R
+++ b/tests/testthat/test-path.R
@@ -110,11 +110,6 @@ test_that("can read file w/o final newline, w/ multi-byte characters in path", {
 
 # for completeness, w.r.t. test above
 test_that("can read file w/ final newline, w/ multi-byte characters in path", {
-  # (our usage of) mio seems to fail for a non-ascii path, on linux, in a
-  # non-UTF-8 local
-  # I'm not convinced it's worth troubleshooting at this point
-  skip_if(!is_windows() && isTRUE(l10n_info()$`Latin-1`))
-
   pattern <- "yes-trailing-n\u00e8wline-m\u00fblti-byt\u00e9-path-"
   tfile <- withr::local_tempfile(pattern = pattern, fileext = ".csv")
   writeLines(c("a,b", "A,B"), tfile)

From 4317c8a285ecf8d9cd17a6285301b9ea2613c00b Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Sun, 8 May 2022 15:35:48 -0700
Subject: [PATCH 03/16] Re-encode to native just prior to fopen() or
 mio::make_mmap_source()

---
 src/unicode_fopen.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/unicode_fopen.h b/src/unicode_fopen.h
index b3a735d0..049dbd20 100644
--- a/src/unicode_fopen.h
+++ b/src/unicode_fopen.h
@@ -12,9 +12,12 @@
 #endif
 // clang-format on
 
-#ifdef _WIN32
 #include <Rinternals.h>
+
+#ifdef _WIN32
 #include <windows.h>
+#else
+#include "cpp11/r_string.hpp"
 #endif
 
 // This is needed to support wide character paths on windows
@@ -40,7 +43,10 @@ inline FILE* unicode_fopen(const char* path, const char* mode) {
   MultiByteToWideChar(CP_UTF8, 0, path, -1, buf, len);
   out = _wfopen(buf, mode_w);
 #else
-  out = fopen(path, mode);
+  // cpp11 will have converted the user's path to UTF-8 by now
+  // but we need to pass the path to fopen() in the native encoding
+  const char* native_path = Rf_translateChar(cpp11::r_string(path));
+  out = fopen(native_path, mode);
 #endif
 
   return out;
@@ -64,6 +70,9 @@ make_mmap_source(const char* file, std::error_code& error) {
   free(buf);
   return out;
 #else
-  return mio::make_mmap_source(file, error);
+  // cpp11 will have converted the user's path to UTF-8 by now
+  // but we need to pass the path to mio in the native encoding
+  const char* native_path = Rf_translateChar(cpp11::r_string(file));
+  return mio::make_mmap_source(native_path, error);
 #endif
 }

From 000790d75db279adc80c1816d129128cb90db622 Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Sun, 8 May 2022 16:11:49 -0700
Subject: [PATCH 04/16] Expose the bytes

---
 src/unicode_fopen.h | 19 +++++++++++++++++++
 src/vroom.cc        |  3 +++
 2 files changed, 22 insertions(+)

diff --git a/src/unicode_fopen.h b/src/unicode_fopen.h
index 049dbd20..292de57b 100644
--- a/src/unicode_fopen.h
+++ b/src/unicode_fopen.h
@@ -20,6 +20,17 @@
 #include "cpp11/r_string.hpp"
 #endif
 
+inline void print_hex(const char* string) {
+  unsigned char* p = (unsigned char*) string;
+  for (int i = 0; i < 300 ; i++) {
+    if (p[i] == '\0') break;
+    Rprintf("%c 0x%02x ", p[i], p[i]);
+    if ((i%16 == 0) && i)
+      Rprintf("\n");
+  }
+  Rprintf("\n");
+}
+
 // This is needed to support wide character paths on windows
 inline FILE* unicode_fopen(const char* path, const char* mode) {
   FILE* out;
@@ -45,7 +56,11 @@ inline FILE* unicode_fopen(const char* path, const char* mode) {
 #else
   // cpp11 will have converted the user's path to UTF-8 by now
   // but we need to pass the path to fopen() in the native encoding
+  Rprintf("unicode_fopen() received path: %s\n", path);
+  print_hex(path);
   const char* native_path = Rf_translateChar(cpp11::r_string(path));
+  Rprintf("Calling fopen() on native path: %s\n", native_path);
+  print_hex(native_path);
   out = fopen(native_path, mode);
 #endif
 
@@ -72,7 +87,11 @@ make_mmap_source(const char* file, std::error_code& error) {
 #else
   // cpp11 will have converted the user's path to UTF-8 by now
   // but we need to pass the path to mio in the native encoding
+  Rprintf("make_mmap_source() received path: %s\n", file);
+  print_hex(file);
   const char* native_path = Rf_translateChar(cpp11::r_string(file));
+  Rprintf("Calling mio::make_mmap_source() on native path: %s\n", native_path);
+  print_hex(native_path);
   return mio::make_mmap_source(native_path, error);
 #endif
 }
diff --git a/src/vroom.cc b/src/vroom.cc
index 6fe129c6..08f30780 100644
--- a/src/vroom.cc
+++ b/src/vroom.cc
@@ -95,9 +95,12 @@
 [[cpp11::register]] bool has_trailing_newline(const cpp11::strings& filename) {
   std::FILE* f = unicode_fopen(CHAR(filename[0]), "rb");
 
+  Rprintf("In has_trailing_newline(): ");
   if (!f) {
+    Rprintf("no file\n");
     return true;
   }
+  Rprintf("yes file\n");
 
   std::setvbuf(f, nullptr, _IONBF, 0);
 

From bdecf47fb36cffea2632a31cd2eeff805d75840d Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Sun, 8 May 2022 16:25:15 -0700
Subject: [PATCH 05/16] Unconditionally encode paths as UTF-8

---
 R/path.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/path.R b/R/path.R
index 8863e4ed..1cf33d65 100644
--- a/R/path.R
+++ b/R/path.R
@@ -60,7 +60,7 @@ standardise_path <- function(path) {
     }
   }
 
-  as.list(path)
+  as.list(enc2utf8(path))
 }
 
 standardise_one_path <- function (path, write = FALSE) {
@@ -140,6 +140,8 @@ standardise_one_path <- function (path, write = FALSE) {
     stop("Can only read from, not write to, .zip", call. = FALSE)
   }
 
+  path <- enc2utf8(path)
+
   switch(compression,
     gz = gzfile(path, ""),
     bz2 = bzfile(path, ""),

From 1023b1c59472e80f56a882d164fbcc88aa64e613 Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Sun, 8 May 2022 17:26:41 -0700
Subject: [PATCH 06/16] Clean up comments, remove print debugging

---
 src/unicode_fopen.h | 41 ++++++++++++++++++-----------------------
 src/vroom.cc        |  3 ---
 2 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/src/unicode_fopen.h b/src/unicode_fopen.h
index 292de57b..eac96dc7 100644
--- a/src/unicode_fopen.h
+++ b/src/unicode_fopen.h
@@ -20,22 +20,23 @@
 #include "cpp11/r_string.hpp"
 #endif
 
-inline void print_hex(const char* string) {
-  unsigned char* p = (unsigned char*) string;
-  for (int i = 0; i < 300 ; i++) {
-    if (p[i] == '\0') break;
-    Rprintf("%c 0x%02x ", p[i], p[i]);
-    if ((i%16 == 0) && i)
-      Rprintf("\n");
-  }
-  Rprintf("\n");
-}
+// useful for print debugging file path encoding
+// inline void print_hex(const char* string) {
+//   unsigned char* p = (unsigned char*) string;
+//   for (int i = 0; i < 300 ; i++) {
+//     if (p[i] == '\0') break;
+//     Rprintf("%c 0x%02x ", p[i], p[i]);
+//     if ((i%16 == 0) && i)
+//       Rprintf("\n");
+//   }
+//   Rprintf("\n");
+// }
 
 // This is needed to support wide character paths on windows
 inline FILE* unicode_fopen(const char* path, const char* mode) {
   FILE* out;
 #ifdef _WIN32
-  // First conver the mode to the wide equivalent
+  // First convert the mode to the wide equivalent
   // Only usage is 2 characters so max 8 bytes + 2 byte null.
   wchar_t mode_w[10];
   MultiByteToWideChar(CP_UTF8, 0, mode, -1, mode_w, 9);
@@ -54,13 +55,10 @@ inline FILE* unicode_fopen(const char* path, const char* mode) {
   MultiByteToWideChar(CP_UTF8, 0, path, -1, buf, len);
   out = _wfopen(buf, mode_w);
 #else
-  // cpp11 will have converted the user's path to UTF-8 by now
-  // but we need to pass the path to fopen() in the native encoding
-  Rprintf("unicode_fopen() received path: %s\n", path);
-  print_hex(path);
+  // the path has UTF-8 encoding, because we do that unconditionally on the R
+  // side (but also because cpp11 is eager to use UTF-8)
+  // however, we need to pass the path to fopen() in the native encoding
   const char* native_path = Rf_translateChar(cpp11::r_string(path));
-  Rprintf("Calling fopen() on native path: %s\n", native_path);
-  print_hex(native_path);
   out = fopen(native_path, mode);
 #endif
 
@@ -85,13 +83,10 @@ make_mmap_source(const char* file, std::error_code& error) {
   free(buf);
   return out;
 #else
-  // cpp11 will have converted the user's path to UTF-8 by now
-  // but we need to pass the path to mio in the native encoding
-  Rprintf("make_mmap_source() received path: %s\n", file);
-  print_hex(file);
+  // the path has UTF-8 encoding, because we do that unconditionally on the R
+  // side (but also because cpp11 is eager to use UTF-8)
+  // however, we need to pass the path to mio in the native encoding
   const char* native_path = Rf_translateChar(cpp11::r_string(file));
-  Rprintf("Calling mio::make_mmap_source() on native path: %s\n", native_path);
-  print_hex(native_path);
   return mio::make_mmap_source(native_path, error);
 #endif
 }
diff --git a/src/vroom.cc b/src/vroom.cc
index 08f30780..6fe129c6 100644
--- a/src/vroom.cc
+++ b/src/vroom.cc
@@ -95,12 +95,9 @@
 [[cpp11::register]] bool has_trailing_newline(const cpp11::strings& filename) {
   std::FILE* f = unicode_fopen(CHAR(filename[0]), "rb");
 
-  Rprintf("In has_trailing_newline(): ");
   if (!f) {
-    Rprintf("no file\n");
     return true;
   }
-  Rprintf("yes file\n");
 
   std::setvbuf(f, nullptr, _IONBF, 0);
 

From 2f314b362632fcbc804c7752e4a8ba661c6278b7 Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Wed, 11 May 2022 15:10:32 -0700
Subject: [PATCH 07/16] Ensure path returned by `chr_to_file()` is encoded as
 UTF-8

This guards against the scenario where the tempdir's path has non-ascii characters in it.

Presumably that could arise on, say, Windows if the user name has non-ascii characters:

> tempdir()
[1] "C:\\Users\\jenny\\AppData\\Local\\Temp\\Rtmpg30qBQ"
---
 R/path.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/path.R b/R/path.R
index 1cf33d65..a8b94089 100644
--- a/R/path.R
+++ b/R/path.R
@@ -259,7 +259,7 @@ chr_to_file <- function(x, envir = parent.frame()) {
 
   withr::defer(unlink(out), envir = envir)
 
-  normalizePath(out)
+  enc2utf8(normalizePath(out))
 }
 
 detect_compression <- function(path) {

From 579265391a211b9a51c9158ebff9d74229306105 Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Wed, 11 May 2022 15:25:41 -0700
Subject: [PATCH 08/16] Add more `enc2utf8()`

Every time we use a base R path-handling function, explicitly re-encode the result as UTF-8.
---
 R/path.R | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/R/path.R b/R/path.R
index a8b94089..132ea7b9 100644
--- a/R/path.R
+++ b/R/path.R
@@ -96,10 +96,12 @@ standardise_one_path <- function (path, write = FALSE) {
     )
   }
 
-  p <- split_path_ext(basename(path))
+  path <- enc2utf8(path)
+
+  p <- split_path_ext(enc2utf8(basename(path)))
 
   if (write) {
-    path <- normalizePath(path, mustWork = FALSE)
+    path <- enc2utf8(normalizePath(path, mustWork = FALSE))
   } else {
     path <- check_path(path)
   }
@@ -140,8 +142,6 @@ standardise_one_path <- function (path, write = FALSE) {
     stop("Can only read from, not write to, .zip", call. = FALSE)
   }
 
-  path <- enc2utf8(path)
-
   switch(compression,
     gz = gzfile(path, ""),
     bz2 = bzfile(path, ""),
@@ -222,8 +222,9 @@ is_url <- function(path) {
 }
 
 check_path <- function(path) {
-  if (file.exists(path))
-    return(normalizePath(path, "/", mustWork = FALSE))
+  if (file.exists(path)) {
+    return(enc2utf8(normalizePath(path, "/", mustWork = FALSE)))
+  }
 
   stop("'", path, "' does not exist",
     if (!is_absolute_path(path)) {

From 114c6717d23a31f956eabe958de49ea8850d6b34 Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Wed, 11 May 2022 15:59:21 -0700
Subject: [PATCH 09/16] Inline the reference object

---
 tests/testthat/test-path.R | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R
index 2c1589ec..511c1868 100644
--- a/tests/testthat/test-path.R
+++ b/tests/testthat/test-path.R
@@ -1,13 +1,10 @@
-mt <- vroom(vroom_example("mtcars.csv"), col_types = list())
-
 test_that("vroom errors if the file does not exist", {
-
   tf <- tempfile()
-
   expect_error(vroom(tf, col_types = list()), "does not exist")
 })
 
 test_that("vroom works with compressed files", {
+  mt <- vroom(vroom_example("mtcars.csv"), col_types = list())
   expect_equal(vroom(vroom_example("mtcars.csv.gz"), col_types = list()), mt)
   expect_equal(vroom(vroom_example("mtcars.csv.bz2"), col_types = list()), mt)
   expect_equal(vroom(vroom_example("mtcars.csv.xz"), col_types = list()), mt)
@@ -17,6 +14,7 @@ test_that("vroom works with compressed files", {
 test_that("read_file works via https", {
   skip_on_cran()
 
+  mt <- vroom(vroom_example("mtcars.csv"), col_types = list())
   url <- "https://raw.githubusercontent.com/r-lib/vroom/main/inst/extdata/mtcars.csv"
   expect_equal(vroom(url, col_types = list()), mt)
 })
@@ -24,6 +22,7 @@ test_that("read_file works via https", {
 test_that("vroom works via https on gz file", {
   skip_on_cran()
 
+  mt <- vroom(vroom_example("mtcars.csv"), col_types = list())
   url <- "https://raw.githubusercontent.com/r-lib/vroom/main/inst/extdata/mtcars.csv.gz"
   expect_equal(vroom(url, col_types = list()), mt)
 })

From 8f180e06fa54f48ab5dc6132b28f14d960e47bf1 Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Wed, 11 May 2022 15:59:42 -0700
Subject: [PATCH 10/16] Test writing to a non-ascii path

---
 tests/testthat/test-path.R | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R
index 511c1868..afa89169 100644
--- a/tests/testthat/test-path.R
+++ b/tests/testthat/test-path.R
@@ -118,3 +118,11 @@ test_that("can read file w/ final newline, w/ multi-byte characters in path", {
     tibble::tibble(a = "A", b = "B")
   )
 })
+
+test_that("can write to path with non-ascii characters", {
+  pattern <- "cr\u00E8me-br\u00FBl\u00E9e-"
+  tfile <- withr::local_tempfile(pattern = pattern, fileext = ".csv")
+  dat <- tibble::tibble(a = "A", b = "B")
+  vroom_write(dat, tfile, delim = ",")
+  expect_equal(readLines(tfile), c("a,b", "A,B"))
+})

From 004f42c79b1a171bdaaec514e82b97abde7a1ad3 Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Wed, 11 May 2022 16:42:23 -0700
Subject: [PATCH 11/16] Test read/write of .zip with non-ascii characters in
 path

---
 tests/testthat/test-path.R | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R
index afa89169..59fb4fac 100644
--- a/tests/testthat/test-path.R
+++ b/tests/testthat/test-path.R
@@ -126,3 +126,21 @@ test_that("can write to path with non-ascii characters", {
   vroom_write(dat, tfile, delim = ",")
   expect_equal(readLines(tfile), c("a,b", "A,B"))
 })
+
+test_that("can read/write .zip with non-ascii characters in path", {
+  skip_on_cran()
+  skip_if_not(rlang::is_installed("archive"))
+
+  tfile <- file.path(tempdir(), "d\u00E4t.zip")
+  on.exit(unlink(tfile))
+  dat <- tibble::tibble(a = "A", b = "B")
+  vroom_write(dat, tfile)
+
+  # PK is the zip magic number
+  expect_equal(
+    readBin(tfile, raw(), n = 2),
+    as.raw(c(0x50, 0x4b))
+  )
+
+  expect_equal(vroom(tfile), dat)
+})

From 9777c2e7692fba4115fda98cd5c457efd4ed3b8d Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Thu, 12 May 2022 17:09:26 -0700
Subject: [PATCH 12/16] Rewrite this test

---
 tests/testthat/test-path.R | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R
index 59fb4fac..830931d2 100644
--- a/tests/testthat/test-path.R
+++ b/tests/testthat/test-path.R
@@ -127,20 +127,33 @@ test_that("can write to path with non-ascii characters", {
   expect_equal(readLines(tfile), c("a,b", "A,B"))
 })
 
-test_that("can read/write .zip with non-ascii characters in path", {
+test_that("can read/write a compressed file with non-ascii characters in path", {
   skip_on_cran()
   skip_if_not(rlang::is_installed("archive"))
+  # https://github.com/r-lib/archive/issues/75
+  skip_if(is_windows() && l10n_info()$`Latin-1`)
+
+  make_temp_path <- function(ext) file.path(tempdir(), paste0("d\u00E4t", ext))
+
+  gzfile   <- withr::local_file(make_temp_path(".tar.gz"))
+  bz2file  <- withr::local_file(make_temp_path(".tar.bz2"))
+  xzfile   <- withr::local_file(make_temp_path(".tar.xz"))
+  zipfile  <- withr::local_file(make_temp_path(".zip"))
 
-  tfile <- file.path(tempdir(), "d\u00E4t.zip")
-  on.exit(unlink(tfile))
   dat <- tibble::tibble(a = "A", b = "B")
-  vroom_write(dat, tfile)
 
-  # PK is the zip magic number
-  expect_equal(
-    readBin(tfile, raw(), n = 2),
-    as.raw(c(0x50, 0x4b))
-  )
+  vroom_write(dat, gzfile)
+  vroom_write(dat, bz2file)
+  vroom_write(dat, xzfile)
+  vroom_write(dat, zipfile)
+
+  expect_equal(detect_compression(gzfile), "gz")
+  expect_equal(detect_compression(bz2file), "bz2")
+  expect_equal(detect_compression(xzfile), "xz")
+  expect_equal(detect_compression(zipfile), "zip")
 
-  expect_equal(vroom(tfile), dat)
+  expect_equal(vroom(gzfile,  show_col_types = FALSE), dat)
+  expect_equal(vroom(bz2file, show_col_types = FALSE), dat)
+  expect_equal(vroom(xzfile,  show_col_types = FALSE), dat)
+  expect_equal(vroom(zipfile, show_col_types = FALSE), dat)
 })

From a9436d4b2fe5118b8597d1cf78f506115c23566f Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Sat, 14 May 2022 19:01:12 -0700
Subject: [PATCH 13/16] I now think the problem IS in archive, for Windows and
 unix

Skip this test in non-UTF-8 locales for now
---
 tests/testthat/test-path.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R
index 830931d2..702bfcc4 100644
--- a/tests/testthat/test-path.R
+++ b/tests/testthat/test-path.R
@@ -131,7 +131,7 @@ test_that("can read/write a compressed file with non-ascii characters in path",
   skip_on_cran()
   skip_if_not(rlang::is_installed("archive"))
   # https://github.com/r-lib/archive/issues/75
-  skip_if(is_windows() && l10n_info()$`Latin-1`)
+  skip_if(l10n_info()$`Latin-1`)
 
   make_temp_path <- function(ext) file.path(tempdir(), paste0("d\u00E4t", ext))
 

From 4be132cd17010116a1ff737e2dd20cfa5819dcdc Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Mon, 16 May 2022 19:53:53 -0700
Subject: [PATCH 14/16] Add a test re: reading fwf from non-ascii filepath

---
 tests/testthat/test-path.R | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/tests/testthat/test-path.R b/tests/testthat/test-path.R
index 702bfcc4..e1e61659 100644
--- a/tests/testthat/test-path.R
+++ b/tests/testthat/test-path.R
@@ -157,3 +157,18 @@ test_that("can read/write a compressed file with non-ascii characters in path",
   expect_equal(vroom(xzfile,  show_col_types = FALSE), dat)
   expect_equal(vroom(zipfile, show_col_types = FALSE), dat)
 })
+
+test_that("can read fwf file w/ non-ascii characters in path", {
+  tfile <- withr::local_tempfile(pattern = "fwf-y\u00F6-", fileext = ".txt")
+  writeLines(c("A B", "C D"), tfile)
+
+  expect_equal(
+    spec <- fwf_empty(tfile, col_names = c("a", "b")),
+    list(begin = c(0L, 2L), end = c(1L, NA), col_names = c("a", "b"))
+  )
+
+  expect_equal(
+    vroom_fwf(tfile, spec, show_col_types = FALSE),
+    tibble::tibble(a = c("A", "C"), b = c("B", "D"))
+  )
+})

From e4a6321675fdfbbade442c6891026a22dbc9933d Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Mon, 16 May 2022 21:35:13 -0700
Subject: [PATCH 15/16] Tweak NEWS

---
 NEWS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index c6f3cd42..461d32d9 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,6 @@
 # vroom (development version)
 
-* `vroom()` reads more reliably from filepaths containing non-ascii characters (#394).
+* `vroom()` reads more reliably from filepaths containing non-ascii characters, in a non-UTF-8 locale (#394, #438).
 
 * Fixed segfault when reading in multiple files and the first file is header-only but subsequent files have at least one row (#430).
 

From 432b918116329239224921b4cb881911d8ad3f33 Mon Sep 17 00:00:00 2001
From: Jenny Bryan <jenny.f.bryan@gmail.com>
Date: Mon, 16 May 2022 21:37:11 -0700
Subject: [PATCH 16/16] Make versions of basename() and normalizePath() that
 uphold UTF-8 everywhere

---
 R/path.R | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/R/path.R b/R/path.R
index 132ea7b9..baa2d778 100644
--- a/R/path.R
+++ b/R/path.R
@@ -98,10 +98,10 @@ standardise_one_path <- function (path, write = FALSE) {
 
   path <- enc2utf8(path)
 
-  p <- split_path_ext(enc2utf8(basename(path)))
+  p <- split_path_ext(basename_utf8(path))
 
   if (write) {
-    path <- enc2utf8(normalizePath(path, mustWork = FALSE))
+    path <- normalizePath_utf8(path, mustWork = FALSE)
   } else {
     path <- check_path(path)
   }
@@ -223,7 +223,7 @@ is_url <- function(path) {
 
 check_path <- function(path) {
   if (file.exists(path)) {
-    return(enc2utf8(normalizePath(path, "/", mustWork = FALSE)))
+    return(normalizePath_utf8(path, mustWork = FALSE))
   }
 
   stop("'", path, "' does not exist",
@@ -260,7 +260,7 @@ chr_to_file <- function(x, envir = parent.frame()) {
 
   withr::defer(unlink(out), envir = envir)
 
-  enc2utf8(normalizePath(out))
+  normalizePath_utf8(out)
 }
 
 detect_compression <- function(path) {
@@ -314,3 +314,11 @@ detect_compression <- function(path) {
 
   NA_character_
 }
+
+basename_utf8 <- function(path) {
+  enc2utf8(basename(path))
+}
+
+normalizePath_utf8 <- function(path, winslash = "/", mustWork = NA) {
+  enc2utf8(normalizePath(path, winslash = winslash, mustWork = mustWork))
+}