From 5952bb601086c28b4d51c34ee5defed0e702afb2 Mon Sep 17 00:00:00 2001 From: Christophe Dervieux Date: Wed, 18 Aug 2021 11:19:41 +0200 Subject: [PATCH] Mark result of citeproc conversion as UTF-8 (#2202) Pandoc will output UTF-8 content, but on systems where UTF-8 is not the default encoding (like Windows), system() will return the result string in native encoding. We need to mark it as UTF-8 before further processing. fixes #2195 Another option would be to convert to a file and read it back into R. --- DESCRIPTION | 2 +- NEWS.md | 2 ++ R/pandoc.R | 2 ++ tests/testthat/_snaps/pandoc.md | 49 +++++++++++++++++++++++++++++++ tests/testthat/resources/UTF8.bib | 7 +++++ tests/testthat/test-pandoc.R | 11 +++++++ 6 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/_snaps/pandoc.md create mode 100644 tests/testthat/resources/UTF8.bib create mode 100644 tests/testthat/test-pandoc.R diff --git a/DESCRIPTION b/DESCRIPTION index 44f149f237..6914851a8b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: rmarkdown Type: Package Title: Dynamic Documents for R -Version: 2.10.2 +Version: 2.10.3 Authors@R: c( person("JJ", "Allaire", role = "aut", email = "jj@rstudio.com"), person("Yihui", "Xie", role = c("aut", "cre"), email = "xie@yihui.name", comment = c(ORCID = "0000-0003-0645-5666")), diff --git a/NEWS.md b/NEWS.md index 348daf2c0f..cff656c262 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,8 @@ rmarkdown 2.11 - It is possible to specify the version of jQuery via a global option now, e.g., `options(rmarkdown.jquery.version = 2)` (note that the default major version is `3`). This is mainly for advanced users and developers to test different versions of jQuery. +- `pandoc_citeproc_convert()` now correctly handles bib files containing specific UTF-8 characters on systems where UTF-8 is not the default encoding, like Windows (thanks, @mitchelloharawild, #2195). 
+ rmarkdown 2.10 ================================================================================ diff --git a/R/pandoc.R b/R/pandoc.R index bacb4a18a0..bb715b91ff 100644 --- a/R/pandoc.R +++ b/R/pandoc.R @@ -157,6 +157,8 @@ pandoc_citeproc_convert <- function(file, type = c("list", "json", "yaml")) { stop("Error ", status, " occurred building shared library.") } + Encoding(result) <- "UTF-8" + # convert the output if requested if (type == "list") { jsonlite::fromJSON(result, simplifyVector = FALSE) diff --git a/tests/testthat/_snaps/pandoc.md b/tests/testthat/_snaps/pandoc.md new file mode 100644 index 0000000000..ea7127184e --- /dev/null +++ b/tests/testthat/_snaps/pandoc.md @@ -0,0 +1,49 @@ +# Converting bib file is working + + list(list(author = list(list(family = "Conceição", given = "Sérgio")), + "container-title" = "Portuguese History", id = "conc2021", + issue = "1", issued = list("date-parts" = list(list(2021L))), + title = "História da habitação", type = "article-journal")) + +--- + + [1] "[" + [2] " {" + [3] " \"author\": [" + [4] " {" + [5] " \"family\": \"Conceição\"," + [6] " \"given\": \"Sérgio\"" + [7] " }" + [8] " ]," + [9] " \"container-title\": \"Portuguese History\"," + [10] " \"id\": \"conc2021\"," + [11] " \"issue\": \"1\"," + [12] " \"issued\": {" + [13] " \"date-parts\": [" + [14] " [" + [15] " 2021" + [16] " ]" + [17] " ]" + [18] " }," + [19] " \"title\": \"História da habitação\"," + [20] " \"type\": \"article-journal\"" + [21] " }" + [22] "]" + +--- + + [1] "---" + [2] "nocite: \"[@*]\"" + [3] "references:" + [4] "- author:" + [5] " - family: Conceição" + [6] " given: Sérgio" + [7] " container-title: Portuguese History" + [8] " id: conc2021" + [9] " issue: 1" + [10] " issued: 2021" + [11] " title: História da habitação" + [12] " type: article-journal" + [13] "---" + [14] "" + diff --git a/tests/testthat/resources/UTF8.bib b/tests/testthat/resources/UTF8.bib new file mode 100644 index 0000000000..a21773b077 --- /dev/null +++ 
b/tests/testthat/resources/UTF8.bib @@ -0,0 +1,7 @@ +@article{conc2021, + title={História da Habitação}, + author={Conceição, Sérgio}, + journal={Portuguese History}, + number={1}, + year={2021} +} diff --git a/tests/testthat/test-pandoc.R b/tests/testthat/test-pandoc.R new file mode 100644 index 0000000000..7b60ca80ae --- /dev/null +++ b/tests/testthat/test-pandoc.R @@ -0,0 +1,11 @@ +# TODO: to remove when switching the package to edition 3 +local_edition(3) + +test_that("Converting bib file is working", { + skip_on_cran() + skip_if_not_pandoc("2.11") # only test with newer Pandoc citeproc + bib_file <- test_path("resources/UTF8.bib") + expect_snapshot_value(pandoc_citeproc_convert(bib_file, "list"), style = "deparse") + expect_snapshot_output(pandoc_citeproc_convert(bib_file, "json")) + expect_snapshot_output(pandoc_citeproc_convert(bib_file, "yaml")) +})