Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions R/zzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,9 @@ default_undesirable_functions <- all_undesirable_functions[names(all_undesirable

rd_auto_link <- function(x) {
x <- unlist(x)
x <- gsub("([a-zA-Z0-9.]+)::([a-zA-Z0-9._]+)\\(\\)", "\\\\code{\\\\link[\\1:\\2]{\\1::\\2()}}", x)
x <- gsub("([^:a-zA-Z0-9._])([a-zA-Z0-9._]+)\\(\\)", "\\1\\\\code{\\\\link[=\\2]{\\2()}}", x)
x <- gsub("`([^`]+)`", "\\\\code{\\1}", x)
x <- gsub(R"{([a-zA-Z0-9.]+)::([a-zA-Z0-9._]+)\(\)}", R"(\\code{\\link[\1:\2]{\1::\2()}})", x)
x <- gsub(R"{([^:a-zA-Z0-9._])([a-zA-Z0-9._]+)\(\)}", R"(\1\\code{\\link[=\2]{\2()}})", x)
x <- gsub("`([^`]+)`", R"(\\code{\1})", x)
x
}

Expand All @@ -181,7 +181,7 @@ rd_undesirable_functions <- function() {
"The following functions are sometimes regarded as undesirable:",
"\\itemize{",
sprintf(
"\\item \\code{\\link[=%1$s]{%1$s()}} As an alternative, %2$s.",
R"(\item \code{\link[=%1$s]{%1$s()}} As an alternative, %2$s.)",
names(default_undesirable_functions), alternatives
),
"}"
Expand Down
168 changes: 81 additions & 87 deletions tests/testthat/test-fixed_regex_linter.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
# NB: escaping is confusing. We have to double-escape everything -- the first
# escape creates a string that will be parse()d, the second escape is normal
# escaping that would be done in R code. E.g. in "\\\\.", the R code would
# read like "\\.", but in order to create those two slashes, we need to write
# "\\\\." in the string here.

test_that("fixed_regex_linter skips allowed usages", {
linter <- fixed_regex_linter()

expect_lint("gsub('^x', '', y)", NULL, linter)
expect_lint("grep('x$', '', y)", NULL, linter)
expect_lint("sub('[a-zA-Z]', '', y)", NULL, linter)
expect_lint("grepl(fmt, y)", NULL, linter)
expect_lint("regexec('\\\\s', '', y)", NULL, linter)
expect_lint(R"{regexec('\\s', '', y)}", NULL, linter)
expect_lint("grep('a(?=b)', x, perl = TRUE)", NULL, linter)
expect_lint("grep('0+1', x, perl = TRUE)", NULL, linter)
expect_lint("grep('1*2', x)", NULL, linter)
expect_lint("grep('a|b', x)", NULL, linter)
expect_lint("grep('\\\\[|\\\\]', x)", NULL, linter)
expect_lint(R"{grep('\\[|\\]', x)}", NULL, linter)

# if fixed=TRUE is already set, regex patterns don't matter
expect_lint("gsub('\\\\.', '', y, fixed = TRUE)", NULL, linter)
expect_lint(R"{gsub('\\.', '', y, fixed = TRUE)}", NULL, linter)

# ignore.case=TRUE implies regex interpretation
expect_lint("gsub('abcdefg', '', y, ignore.case = TRUE)", NULL, linter)
Expand All @@ -37,10 +31,10 @@ test_that("fixed_regex_linter blocks simple disallowed usages", {
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("gsub('\\\\.', '', x)", lint_msg, linter)
expect_lint(R"{gsub('\\.', '', x)}", lint_msg, linter)
expect_lint("grepl('abcdefg', x)", lint_msg, linter)
expect_lint("gregexpr('a-z', y)", lint_msg, linter)
expect_lint("regexec('\\\\$', x)", lint_msg, linter)
expect_lint(R"{regexec('\\$', x)}", lint_msg, linter)
expect_lint("grep('\n', x)", lint_msg, linter)

# naming the argument doesn't matter (if it's still used positionally)
Expand All @@ -51,21 +45,21 @@ patrick::with_parameters_test_that(
"fixed_regex_linter is robust to unrecognized escapes error",
{
expect_lint(
sprintf("grep('\\\\%s', x)", char),
sprintf(R"{grep('\\%s', x)}", char),
rex::rex("This regular expression is static"),
fixed_regex_linter()
)

expect_lint(
sprintf("strsplit('a%sb', '\\\\%s')", char, char),
sprintf(R"{strsplit('a%sb', '\\%s')}", char, char),
rex::rex("This regular expression is static"),
fixed_regex_linter()
)
},
.cases = local({
char <- c(
"^", "$", "{", "}", "(", ")", ".", "*", "+", "?",
"|", "[", "]", "\\\\", "<", ">", "=", ":", ";", "/",
"|", "[", "]", R"(\\)", "<", ">", "=", ":", ";", "/",
"_", "-", "!", "@", "#", "%", "&", "~"
)
data.frame(char = char, .test_name = char)
Expand All @@ -87,7 +81,7 @@ test_that("fixed_regex_linter catches null calls to strsplit as well", {
linter <- fixed_regex_linter()

expect_lint("strsplit(x, '^x')", NULL, linter)
expect_lint("strsplit(x, '\\\\s')", NULL, linter)
expect_lint(R"{strsplit(x, '\\s')}", NULL, linter)
expect_lint("strsplit(x, 'a(?=b)', perl = TRUE)", NULL, linter)
expect_lint("strsplit(x, '0+1', perl = TRUE)", NULL, linter)
expect_lint("strsplit(x, 'a|b')", NULL, linter)
Expand All @@ -97,15 +91,15 @@ test_that("fixed_regex_linter catches null calls to strsplit as well", {
expect_lint("tstrsplit(x, fmt)", NULL, linter)

# if fixed=TRUE is already set, regex patterns don't matter
expect_lint("strsplit(x, '\\\\.', fixed = TRUE)", NULL, linter)
expect_lint("strsplit(x, '\\\\.', fixed = T)", NULL, linter)
expect_lint(R"{strsplit(x, '\\.', fixed = TRUE)}", NULL, linter)
expect_lint(R"{strsplit(x, '\\.', fixed = T)}", NULL, linter)
})

test_that("fixed_regex_linter catches calls to strsplit as well", {
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("strsplit(x, '\\\\.')", lint_msg, linter)
expect_lint(R"{strsplit(x, '\\.')}", lint_msg, linter)
expect_lint("strsplit(x, '[.]')", lint_msg, linter)

expect_lint("tstrsplit(x, 'abcdefg')", lint_msg, linter)
Expand All @@ -115,8 +109,8 @@ test_that("fixed_regex_linter is more exact about distinguishing \\s from \\:",
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("grep('\\\\s', '', x)", NULL, linter)
expect_lint("grep('\\\\:', '', x)", lint_msg, linter)
expect_lint(R"{grep('\\s', '', x)}", NULL, linter)
expect_lint(R"{grep('\\:', '', x)}", lint_msg, linter)
})

## tests for stringr functions
Expand All @@ -126,12 +120,12 @@ test_that("fixed_regex_linter skips allowed stringr usages", {
expect_lint("str_replace(y, '[a-zA-Z]', '')", NULL, linter)
expect_lint("str_replace_all(y, '^x', '')", NULL, linter)
expect_lint("str_detect(y, fmt)", NULL, linter)
expect_lint("str_extract(y, '\\\\s')", NULL, linter)
expect_lint("str_extract_all(y, '\\\\s')", NULL, linter)
expect_lint(R"{str_extract(y, '\\s')}", NULL, linter)
expect_lint(R"{str_extract_all(y, '\\s')}", NULL, linter)
expect_lint("str_which(x, '1*2')", NULL, linter)

# if fixed() is already set, regex patterns don't matter
expect_lint("str_replace(y, fixed('\\\\.'), '')", NULL, linter)
expect_lint(R"{str_replace(y, fixed('\\.'), '')}", NULL, linter)

# namespace qualification doesn't matter
expect_lint("stringr::str_replace(y, stringr::fixed('abcdefg'), '')", NULL, linter)
Expand All @@ -141,16 +135,16 @@ test_that("fixed_regex_linter blocks simple disallowed usages of stringr functio
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("str_replace_all(x, '\\\\.', '')", lint_msg, linter)
expect_lint(R"{str_replace_all(x, '\\.', '')}", lint_msg, linter)
expect_lint("str_detect(x, 'abcdefg')", lint_msg, linter)
expect_lint("str_locate(y, 'a-z')", lint_msg, linter)
expect_lint("str_subset(x, '\\\\$')", lint_msg, linter)
expect_lint(R"{str_subset(x, '\\$')}", lint_msg, linter)
expect_lint("str_which(x, '\n')", lint_msg, linter)

# named, positional arguments are still caught
expect_lint("str_locate(y, pattern = 'a-z')", lint_msg, linter)
# nor do other named arguments throw things off
expect_lint("str_starts(x, '\\\\.', negate = TRUE)", lint_msg, linter)
expect_lint(R"{str_starts(x, '\\.', negate = TRUE)}", lint_msg, linter)
})

test_that("fixed_regex_linter catches calls to str_split as well", {
Expand All @@ -161,8 +155,8 @@ test_that("fixed_regex_linter catches calls to str_split as well", {
expect_lint("str_split(x, fmt)", NULL, linter)

# if fixed() is already set, regex patterns don't matter
expect_lint("str_split(x, fixed('\\\\.'))", NULL, linter)
expect_lint("str_split(x, '\\\\.')", lint_msg, linter)
expect_lint(R"{str_split(x, fixed('\\.'))}", NULL, linter)
expect_lint(R"{str_split(x, '\\.')}", lint_msg, linter)
expect_lint("str_split(x, '[.]')", lint_msg, linter)
})

Expand All @@ -187,24 +181,24 @@ test_that("one-character character classes with escaped characters are caught",
linter <- fixed_regex_linter()
lint_msg <- rex::rex("This regular expression is static")

expect_lint("gsub('[\\n]', '', x)", lint_msg, linter)
expect_lint("gsub('[\\\"]', '', x)", lint_msg, linter)
expect_lint('gsub("\\\\<", "x", x, perl = TRUE)', lint_msg, linter)

expect_lint("str_split(x, '[\\1]')", lint_msg, linter)
expect_lint("str_split(x, '[\\12]')", lint_msg, linter)
expect_lint("str_split(x, '[\\123]')", lint_msg, linter)
expect_lint("str_split(x, '[\\xa]')", lint_msg, linter)
expect_lint("str_split(x, '[\\x32]')", lint_msg, linter)
expect_lint("str_split(x, '[\\uF]')", lint_msg, linter)
expect_lint("str_split(x, '[\\u01]')", lint_msg, linter)
expect_lint("str_split(x, '[\\u012]')", lint_msg, linter)
expect_lint("str_split(x, '[\\u0123]')", lint_msg, linter)
expect_lint("str_split(x, '[\\U8]')", lint_msg, linter)
expect_lint("str_split(x, '[\\U1d4d7]')", lint_msg, linter)
expect_lint("str_split(x, '[\\u{1}]')", lint_msg, linter)
expect_lint("str_split(x, '[\\U{F7D5}]')", lint_msg, linter)
expect_lint("str_split(x, '[\\U{1D4D7}]')", lint_msg, linter)
expect_lint(R"{gsub('[\n]', '', x)}", lint_msg, linter)
expect_lint(R"{gsub('[\"]', '', x)}", lint_msg, linter)
expect_lint(R'{gsub("\\<", "x", x, perl = TRUE)}', lint_msg, linter)

expect_lint(R"{str_split(x, '[\1]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\12]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\123]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\xa]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\x32]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\uF]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\u01]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\u012]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\u0123]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\U8]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\U1d4d7]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\u{1}]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\U{F7D5}]')}", lint_msg, linter)
expect_lint(R"{str_split(x, '[\U{1D4D7}]')}", lint_msg, linter)
})

test_that("bracketed unicode escapes are caught", {
Expand All @@ -218,15 +212,15 @@ test_that("bracketed unicode escapes are caught", {

test_that("escaped characters are handled correctly", {
linter <- fixed_regex_linter()
expect_lint("gsub('\\n+', '', sql)", NULL, linter)
expect_lint(R"{gsub('\n+', '', sql)}", NULL, linter)
expect_lint('gsub("\\n{2,}", "\n", D)', NULL, linter)
expect_lint('gsub("[\\r\\n]", "", x)', NULL, linter)
expect_lint('gsub("\\n $", "", y)', NULL, linter)
expect_lint('gsub("```\\n*```r*\\n*", "", x)', NULL, linter)
expect_lint(R'{gsub("[\r\n]", "", x)}', NULL, linter)
expect_lint(R'{gsub("\n $", "", y)}', NULL, linter)
expect_lint(R'{gsub("```\n*```r*\n*", "", x)}', NULL, linter)
expect_lint('strsplit(x, "(;|\n)")', NULL, linter)
expect_lint('strsplit(x, "(;|\\n)")', NULL, linter)
expect_lint('grepl("[\\\\W]", x, perl = TRUE)', NULL, linter)
expect_lint('grepl("[\\\\W]", x)', NULL, linter)
expect_lint(R'{strsplit(x, "(;|\n)")}', NULL, linter)
expect_lint(R'{grepl("[\\W]", x, perl = TRUE)}', NULL, linter)
expect_lint(R'{grepl("[\\W]", x)}', NULL, linter)
})

# make sure the logic is properly vectorized
Expand All @@ -238,10 +232,10 @@ test_that("fixed replacements vectorize and recognize str_detect", {
linter <- fixed_regex_linter()
# properly vectorized
expect_lint(
trim_some("{
trim_some(R"({
grepl('abcdefg', x)
grepl('a[.]\\\\.b\\n', x)
}"),
grepl('a[.]\\.b\n', x)
})"),
list(
list(rex::rex('Use "abcdefg" with fixed = TRUE'), line_number = 2L),
list(rex::rex('Use "a..b\\n" with fixed = TRUE'), line_number = 3L)
Expand Down Expand Up @@ -289,37 +283,37 @@ robust_non_printable_unicode <- function() {
# styler: off
local({
.cases <- tibble::tribble(
~.test_name, ~regex_expr, ~fixed_expr,
"[.]", "[.]", ".",
'[\\\"]', '[\\\"]', '\\"',
"[]]", "[]]", "]",
"\\\\.", "\\\\.", ".",
"\\\\:", "\\\\:", ":",
"\\\\<", "\\\\<", "<",
"\\\\$", "\\\\$", "$",
"[\\1]", "[\\1]", "\\001",
"\\1", "\\1", "\\001",
"[\\12]", "[\\12]", "\\n",
"[\\123]", "[\\123]", "S",
"a[*]b", "a[*]b", "a*b",
"abcdefg", "abcdefg", "abcdefg",
"abc\\U{A0DEF}ghi", "abc\\U{A0DEF}ghi", robust_non_printable_unicode(),
"a-z", "a-z", "a-z",
"[\\n]", "[\\n]", "\\n",
"\\n", "\n", "\\n",
"[\\u01]", "[\\u01]", "\\001",
"[\\u012]", "[\\u012]", "\\022",
"[\\u0123]", "[\\u0123]", "\u0123",
"[\\u{1}]", "[\\u{1}]", "\\001",
"[\\U1d4d7]", "[\\U1d4d7]", "\U1D4D7",
"[\\U{1D4D7}]", "[\\U{1D4D7}]", "\U1D4D7",
"[\\U8]", "[\\U8]", "\\b",
"\\u{A0}", "\\u{A0}", "\uA0",
"\\u{A0}\\U{0001d4d7}", "\\u{A0}\\U{0001d4d7}", "\uA0\U1D4D7",
"[\\uF]", "[\\uF]", "\\017",
"[\\U{F7D5}]", "[\\U{F7D5}]", "\UF7D5",
"[\\x32]", "[\\x32]", "2",
"[\\xa]", "[\\xa]", "\\n"
~.test_name, ~regex_expr, ~fixed_expr,
"[.]", "[.]", ".",
'[\\\"]', R'([\"])', '\\"',
"[]]", "[]]", "]",
R"(\\.)", R"(\\.)", ".",
R"(\\:)", R"(\\:)", ":",
R"(\\<)", R"(\\<)", "<",
R"(\\$)", R"(\\$)", "$",
R"([\1])", R"([\1])", R"(\001)",
R"(\1)", R"(\1)", R"(\001)",
R"([\12])", R"([\12])", R"(\n)",
R"([\123])", R"([\123])", "S",
"a[*]b", "a[*]b", "a*b",
"abcdefg", "abcdefg", "abcdefg",
"abc\\U{A0DEF}ghi", "abc\\U{A0DEF}ghi", robust_non_printable_unicode(),
"a-z", "a-z", "a-z",
R"([\n])", R"([\n])", R"(\n)",
R"(\n)", "\n", R"(\n)",
R"([\u01])", R"([\u01])", R"(\001)",
R"([\u012])", R"([\u012])", R"(\022)",
R"([\u0123])", R"([\u0123])", "\u0123",
R"([\u{1}])", R"([\u{1}])", R"(\001)",
R"([\U1d4d7])", R"([\U1d4d7])", "\U1D4D7",
R"([\U{1D4D7}])", R"([\U{1D4D7}])", "\U1D4D7",
R"([\U8])", R"([\U8])", R"(\b)",
R"(\u{A0})", R"(\u{A0})", "\uA0",
R"(\u{A0}\U{0001d4d7})", R"(\u{A0}\U{0001d4d7})", "\uA0\U1D4D7",
R"([\uF])", R"([\uF])", R"(\017)",
R"([\U{F7D5}])", R"([\U{F7D5}])", "\UF7D5",
R"([\x32])", R"([\x32])", "2",
R"([\xa])", R"([\xa])", R"(\n)"
)
if (.Platform$OS.type == "windows" && !hasName(R.Version(), "crt")) {
skip_cases <- c(
Expand Down