diff --git a/NEWS.md b/NEWS.md index cd5aa95049..f5ea7d1807 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ ## Bug fixes -* `fixed_regex_linter()` no longer fails with regular expression pattern `"\\;"` (#1545, @IndrajeetPatil). +* `fixed_regex_linter()` is more robust to errors stemming from unrecognized escapes (#1545, #1845, @IndrajeetPatil). * `get_source_expressions()` can handle Sweave/Rmarkdown documents with reference chunks like `<>` (#779, @MichaelChirico). Note that these are simply skipped, rather than attempting to retrieve the reference and also lint it. @@ -11,7 +11,7 @@ * `object_usage_linter()` no longer silently ignores usage warnings that don't contain a quoted name (#1714, @AshesITR) -* `namespace_linter()` correctly recognizes backticked operators to be exported from respectives namespaces (like `` rlang::`%||%` ``) (#1752, @IndrajeetPatil) +* `namespace_linter()` correctly recognizes backticked operators to be exported from respective namespaces (like `` rlang::`%||%` ``) (#1752, @IndrajeetPatil) * `lint_package()` correctly finds a package from within a subdir if the `path` points to anywhere within the package (#1759, @AshesITR) diff --git a/R/fixed_regex_linter.R b/R/fixed_regex_linter.R index 5f066372c7..31edc57ea6 100644 --- a/R/fixed_regex_linter.R +++ b/R/fixed_regex_linter.R @@ -70,8 +70,12 @@ fixed_regex_linter <- function() { # regular expression pattern is the second argument pos_2_regex_funs <- xp_text_in_table(c( - "strsplit", "tstrsplit", - # stringr functions. even though the user action is different + # base functions. + "strsplit", + # data.table functions. + "tstrsplit", + # stringr functions. + # even though the user action is different # (setting fixed=TRUE vs. wrapping stringr::fixed()), # detection of the lint is the same "str_count", "str_detect", "str_ends", "str_extract", "str_extract_all", @@ -231,7 +235,7 @@ get_token_replacement <- function(token_content, token_type) { token_content } } else { # char_escape token - if (rex::re_matches(token_content, rex::rex("\\", one_of("^${}().*+?|[]\\<>:;")))) { + if (rex::re_matches(token_content, rex::rex("\\", one_of("^${}().*+?|[]\\<>=:;/_-!@#%&,~")))) { substr(token_content, start = 2L, stop = nchar(token_content)) } else { eval(parse(text = paste0('"', token_content, '"'))) diff --git a/tests/testthat/test-fixed_regex_linter.R b/tests/testthat/test-fixed_regex_linter.R index 8b84a36643..f7bac0e4c7 100644 --- a/tests/testthat/test-fixed_regex_linter.R +++ b/tests/testthat/test-fixed_regex_linter.R @@ -39,12 +39,36 @@ test_that("fixed_regex_linter blocks simple disallowed usages", { expect_lint("gregexpr('a-z', y)", lint_msg, linter) expect_lint("regexec('\\\\$', x)", lint_msg, linter) expect_lint("grep('\n', x)", lint_msg, linter) - expect_lint("grep('\\\\;', x)", lint_msg, linter) # naming the argument doesn't matter (if it's still used positionally) expect_lint("gregexpr(pattern = 'a-z', y)", lint_msg, linter) }) +patrick::with_parameters_test_that( + "fixed_regex_linter is robust to unrecognized escapes error", + { + expect_lint( + sprintf("grep('\\\\%s', x)", char), + rex::rex("This regular expression is static"), + fixed_regex_linter() + ) + + expect_lint( + sprintf("strsplit('a%sb', '\\\\%s')", char, char), + rex::rex("This regular expression is static"), + fixed_regex_linter() + ) + }, + .cases = data.frame( + char = c( + "^", "$", "{", "}", "(", ")", ".", "*", "+", "?", + "|", "[", "]", "\\\\", "<", ">", "=", ":", ";", "/", + "_", "-", "!", "@", "#", "%", "&", "~" + ), + stringsAsFactors = FALSE + ) +) + test_that("fixed_regex_linter catches regex like [.] or [$]", { linter <- fixed_regex_linter() lint_msg <- rex::rex("This regular expression is static") @@ -60,14 +84,15 @@ test_that("fixed_regex_linter catches null calls to strsplit as well", { linter <- fixed_regex_linter() expect_lint("strsplit(x, '^x')", NULL, linter) - expect_lint("tstrsplit(x, '[a-zA-Z]')", NULL, linter) - expect_lint("tstrsplit(x, fmt)", NULL, linter) expect_lint("strsplit(x, '\\\\s')", NULL, linter) expect_lint("strsplit(x, 'a(?=b)', perl = TRUE)", NULL, linter) expect_lint("strsplit(x, '0+1', perl = TRUE)", NULL, linter) - expect_lint("tstrsplit(x, '1*2')", NULL, linter) expect_lint("strsplit(x, 'a|b')", NULL, linter) + expect_lint("tstrsplit(x, '1*2')", NULL, linter) + expect_lint("tstrsplit(x, '[a-zA-Z]')", NULL, linter) + expect_lint("tstrsplit(x, fmt)", NULL, linter) + # if fixed=TRUE is already set, regex patterns don't matter expect_lint("strsplit(x, '\\\\.', fixed = TRUE)", NULL, linter) expect_lint("strsplit(x, '\\\\.', fixed = T)", NULL, linter) @@ -77,10 +102,10 @@ test_that("fixed_regex_linter catches calls to strsplit as well", { linter <- fixed_regex_linter() lint_msg <- rex::rex("This regular expression is static") - expect_lint("strsplit('a;b', '\\\\;')", lint_msg, linter) expect_lint("strsplit(x, '\\\\.')", lint_msg, linter) - expect_lint("tstrsplit(x, 'abcdefg')", lint_msg, linter) expect_lint("strsplit(x, '[.]')", lint_msg, linter) + + expect_lint("tstrsplit(x, 'abcdefg')", lint_msg, linter) }) test_that("fixed_regex_linter is more exact about distinguishing \\s from \\:", { @@ -161,6 +186,8 @@ test_that("one-character character classes with escaped characters are caught", expect_lint("gsub('[\\n]', '', x)", lint_msg, linter) expect_lint("gsub('[\\\"]', '', x)", lint_msg, linter) + expect_lint('gsub("\\\\<", "x", x, perl = TRUE)', lint_msg, linter) + expect_lint("str_split(x, '[\\1]')", lint_msg, linter) expect_lint("str_split(x, '[\\12]')", lint_msg, linter) expect_lint("str_split(x, '[\\123]')", lint_msg, linter) @@ -175,7 +202,6 @@ test_that("one-character character classes with escaped characters are caught", expect_lint("str_split(x, '[\\u{1}]')", lint_msg, linter) expect_lint("str_split(x, '[\\U{F7D5}]')", lint_msg, linter) expect_lint("str_split(x, '[\\U{1D4D7}]')", lint_msg, linter) - expect_lint('gsub("\\\\<", "x", x, perl = TRUE)', lint_msg, linter) }) test_that("bracketed unicode escapes are caught", { @@ -223,7 +249,11 @@ test_that("fixed replacements vectorize and recognize str_detect", { ) # stringr hint works - expect_lint("str_detect(x, 'abc')", rex::rex('Here, you can use stringr::fixed("abc") as the pattern'), linter) + expect_lint( + "str_detect(x, 'abc')", + rex::rex('Here, you can use stringr::fixed("abc") as the pattern'), + linter + ) }) test_that("fixed replacement is correct with UTF-8", {