@@ -141,104 +141,3 @@ fixed_regex_linter <- function() {
141141 )
142142 })
143143}
144-
# Matches one character that is not in the listed set of regex
# metacharacters: ^ $ { ( . * + ? | [ and the backslash.
# The set must not contain stray whitespace, otherwise a plain space would
# stop being treated as a non-active character.
rx_non_active_char <- rex::rex(none_of("^${(.*+?|[\\"))
# Regex matching a single escape sequence or a single-element character group.
# Which alternative matched is exposed through the named captures
# `char_escape` and `trivial_char_group`, which get_fixed_string() inspects.
rx_static_escape <- local({
  # rex escapes string literals, so "\\" stands for one literal backslash here
  rx_char_escape <- rex::rex(or(
    group("\\", none_of(alnum)), # backslash + non-alphanumeric character
    group("\\x", between(xdigit, 1L, 2L)), # \x followed by 1-2 hex digits
    group("\\", between("0":"7", 1L, 3L)), # backslash + 1-3 octal digits
    group("\\u{", between(xdigit, 1L, 4L), "}"),
    group("\\u", between(xdigit, 1L, 4L)),
    group("\\U{", between(xdigit, 1L, 8L), "}"),
    group("\\U", between(xdigit, 1L, 8L))
  ))
  # a bracket group holding exactly one element, e.g. [.] or [\\x41]
  rx_trivial_char_group <- rex::rex(
    "[",
    or(
      any,
      # character classes, e.g. \d are enabled in [] too if perl = TRUE
      group("\\", none_of("dswDSW")),
      rx_char_escape
    ),
    "]"
  )
  rex::rex(or(
    capture(rx_char_escape, name = "char_escape"),
    capture(rx_trivial_char_group, name = "trivial_char_group")
  ))
})
170-
# One static token: either a non-active character or a static escape
# (see rx_non_active_char and rx_static_escape).
rx_static_token <- rex::rex(or(rx_non_active_char, rx_static_escape))
177-
# Both patterns are prefixed with the inline `(?s)` flag; it has to be the very
# first thing in the pattern, so no whitespace may precede it.

# Whole-string pattern: zero or more static tokens from start to end.
rx_static_regex <- paste0(
  "(?s)",
  rex::rex(start, zero_or_more(rx_static_token), end)
)
# Prefix pattern: any run of non-active characters followed by the first
# static escape; used by get_fixed_string() to walk through a regex.
rx_first_static_token <- paste0(
  "(?s)",
  rex::rex(start, zero_or_more(rx_non_active_char), rx_static_escape)
)
180-
#' Determine whether a regex pattern actually uses regex features
#'
#' Note that this applies to the strings that are found on the XML parse tree,
#' _not_ to plain strings. This matters for backslash escaping, which happens
#' at a different layer of escaping than one might expect. Testing this
#' function is therefore best done through the expected results of a lint on a
#' given file, rather than by passing strings to it directly, which can be
#' confusing.
#'
#' @param str A character vector.
#' @return A logical vector, `TRUE` wherever `str` could be replaced by a
#'   string with `fixed = TRUE`.
#' @noRd
is_not_regex <- function(str) {
  # rx_static_regex carries the single-line option `(?s)` so that literal
  # newlines are allowed inside the pattern
  is_static <- grepl(pattern = rx_static_regex, x = str, perl = TRUE)
  is_static
}
198-
#' Compute a fixed string equivalent to a static regular expression
#'
#' @param static_regex A regex for which `is_not_regex()` returns `TRUE`.
#'   Vectors are processed element by element.
#' @return A string such that `grepl(static_regex, x)` is equivalent to
#'   `grepl(get_fixed_string(static_regex), x, fixed = TRUE)`. Zero-length
#'   input gives `character()`; a missing value gives `NA_character_`.
#'
#' @noRd
get_fixed_string <- function(static_regex) {
  if (length(static_regex) == 0L) {
    return(character())
  } else if (length(static_regex) > 1L) {
    return(vapply(static_regex, get_fixed_string, character(1L)))
  }
  # regexpr() returns NA for missing input, which would make the loop
  # condition below fail with "missing value where TRUE/FALSE needed"
  if (is.na(static_regex)) {
    return(NA_character_)
  }

  fixed_string <- ""
  current_match <- regexpr(rx_first_static_token, static_regex, perl = TRUE)
  while (current_match != -1L) {
    capture_start <- attr(current_match, "capture.start")
    # the capture group that took part in the match names the token type
    token_type <- attr(current_match, "capture.names")[capture_start > 0L]
    token_start <- max(capture_start)
    # everything before the token is plain text and is copied verbatim
    if (token_start > 1L) {
      fixed_string <- paste0(
        fixed_string,
        substr(static_regex, 1L, token_start - 1L)
      )
    }
    # the pattern is anchored at the start, so the match length is also the
    # position of the last character of the token
    consume_to <- attr(current_match, "match.length")
    token_content <- substr(static_regex, token_start, consume_to)
    fixed_string <- paste0(
      fixed_string,
      get_token_replacement(token_content, token_type)
    )
    # continue with the part of the regex that has not been consumed yet
    static_regex <- substr(
      static_regex,
      start = consume_to + 1L,
      stop = nchar(static_regex)
    )
    current_match <- regexpr(rx_first_static_token, static_regex, perl = TRUE)
  }
  # whatever is left contains no escapes or character groups
  paste0(fixed_string, static_regex)
}
228-
#' Translate one static regex token into the literal text it stands for
#'
#' @param token_content Text of the token: either a bracketed group such as
#'   `[.]`, or an escape sequence starting with a backslash.
#' @param token_type `"trivial_char_group"` for a bracketed group; any other
#'   value is handled as a `char_escape` token.
#' @return A string with the literal replacement for the token.
#'
#' @noRd
get_token_replacement <- function(token_content, token_type) {
  if (token_type == "trivial_char_group") {
    # strip the surrounding brackets
    token_content <- substr(
      token_content,
      start = 2L,
      stop = nchar(token_content) - 1L
    )
    if (startsWith(token_content, "\\")) { # escape within trivial char group
      get_token_replacement(token_content, "char_escape")
    } else {
      token_content
    }
  } else { # char_escape token
    is_escaped_literal <- rex::re_matches(
      token_content,
      rex::rex("\\", one_of("^${}().*+?|[]\\<>=:;/_-!@#%&,~"))
    )
    if (is_escaped_literal) {
      # backslash-escaped punctuation: drop the backslash, keep the character
      substr(token_content, start = 2L, stop = nchar(token_content))
    } else {
      # Remaining escapes are decoded by letting R parse them inside a
      # double-quoted string literal.
      # NOTE(review): eval(parse()) is only safe while token_content is a
      # single escape token as matched by rx_static_escape -- confirm callers
      # never pass arbitrary text here.
      eval(parse(text = paste0('"', token_content, '"')))
    }
  }
}
0 commit comments