Skip to content

Commit 511ec0d

Browse files
Refactor shebang parsing to remove regex dependency (#5690)
## Summary

Similar to #5567, we can remove the use of regex here, simplify the representation (return an `Option` rather than an enum with a `None` variant), add snapshot tests, etc. This is about 100x faster than using a regex for cases that match (2.5ns vs. 250ns). It's obviously not a hot path, but I prefer the consistency with the other, similar comment-parsing code. I may DRY these up into some common functionality later on.
1 parent 30bec3f commit 511ec0d

10 files changed

+161
-128
lines changed

crates/ruff/src/checkers/physical_lines.rs

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use ruff_python_whitespace::UniversalNewlines;
99

1010
use crate::registry::Rule;
1111
use crate::rules::flake8_copyright::rules::missing_copyright_notice;
12-
use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective};
12+
use crate::rules::flake8_executable::helpers::ShebangDirective;
1313
use crate::rules::flake8_executable::rules::{
1414
shebang_missing, shebang_newline, shebang_not_executable, shebang_python, shebang_whitespace,
1515
};
@@ -87,33 +87,35 @@ pub(crate) fn check_physical_lines(
8787
|| enforce_shebang_newline
8888
|| enforce_shebang_python
8989
{
90-
let shebang = extract_shebang(&line);
91-
if enforce_shebang_not_executable {
92-
if let Some(diagnostic) = shebang_not_executable(path, line.range(), &shebang) {
93-
diagnostics.push(diagnostic);
90+
if let Some(shebang) = ShebangDirective::try_extract(&line) {
91+
has_any_shebang = true;
92+
if enforce_shebang_not_executable {
93+
if let Some(diagnostic) =
94+
shebang_not_executable(path, line.range(), &shebang)
95+
{
96+
diagnostics.push(diagnostic);
97+
}
9498
}
95-
}
96-
if enforce_shebang_missing {
97-
if !has_any_shebang && matches!(shebang, ShebangDirective::Match(..)) {
98-
has_any_shebang = true;
99+
if enforce_shebang_whitespace {
100+
if let Some(diagnostic) =
101+
shebang_whitespace(line.range(), &shebang, fix_shebang_whitespace)
102+
{
103+
diagnostics.push(diagnostic);
104+
}
99105
}
100-
}
101-
if enforce_shebang_whitespace {
102-
if let Some(diagnostic) =
103-
shebang_whitespace(line.range(), &shebang, fix_shebang_whitespace)
104-
{
105-
diagnostics.push(diagnostic);
106+
if enforce_shebang_newline {
107+
if let Some(diagnostic) =
108+
shebang_newline(line.range(), &shebang, index == 0)
109+
{
110+
diagnostics.push(diagnostic);
111+
}
106112
}
107-
}
108-
if enforce_shebang_newline {
109-
if let Some(diagnostic) = shebang_newline(line.range(), &shebang, index == 0) {
110-
diagnostics.push(diagnostic);
111-
}
112-
}
113-
if enforce_shebang_python {
114-
if let Some(diagnostic) = shebang_python(line.range(), &shebang) {
115-
diagnostics.push(diagnostic);
113+
if enforce_shebang_python {
114+
if let Some(diagnostic) = shebang_python(line.range(), &shebang) {
115+
diagnostics.push(diagnostic);
116+
}
116117
}
118+
} else {
117119
}
118120
}
119121
}

crates/ruff/src/rules/flake8_executable/helpers.rs

Lines changed: 62 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -5,84 +5,88 @@ use std::path::Path;
55

66
#[cfg(target_family = "unix")]
77
use anyhow::Result;
8-
use once_cell::sync::Lazy;
9-
use regex::Regex;
108
use ruff_text_size::{TextLen, TextSize};
119

12-
static SHEBANG_REGEX: Lazy<Regex> =
13-
Lazy::new(|| Regex::new(r"^(?P<spaces>\s*)#!(?P<directive>.*)").unwrap());
14-
10+
/// A shebang directive (e.g., `#!/usr/bin/env python3`).
1511
#[derive(Debug, PartialEq, Eq)]
16-
pub(crate) enum ShebangDirective<'a> {
17-
None,
18-
// whitespace length, start of the shebang, contents
19-
Match(TextSize, TextSize, &'a str),
12+
pub(crate) struct ShebangDirective<'a> {
13+
/// The offset of the directive contents (e.g., `/usr/bin/env python3`) from the start of the
14+
/// line.
15+
pub(crate) offset: TextSize,
16+
/// The contents of the directive (e.g., `"/usr/bin/env python3"`).
17+
pub(crate) contents: &'a str,
2018
}
2119

22-
pub(crate) fn extract_shebang(line: &str) -> ShebangDirective {
23-
// Minor optimization to avoid matches in the common case.
24-
if !line.contains('!') {
25-
return ShebangDirective::None;
20+
impl<'a> ShebangDirective<'a> {
21+
/// Parse a shebang directive from a line, or return `None` if the line does not contain a
22+
/// shebang directive.
23+
pub(crate) fn try_extract(line: &'a str) -> Option<Self> {
24+
// Trim whitespace.
25+
let directive = Self::lex_whitespace(line);
26+
27+
// Trim the `#!` prefix.
28+
let directive = Self::lex_char(directive, '#')?;
29+
let directive = Self::lex_char(directive, '!')?;
30+
31+
Some(Self {
32+
offset: line.text_len() - directive.text_len(),
33+
contents: directive,
34+
})
2635
}
27-
match SHEBANG_REGEX.captures(line) {
28-
Some(caps) => match caps.name("spaces") {
29-
Some(spaces) => match caps.name("directive") {
30-
Some(matches) => ShebangDirective::Match(
31-
spaces.as_str().text_len(),
32-
TextSize::try_from(matches.start()).unwrap(),
33-
matches.as_str(),
34-
),
35-
None => ShebangDirective::None,
36-
},
37-
None => ShebangDirective::None,
38-
},
39-
None => ShebangDirective::None,
36+
37+
/// Lex optional leading whitespace.
38+
#[inline]
39+
fn lex_whitespace(line: &str) -> &str {
40+
line.trim_start()
41+
}
42+
43+
/// Lex a specific character, or return `None` if the character is not the first character in
44+
/// the line.
45+
#[inline]
46+
fn lex_char(line: &str, c: char) -> Option<&str> {
47+
let mut chars = line.chars();
48+
if chars.next() == Some(c) {
49+
Some(chars.as_str())
50+
} else {
51+
None
52+
}
4053
}
4154
}
4255

4356
#[cfg(target_family = "unix")]
44-
pub(crate) fn is_executable(filepath: &Path) -> Result<bool> {
45-
{
46-
let metadata = filepath.metadata()?;
47-
let permissions = metadata.permissions();
48-
Ok(permissions.mode() & 0o111 != 0)
49-
}
57+
pub(super) fn is_executable(filepath: &Path) -> Result<bool> {
58+
let metadata = filepath.metadata()?;
59+
let permissions = metadata.permissions();
60+
Ok(permissions.mode() & 0o111 != 0)
5061
}
5162

5263
#[cfg(test)]
5364
mod tests {
54-
use ruff_text_size::TextSize;
65+
use insta::assert_debug_snapshot;
66+
67+
use crate::rules::flake8_executable::helpers::ShebangDirective;
5568

56-
use crate::rules::flake8_executable::helpers::{
57-
extract_shebang, ShebangDirective, SHEBANG_REGEX,
58-
};
69+
#[test]
70+
fn shebang_non_match() {
71+
let source = "not a match";
72+
assert_debug_snapshot!(ShebangDirective::try_extract(source));
73+
}
5974

6075
#[test]
61-
fn shebang_regex() {
62-
// Positive cases
63-
assert!(SHEBANG_REGEX.is_match("#!/usr/bin/python"));
64-
assert!(SHEBANG_REGEX.is_match("#!/usr/bin/env python"));
65-
assert!(SHEBANG_REGEX.is_match(" #!/usr/bin/env python"));
66-
assert!(SHEBANG_REGEX.is_match(" #!/usr/bin/env python"));
76+
fn shebang_end_of_line() {
77+
let source = "print('test') #!/usr/bin/python";
78+
assert_debug_snapshot!(ShebangDirective::try_extract(source));
79+
}
6780

68-
// Negative cases
69-
assert!(!SHEBANG_REGEX.is_match("hello world"));
81+
#[test]
82+
fn shebang_match() {
83+
let source = "#!/usr/bin/env python";
84+
assert_debug_snapshot!(ShebangDirective::try_extract(source));
7085
}
7186

7287
#[test]
73-
fn shebang_extract_match() {
74-
assert_eq!(extract_shebang("not a match"), ShebangDirective::None);
75-
assert_eq!(
76-
extract_shebang("#!/usr/bin/env python"),
77-
ShebangDirective::Match(TextSize::from(0), TextSize::from(2), "/usr/bin/env python")
78-
);
79-
assert_eq!(
80-
extract_shebang("  #!/usr/bin/env python"),
81-
ShebangDirective::Match(TextSize::from(2), TextSize::from(4), "/usr/bin/env python")
82-
);
83-
assert_eq!(
84-
extract_shebang("print('test') #!/usr/bin/python"),
85-
ShebangDirective::None
86-
);
88+
fn shebang_leading_space() {
89+
let source = "  #!/usr/bin/env python";
90+
assert_debug_snapshot!(ShebangDirective::try_extract(source));
8791
}
8892
}

crates/ruff/src/rules/flake8_executable/rules/shebang_newline.rs

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,14 @@ pub(crate) fn shebang_newline(
4747
shebang: &ShebangDirective,
4848
first_line: bool,
4949
) -> Option<Diagnostic> {
50-
if let ShebangDirective::Match(_, start, content) = shebang {
51-
if first_line {
52-
None
53-
} else {
54-
let diagnostic = Diagnostic::new(
55-
ShebangNotFirstLine,
56-
TextRange::at(range.start() + start, content.text_len()),
57-
);
58-
Some(diagnostic)
59-
}
60-
} else {
50+
let ShebangDirective { offset, contents } = shebang;
51+
52+
if first_line {
6153
None
54+
} else {
55+
Some(Diagnostic::new(
56+
ShebangNotFirstLine,
57+
TextRange::at(range.start() + offset, contents.text_len()),
58+
))
6259
}
6360
}

crates/ruff/src/rules/flake8_executable/rules/shebang_not_executable.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,16 @@ pub(crate) fn shebang_not_executable(
4848
range: TextRange,
4949
shebang: &ShebangDirective,
5050
) -> Option<Diagnostic> {
51-
if let ShebangDirective::Match(_, start, content) = shebang {
52-
if let Ok(false) = is_executable(filepath) {
53-
let diagnostic = Diagnostic::new(
54-
ShebangNotExecutable,
55-
TextRange::at(range.start() + start, content.text_len()),
56-
);
57-
return Some(diagnostic);
58-
}
51+
let ShebangDirective { offset, contents } = shebang;
52+
53+
if let Ok(false) = is_executable(filepath) {
54+
let diagnostic = Diagnostic::new(
55+
ShebangNotExecutable,
56+
TextRange::at(range.start() + offset, contents.text_len()),
57+
);
58+
return Some(diagnostic);
5959
}
60+
6061
None
6162
}
6263

crates/ruff/src/rules/flake8_executable/rules/shebang_python.rs

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,19 +42,14 @@ impl Violation for ShebangMissingPython {
4242

4343
/// EXE003
4444
pub(crate) fn shebang_python(range: TextRange, shebang: &ShebangDirective) -> Option<Diagnostic> {
45-
if let ShebangDirective::Match(_, start, content) = shebang {
46-
if content.contains("python") || content.contains("pytest") {
47-
None
48-
} else {
49-
let diagnostic = Diagnostic::new(
50-
ShebangMissingPython,
51-
TextRange::at(range.start() + start, content.text_len())
52-
.sub_start(TextSize::from(2)),
53-
);
45+
let ShebangDirective { offset, contents } = shebang;
5446

55-
Some(diagnostic)
56-
}
57-
} else {
47+
if contents.contains("python") || contents.contains("pytest") {
5848
None
49+
} else {
50+
Some(Diagnostic::new(
51+
ShebangMissingPython,
52+
TextRange::at(range.start() + offset, contents.text_len()).sub_start(TextSize::from(2)),
53+
))
5954
}
6055
}

crates/ruff/src/rules/flake8_executable/rules/shebang_whitespace.rs

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use ruff_text_size::{TextRange, TextSize};
2+
use std::ops::Sub;
23

34
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
45
use ruff_macros::{derive_message_formats, violation};
@@ -49,22 +50,25 @@ pub(crate) fn shebang_whitespace(
4950
shebang: &ShebangDirective,
5051
autofix: bool,
5152
) -> Option<Diagnostic> {
52-
if let ShebangDirective::Match(n_spaces, start, ..) = shebang {
53-
if *n_spaces > TextSize::from(0) && *start == n_spaces + TextSize::from(2) {
54-
let mut diagnostic = Diagnostic::new(
55-
ShebangLeadingWhitespace,
56-
TextRange::at(range.start(), *n_spaces),
57-
);
58-
if autofix {
59-
diagnostic.set_fix(Fix::automatic(Edit::range_deletion(TextRange::at(
60-
range.start(),
61-
*n_spaces,
62-
))));
63-
}
64-
Some(diagnostic)
65-
} else {
66-
None
53+
let ShebangDirective {
54+
offset,
55+
contents: _,
56+
} = shebang;
57+
58+
if *offset > TextSize::from(2) {
59+
let leading_space_start = range.start();
60+
let leading_space_len = offset.sub(TextSize::new(2));
61+
let mut diagnostic = Diagnostic::new(
62+
ShebangLeadingWhitespace,
63+
TextRange::at(leading_space_start, leading_space_len),
64+
);
65+
if autofix {
66+
diagnostic.set_fix(Fix::automatic(Edit::range_deletion(TextRange::at(
67+
leading_space_start,
68+
leading_space_len,
69+
))));
6770
}
71+
Some(diagnostic)
6872
} else {
6973
None
7074
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
source: crates/ruff/src/rules/flake8_executable/helpers.rs
3+
expression: "ShebangDirective::try_extract(source)"
4+
---
5+
None
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
source: crates/ruff/src/rules/flake8_executable/helpers.rs
3+
expression: "ShebangDirective::try_extract(source)"
4+
---
5+
Some(
6+
ShebangDirective {
7+
offset: 4,
8+
contents: "/usr/bin/env python",
9+
},
10+
)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
source: crates/ruff/src/rules/flake8_executable/helpers.rs
3+
expression: "ShebangDirective::try_extract(source)"
4+
---
5+
Some(
6+
ShebangDirective {
7+
offset: 2,
8+
contents: "/usr/bin/env python",
9+
},
10+
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
source: crates/ruff/src/rules/flake8_executable/helpers.rs
3+
expression: "ShebangDirective::try_extract(source)"
4+
---
5+
None

0 commit comments

Comments
 (0)