Skip to content

Commit

Permalink
Add extraneous whitespace rules
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Feb 4, 2023
1 parent 61f2d7c commit bc510c8
Show file tree
Hide file tree
Showing 12 changed files with 403 additions and 11 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,9 @@ For more, see [pycodestyle](https://pypi.org/project/pycodestyle/) on PyPI.
| Code | Name | Message | Fix |
| ---- | ---- | ------- | --- |
| E101 | mixed-spaces-and-tabs | Indentation contains mixed spaces and tabs | |
| E201 | whitespace-after-open-bracket | Whitespace after '(' | |
| E202 | whitespace-before-close-bracket | Whitespace before ')' | |
| E203 | whitespace-before-punctuation | Whitespace before ',', ';', or ':' | |
| E221 | multiple-spaces-before-operator | Multiple spaces before operator | |
| E222 | multiple-spaces-after-operator | Multiple spaces after operator | |
| E223 | tab-before-operator | Tab before operator | |
Expand Down
78 changes: 78 additions & 0 deletions resources/test/fixtures/pycodestyle/E20.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#: E201:1:6
spam( ham[1], {eggs: 2})
#: E201:1:10
spam(ham[ 1], {eggs: 2})
#: E201:1:15
spam(ham[1], { eggs: 2})
#: E201:1:6
spam( ham[1], {eggs: 2})
#: E201:1:10
spam(ham[ 1], {eggs: 2})
#: E201:1:15
spam(ham[1], { eggs: 2})
#: Okay
spam(ham[1], {eggs: 2})
#:


#: E202:1:23
spam(ham[1], {eggs: 2} )
#: E202:1:22
spam(ham[1], {eggs: 2 })
#: E202:1:11
spam(ham[1 ], {eggs: 2})
#: E202:1:23
spam(ham[1], {eggs: 2} )
#: E202:1:22
spam(ham[1], {eggs: 2 })
#: E202:1:11
spam(ham[1 ], {eggs: 2})
#: Okay
spam(ham[1], {eggs: 2})

result = func(
arg1='some value',
arg2='another value',
)

result = func(
arg1='some value',
arg2='another value'
)

result = [
item for item in items
if item > 5
]
#:


#: E203:1:10
if x == 4 :
print x, y
x, y = y, x
#: E203:1:10
if x == 4 :
print x, y
x, y = y, x
#: E203:2:15 E702:2:16
if x == 4:
print x, y ; x, y = y, x
#: E203:2:15 E702:2:16
if x == 4:
print x, y ; x, y = y, x
#: E203:3:13
if x == 4:
print x, y
x, y = y , x
#: E203:3:13
if x == 4:
print x, y
x, y = y , x
#: Okay
if x == 4:
print x, y
x, y = y, x
a[b1, :] == a[b1, ...]
b = a[:, b1]
#:
4 changes: 4 additions & 0 deletions ruff.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1447,6 +1447,10 @@
"E10",
"E101",
"E2",
"E20",
"E201",
"E202",
"E203",
"E22",
"E221",
"E222",
Expand Down
52 changes: 43 additions & 9 deletions src/checkers/logical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use rustpython_parser::lexer::{LexResult, Tok};

use crate::ast::types::Range;
use crate::registry::Diagnostic;
use crate::rules::pycodestyle::rules::space_around_operator;
use crate::rules::pycodestyle::rules::{extraneous_whitespace, space_around_operator};
use crate::settings::Settings;
use crate::source_code::Locator;

Expand All @@ -15,11 +15,17 @@ struct LogicalLine {
mapping: Vec<(usize, Location)>,
/// Whether the logical line contains an operator.
operator: bool,
/// Whether the logical line contains a bracket.
bracket: bool,
/// Whether the logical line contains a punctuation mark.
punctuation: bool,
}

fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> LogicalLine {
let mut logical = String::with_capacity(88);
let mut operator = false;
let mut bracket = false;
let mut punctuation = false;
let mut mapping = Vec::new();
let mut prev: Option<&Location> = None;
let mut length = 0;
Expand Down Expand Up @@ -67,9 +73,20 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic
);
}

if !bracket {
bracket |= matches!(
tok,
Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
);
}

if !punctuation {
punctuation |= matches!(tok, Tok::Comma | Tok::Semi | Tok::Colon);
}

// TODO(charlie): "Mute" strings.
let text = if let Tok::String { .. } = tok {
"\"\""
"\"xxx\""
} else {
locator.slice_source_code_range(&Range {
location: *start,
Expand All @@ -80,12 +97,12 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic
if let Some(prev) = prev {
if prev.row() != start.row() {
let prev_text = locator.slice_source_code_range(&Range {
location: *prev,
end_location: Location::new(prev.row() + 1, 0),
location: Location::new(prev.row(), prev.column() - 1),
end_location: Location::new(prev.row(), prev.column()),
});
if prev_text == ","
|| ((prev_text != "{" && prev_text != "[" && prev_text != "(")
&& (text != "}" || text != "]" || text != ")"))
&& (text != "}" && text != "]" && text != ")"))
{
logical.push(' ');
length += 1;
Expand All @@ -108,6 +125,8 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic
LogicalLine {
text: logical,
operator,
bracket,
punctuation,
mapping,
}
}
Expand Down Expand Up @@ -139,8 +158,8 @@ pub fn check_logical_lines(
) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
for line in iter_logical_lines(tokens, locator) {
let mapping_offsets = line.mapping.iter().map(|(offset, _)| *offset).collect_vec();
if line.operator {
let mapping_offsets = line.mapping.iter().map(|(offset, _)| *offset).collect_vec();
for (index, kind) in space_around_operator(&line.text) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let location = Location::new(pos.row(), pos.column() + index - token_offset);
Expand All @@ -155,6 +174,21 @@ pub fn check_logical_lines(
}
}
}
if line.bracket || line.punctuation {
for (index, kind) in extraneous_whitespace(&line.text) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let location = Location::new(pos.row(), pos.column() + index - token_offset);
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
location,
end_location: location,
fix: None,
parent: None,
});
}
}
}
}
diagnostics
}
Expand Down Expand Up @@ -201,7 +235,7 @@ z = x + 1"#;
.map(|line| line.text)
.collect();
let expected = vec![
"x = [ 1, 2, 3, ]".to_string(),
"x = [1, 2, 3, ]".to_string(),
"y = 2".to_string(),
"z = x + 1".to_string(),
];
Expand All @@ -214,7 +248,7 @@ z = x + 1"#;
.into_iter()
.map(|line| line.text)
.collect();
let expected = vec!["x = \"\"".to_string()];
let expected = vec!["x = \"xxx\"".to_string()];
assert_eq!(actual, expected);

let contents = r#"
Expand Down Expand Up @@ -242,7 +276,7 @@ f()"#;
.into_iter()
.map(|line| line.text)
.collect();
let expected = vec!["def f():", "\"\"", "x = 1", "f()"];
let expected = vec!["def f():", "\"xxx\"", "x = 1", "f()"];
assert_eq!(actual, expected);
}
}
10 changes: 8 additions & 2 deletions src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ use crate::violation::Violation;
ruff_macros::define_rule_mapping!(
// pycodestyle errors
E101 => rules::pycodestyle::rules::MixedSpacesAndTabs,
E201 => rules::pycodestyle::rules::WhitespaceAfterOpenBracket,
E202 => rules::pycodestyle::rules::WhitespaceBeforeCloseBracket,
E203 => rules::pycodestyle::rules::WhitespaceBeforePunctuation,
E221 => rules::pycodestyle::rules::MultipleSpacesBeforeOperator,
E222 => rules::pycodestyle::rules::MultipleSpacesAfterOperator,
E223 => rules::pycodestyle::rules::TabBeforeOperator,
Expand Down Expand Up @@ -689,9 +692,12 @@ impl Rule {
match self {
Rule::UnusedNOQA => &LintSource::NoQa,
Rule::TabBeforeOperator
| Rule::MultipleSpacesBeforeOperator
| Rule::MultipleSpacesAfterOperator
| Rule::TabAfterOperator => &LintSource::LogicalLines,
| Rule::MultipleSpacesBeforeOperator
| Rule::TabAfterOperator
| Rule::WhitespaceAfterOpenBracket
| Rule::WhitespaceBeforeCloseBracket
| Rule::WhitespaceBeforePunctuation => &LintSource::LogicalLines,
Rule::BlanketNOQA
| Rule::BlanketTypeIgnore
| Rule::DocLineTooLong
Expand Down
3 changes: 3 additions & 0 deletions src/rules/pycodestyle/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ mod tests {
use crate::test::test_path;
use crate::{assert_yaml_snapshot, settings};

#[test_case(Rule::WhitespaceAfterOpenBracket, Path::new("E20.py"))]
#[test_case(Rule::WhitespaceBeforeCloseBracket, Path::new("E20.py"))]
#[test_case(Rule::WhitespaceBeforePunctuation, Path::new("E20.py"))]
#[test_case(Rule::TabBeforeOperator, Path::new("E22.py"))]
#[test_case(Rule::MultipleSpacesBeforeOperator, Path::new("E22.py"))]
#[test_case(Rule::TabAfterOperator, Path::new("E22.py"))]
Expand Down
63 changes: 63 additions & 0 deletions src/rules/pycodestyle/rules/extraneous_whitespace.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
use once_cell::sync::Lazy;
use regex::Regex;

use ruff_macros::derive_message_formats;

use crate::define_violation;
use crate::registry::DiagnosticKind;
use crate::violation::Violation;

// Violation registered as E201. `extraneous_whitespace` emits it when a space or
// tab directly follows an opening bracket (`(`, `[` or `{`).
define_violation!(
pub struct WhitespaceAfterOpenBracket;
);
impl Violation for WhitespaceAfterOpenBracket {
#[derive_message_formats]
fn message(&self) -> String {
// The text is fixed: it always names '(' regardless of which opening
// bracket actually preceded the whitespace.
format!("Whitespace after '('")
}
}

// Violation registered as E202. `extraneous_whitespace` emits it when a space or
// tab directly precedes a closing bracket (`)`, `]` or `}`) and the character
// before that whitespace is not a comma.
define_violation!(
pub struct WhitespaceBeforeCloseBracket;
);
impl Violation for WhitespaceBeforeCloseBracket {
#[derive_message_formats]
fn message(&self) -> String {
// The text is fixed: it always names ')' regardless of which closing
// bracket actually followed the whitespace.
format!("Whitespace before ')'")
}
}

// Violation registered as E203. `extraneous_whitespace` emits it when a space or
// tab directly precedes `,`, `;` or `:` and the character before that whitespace
// is not a comma.
define_violation!(
pub struct WhitespaceBeforePunctuation;
);
impl Violation for WhitespaceBeforePunctuation {
#[derive_message_formats]
fn message(&self) -> String {
// The text is fixed: it lists all three punctuation marks rather than
// the one that was actually found.
format!("Whitespace before ',', ';', or ':'")
}
}

// Lazily compiled pattern with two alternatives, wrapped together in capture
// group 1:
//   * an opening bracket (`[`, `(` or `{`) followed by a space or tab;
//   * a space or tab followed by a closing bracket (`]`, `}` or `)`) or by one
//     of `,`, `;`, `:`.
// Every match is therefore exactly two ASCII characters long.
// TODO(charlie): Pycodestyle has a negative lookahead on the end.
static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap());

/// E201, E202, E203
///
/// Scans `line` for whitespace that directly follows an opening bracket (E201) or
/// directly precedes a closing bracket (E202) or one of `,`, `;`, `:` (E203).
///
/// Returns one `(offset, kind)` pair per violation, in match order. `offset` is the
/// byte offset of the offending whitespace character within `line`, as reported by
/// the regex engine.
///
/// Whitespace before a closing bracket or punctuation mark is only reported when the
/// character immediately before that whitespace is something other than a comma; a
/// match at the very start of the line (nothing before it) is not reported.
pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
    let mut diagnostics = vec![];
    // The pattern's only capture group spans the whole match, so iterating over
    // plain matches is equivalent to reading group 1 of every capture.
    for found_match in EXTRANEOUS_WHITESPACE_REGEX.find_iter(line) {
        let text = found_match.as_str();
        let symbol = text.trim();
        let found = found_match.start();
        if text.ends_with(|c: char| c.is_ascii_whitespace()) {
            // Opening bracket followed by whitespace. The bracket is a single
            // ASCII byte, so the whitespace sits at `found + 1`.
            diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
            continue;
        }

        // `found` is a byte offset, so the preceding character must be looked up by
        // slicing rather than by counting chars; counting picks the wrong character
        // once the line contains multi-byte characters. Slicing also avoids the
        // `found - 1` underflow when the match starts at offset 0.
        let preceded_by_non_comma = line[..found]
            .chars()
            .next_back()
            .map_or(false, |c| c != ',');
        if preceded_by_non_comma {
            if matches!(symbol, "}" | "]" | ")") {
                diagnostics.push((found, WhitespaceBeforeCloseBracket.into()));
            } else {
                diagnostics.push((found, WhitespaceBeforePunctuation.into()));
            }
        }
    }
    diagnostics
}
5 changes: 5 additions & 0 deletions src/rules/pycodestyle/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ pub use do_not_assign_lambda::{do_not_assign_lambda, DoNotAssignLambda};
pub use do_not_use_bare_except::{do_not_use_bare_except, DoNotUseBareExcept};
pub use doc_line_too_long::{doc_line_too_long, DocLineTooLong};
pub use errors::{syntax_error, IOError, SyntaxError};
pub use extraneous_whitespace::{
extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket,
WhitespaceBeforePunctuation,
};
pub use imports::{
module_import_not_at_top_of_file, multiple_imports_on_one_line, ModuleImportNotAtTopOfFile,
MultipleImportsOnOneLine,
Expand All @@ -28,6 +32,7 @@ mod do_not_assign_lambda;
mod do_not_use_bare_except;
mod doc_line_too_long;
mod errors;
mod extraneous_whitespace;
mod imports;
mod invalid_escape_sequence;
mod line_too_long;
Expand Down
1 change: 1 addition & 0 deletions src/rules/pycodestyle/rules/space_around_operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ impl Violation for MultipleSpacesAfterOperator {
static OPERATOR_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());

/// E221, E222, E223, E224
pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
let mut diagnostics = vec![];
for line_match in OPERATOR_REGEX.captures_iter(line) {
Expand Down
Loading

0 comments on commit bc510c8

Please sign in to comment.