Add extraneous whitespace rules

astral-sh · Feb 4, 2023 · bc510c8 · bc510c8
1 parent 61f2d7c
commit bc510c8
Show file tree

Hide file tree

Showing 12 changed files with 403 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -701,6 +701,9 @@ For more, see [pycodestyle](https://pypi.org/project/pycodestyle/) on PyPI.
 | Code | Name | Message | Fix |
 | ---- | ---- | ------- | --- |
 | E101 | mixed-spaces-and-tabs | Indentation contains mixed spaces and tabs |  |
+| E201 | whitespace-after-open-bracket | Whitespace after '(' |  |
+| E202 | whitespace-before-close-bracket | Whitespace before ')' |  |
+| E203 | whitespace-before-punctuation | Whitespace before ',', ';', or ':' |  |
 | E221 | multiple-spaces-before-operator | Multiple spaces before operator |  |
 | E222 | multiple-spaces-after-operator | Multiple spaces after operator |  |
 | E223 | tab-before-operator | Tab before operator |  |

diff --git a/resources/test/fixtures/pycodestyle/E20.py b/resources/test/fixtures/pycodestyle/E20.py
@@ -0,0 +1,78 @@
+#: E201:1:6
+spam( ham[1], {eggs: 2})
+#: E201:1:10
+spam(ham[ 1], {eggs: 2})
+#: E201:1:15
+spam(ham[1], { eggs: 2})
+#: E201:1:6
+spam(	ham[1], {eggs: 2})
+#: E201:1:10
+spam(ham[	1], {eggs: 2})
+#: E201:1:15
+spam(ham[1], {	eggs: 2})
+#: Okay
+spam(ham[1], {eggs: 2})
+#:
+
+
+#: E202:1:23
+spam(ham[1], {eggs: 2} )
+#: E202:1:22
+spam(ham[1], {eggs: 2 })
+#: E202:1:11
+spam(ham[1 ], {eggs: 2})
+#: E202:1:23
+spam(ham[1], {eggs: 2}	)
+#: E202:1:22
+spam(ham[1], {eggs: 2	})
+#: E202:1:11
+spam(ham[1	], {eggs: 2})
+#: Okay
+spam(ham[1], {eggs: 2})
+
+result = func(
+    arg1='some value',
+    arg2='another value',
+)
+
+result = func(
+    arg1='some value',
+    arg2='another value'
+)
+
+result = [
+    item for item in items
+    if item > 5
+]
+#:
+
+
+#: E203:1:10
+if x == 4 :
+    print x, y
+    x, y = y, x
+#: E203:1:10
+if x == 4	:
+    print x, y
+    x, y = y, x
+#: E203:2:15 E702:2:16
+if x == 4:
+    print x, y ; x, y = y, x
+#: E203:2:15 E702:2:16
+if x == 4:
+    print x, y	; x, y = y, x
+#: E203:3:13
+if x == 4:
+    print x, y
+    x, y = y , x
+#: E203:3:13
+if x == 4:
+    print x, y
+    x, y = y	, x
+#: Okay
+if x == 4:
+    print x, y
+    x, y = y, x
+a[b1, :] == a[b1, ...]
+b = a[:, b1]
+#:
diff --git a/ruff.schema.json b/ruff.schema.json
@@ -1447,6 +1447,10 @@
         "E10",
         "E101",
         "E2",
+        "E20",
+        "E201",
+        "E202",
+        "E203",
         "E22",
         "E221",
         "E222",

diff --git a/src/checkers/logical_lines.rs b/src/checkers/logical_lines.rs
@@ -5,7 +5,7 @@ use rustpython_parser::lexer::{LexResult, Tok};
 
 use crate::ast::types::Range;
 use crate::registry::Diagnostic;
-use crate::rules::pycodestyle::rules::space_around_operator;
+use crate::rules::pycodestyle::rules::{extraneous_whitespace, space_around_operator};
 use crate::settings::Settings;
 use crate::source_code::Locator;
 
@@ -15,11 +15,17 @@ struct LogicalLine {
     mapping: Vec<(usize, Location)>,
     /// Whether the logical line contains an operator.
     operator: bool,
+    /// Whether the logical line contains a bracket.
+    bracket: bool,
+    /// Whether the logical line contains a punctuation mark.
+    punctuation: bool,
 }
 
 fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> LogicalLine {
     let mut logical = String::with_capacity(88);
     let mut operator = false;
+    let mut bracket = false;
+    let mut punctuation = false;
     let mut mapping = Vec::new();
     let mut prev: Option<&Location> = None;
     let mut length = 0;
@@ -67,9 +73,20 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic
             );
         }
 
+        if !bracket {
+            bracket |= matches!(
+                tok,
+                Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
+            );
+        }
+
+        if !punctuation {
+            punctuation |= matches!(tok, Tok::Comma | Tok::Semi | Tok::Colon);
+        }
+
         // TODO(charlie): "Mute" strings.
         let text = if let Tok::String { .. } = tok {
-            "\"\""
+            "\"xxx\""
         } else {
             locator.slice_source_code_range(&Range {
                 location: *start,
@@ -80,12 +97,12 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic
         if let Some(prev) = prev {
             if prev.row() != start.row() {
                 let prev_text = locator.slice_source_code_range(&Range {
-                    location: *prev,
-                    end_location: Location::new(prev.row() + 1, 0),
+                    location: Location::new(prev.row(), prev.column() - 1),
+                    end_location: Location::new(prev.row(), prev.column()),
                 });
                 if prev_text == ","
                     || ((prev_text != "{" && prev_text != "[" && prev_text != "(")
-                        && (text != "}" || text != "]" || text != ")"))
+                        && (text != "}" && text != "]" && text != ")"))
                 {
                     logical.push(' ');
                     length += 1;
@@ -108,6 +125,8 @@ fn build_line(tokens: &[(Location, &Tok, Location)], locator: &Locator) -> Logic
     LogicalLine {
         text: logical,
         operator,
+        bracket,
+        punctuation,
         mapping,
     }
 }
@@ -139,8 +158,8 @@ pub fn check_logical_lines(
 ) -> Vec<Diagnostic> {
     let mut diagnostics = vec![];
     for line in iter_logical_lines(tokens, locator) {
+        let mapping_offsets = line.mapping.iter().map(|(offset, _)| *offset).collect_vec();
         if line.operator {
-            let mapping_offsets = line.mapping.iter().map(|(offset, _)| *offset).collect_vec();
             for (index, kind) in space_around_operator(&line.text) {
                 let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
                 let location = Location::new(pos.row(), pos.column() + index - token_offset);
@@ -155,6 +174,21 @@ pub fn check_logical_lines(
                 }
             }
         }
+        if line.bracket || line.punctuation {
+            for (index, kind) in extraneous_whitespace(&line.text) {
+                let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
+                let location = Location::new(pos.row(), pos.column() + index - token_offset);
+                if settings.rules.enabled(kind.rule()) {
+                    diagnostics.push(Diagnostic {
+                        kind,
+                        location,
+                        end_location: location,
+                        fix: None,
+                        parent: None,
+                    });
+                }
+            }
+        }
     }
     diagnostics
 }
@@ -201,7 +235,7 @@ z = x + 1"#;
             .map(|line| line.text)
             .collect();
         let expected = vec![
-            "x = [ 1, 2, 3, ]".to_string(),
+            "x = [1, 2, 3, ]".to_string(),
             "y = 2".to_string(),
             "z = x + 1".to_string(),
         ];
@@ -214,7 +248,7 @@ z = x + 1"#;
             .into_iter()
             .map(|line| line.text)
             .collect();
-        let expected = vec!["x = \"\"".to_string()];
+        let expected = vec!["x = \"xxx\"".to_string()];
         assert_eq!(actual, expected);
 
         let contents = r#"
@@ -242,7 +276,7 @@ f()"#;
             .into_iter()
             .map(|line| line.text)
             .collect();
-        let expected = vec!["def f():", "\"\"", "x = 1", "f()"];
+        let expected = vec!["def f():", "\"xxx\"", "x = 1", "f()"];
         assert_eq!(actual, expected);
     }
 }
diff --git a/src/registry.rs b/src/registry.rs
@@ -13,6 +13,9 @@ use crate::violation::Violation;
 ruff_macros::define_rule_mapping!(
     // pycodestyle errors
     E101 => rules::pycodestyle::rules::MixedSpacesAndTabs,
+    E201 => rules::pycodestyle::rules::WhitespaceAfterOpenBracket,
+    E202 => rules::pycodestyle::rules::WhitespaceBeforeCloseBracket,
+    E203 => rules::pycodestyle::rules::WhitespaceBeforePunctuation,
     E221 => rules::pycodestyle::rules::MultipleSpacesBeforeOperator,
     E222 => rules::pycodestyle::rules::MultipleSpacesAfterOperator,
     E223 => rules::pycodestyle::rules::TabBeforeOperator,
@@ -689,9 +692,12 @@ impl Rule {
         match self {
             Rule::UnusedNOQA => &LintSource::NoQa,
             Rule::TabBeforeOperator
-            | Rule::MultipleSpacesBeforeOperator
             | Rule::MultipleSpacesAfterOperator
-            | Rule::TabAfterOperator => &LintSource::LogicalLines,
+            | Rule::MultipleSpacesBeforeOperator
+            | Rule::TabAfterOperator
+            | Rule::WhitespaceAfterOpenBracket
+            | Rule::WhitespaceBeforeCloseBracket
+            | Rule::WhitespaceBeforePunctuation => &LintSource::LogicalLines,
             Rule::BlanketNOQA
             | Rule::BlanketTypeIgnore
             | Rule::DocLineTooLong

diff --git a/src/rules/pycodestyle/mod.rs b/src/rules/pycodestyle/mod.rs
@@ -16,6 +16,9 @@ mod tests {
     use crate::test::test_path;
     use crate::{assert_yaml_snapshot, settings};
 
+    #[test_case(Rule::WhitespaceAfterOpenBracket, Path::new("E20.py"))]
+    #[test_case(Rule::WhitespaceBeforeCloseBracket, Path::new("E20.py"))]
+    #[test_case(Rule::WhitespaceBeforePunctuation, Path::new("E20.py"))]
     #[test_case(Rule::TabBeforeOperator, Path::new("E22.py"))]
     #[test_case(Rule::MultipleSpacesBeforeOperator, Path::new("E22.py"))]
     #[test_case(Rule::TabAfterOperator, Path::new("E22.py"))]

diff --git a/src/rules/pycodestyle/rules/extraneous_whitespace.rs b/src/rules/pycodestyle/rules/extraneous_whitespace.rs
@@ -0,0 +1,63 @@
+use once_cell::sync::Lazy;
+use regex::Regex;
+
+use ruff_macros::derive_message_formats;
+
+use crate::define_violation;
+use crate::registry::DiagnosticKind;
+use crate::violation::Violation;
+
+// E201: whitespace directly after an opening bracket. The regex in this file matches `(`, `[`
+// and `{`, although the message text only names '('.
+define_violation!(
+    pub struct WhitespaceAfterOpenBracket;
+);
+impl Violation for WhitespaceAfterOpenBracket {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Whitespace after '('")
+    }
+}
+
+// E202: whitespace directly before a closing bracket. The check in this file covers `)`, `]`
+// and `}`, although the message text only names ')'.
+define_violation!(
+    pub struct WhitespaceBeforeCloseBracket;
+);
+impl Violation for WhitespaceBeforeCloseBracket {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Whitespace before ')'")
+    }
+}
+
+// E203: whitespace directly before `,`, `;` or `:`.
+define_violation!(
+    pub struct WhitespaceBeforePunctuation;
+);
+impl Violation for WhitespaceBeforePunctuation {
+    #[derive_message_formats]
+    fn message(&self) -> String {
+        format!("Whitespace before ',', ';', or ':'")
+    }
+}
+
+// Matches either an opening bracket followed by a space/tab, or a space/tab followed by a
+// closing bracket, `,`, `;` or `:`.
+// TODO(charlie): Pycodestyle ends this pattern with a `(?!=)` negative lookahead, which is
+// not reproduced here.
+static EXTRANEOUS_WHITESPACE_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"([\[({][ \t]|[ \t][]}),;:])").unwrap());
+
+/// E201, E202, E203: flags whitespace after an opening bracket, or before a closing bracket,
+/// `,`, `;` or `:` unless a `,` precedes it. Returns `(byte offset of whitespace, kind)` pairs.
+pub fn extraneous_whitespace(line: &str) -> Vec<(usize, DiagnosticKind)> {
+    let mut diagnostics = vec![];
+    for line_match in EXTRANEOUS_WHITESPACE_REGEX.captures_iter(line) {
+        let match_ = line_match.get(1).unwrap();
+        let (text, found) = (match_.as_str(), match_.start());
+        if text.chars().last().unwrap().is_ascii_whitespace() {
+            diagnostics.push((found + 1, WhitespaceAfterOpenBracket.into()));
+        } else if line[..found].chars().next_back().map_or(false, |c| c != ',') {
+            // Sliced by byte offset: `chars().nth(found - 1)` miscounts after non-ASCII text.
+            if matches!(text.trim(), "}" | "]" | ")") {
+                diagnostics.push((found, WhitespaceBeforeCloseBracket.into()));
+            } else {
+                diagnostics.push((found, WhitespaceBeforePunctuation.into()));
+            }
+        }
+    }
+    diagnostics
+}
diff --git a/src/rules/pycodestyle/rules/mod.rs b/src/rules/pycodestyle/rules/mod.rs
@@ -5,6 +5,10 @@ pub use do_not_assign_lambda::{do_not_assign_lambda, DoNotAssignLambda};
 pub use do_not_use_bare_except::{do_not_use_bare_except, DoNotUseBareExcept};
 pub use doc_line_too_long::{doc_line_too_long, DocLineTooLong};
 pub use errors::{syntax_error, IOError, SyntaxError};
+pub use extraneous_whitespace::{
+    extraneous_whitespace, WhitespaceAfterOpenBracket, WhitespaceBeforeCloseBracket,
+    WhitespaceBeforePunctuation,
+};
 pub use imports::{
     module_import_not_at_top_of_file, multiple_imports_on_one_line, ModuleImportNotAtTopOfFile,
     MultipleImportsOnOneLine,
@@ -28,6 +32,7 @@ mod do_not_assign_lambda;
 mod do_not_use_bare_except;
 mod doc_line_too_long;
 mod errors;
+mod extraneous_whitespace;
 mod imports;
 mod invalid_escape_sequence;
 mod line_too_long;

diff --git a/src/rules/pycodestyle/rules/space_around_operator.rs b/src/rules/pycodestyle/rules/space_around_operator.rs
@@ -50,6 +50,7 @@ impl Violation for MultipleSpacesAfterOperator {
 static OPERATOR_REGEX: Lazy<Regex> =
     Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());
 
+/// E221, E222, E223, E224
 pub fn space_around_operator(line: &str) -> Vec<(usize, DiagnosticKind)> {
     let mut diagnostics = vec![];
     for line_match in OPERATOR_REGEX.captures_iter(line) {