Implement pycodestyle's logical line detection

astral-sh · Dec 7, 2022 · 6458aa3 · 6458aa3
1 parent 528416f
commit 6458aa3
Show file tree

Hide file tree

Showing 5 changed files with 195 additions and 2 deletions.
diff --git a/foo.py b/foo.py
@@ -0,0 +1,4 @@
+x = \
+	1
+
+print(x)
diff --git a/src/check_logical_lines.rs b/src/check_logical_lines.rs
@@ -0,0 +1,185 @@
+use std::borrow::Cow;
+
+use once_cell::sync::Lazy;
+use regex::Regex;
+use rustpython_ast::Location;
+use rustpython_parser::lexer::{LexResult, Tok};
+
+use crate::ast::types::Range;
+use crate::source_code_locator::SourceCodeLocator;
+
+fn build_line(tokens: &[(&Location, &Tok, &Location)], locator: &SourceCodeLocator) -> String {
+    let mut logical = String::new();
+    let mut mapping = Vec::new();
+    let mut prev: Option<&Location> = None;
+    for (start, tok, end) in tokens {
+        if matches!(tok, Tok::Newline | Tok::Indent | Tok::Dedent | Tok::Comment) {
+            continue;
+        }
+        if mapping.is_empty() {
+            mapping.push((0, start));
+        }
+
+        // TODO(charlie): "Mute" strings.
+        let text = if let Tok::String { .. } = tok {
+            Cow::from("\"\"")
+        } else {
+            locator.slice_source_code_range(&Range {
+                location: **start,
+                end_location: **end,
+            })
+        };
+
+        if let Some(prev) = prev {
+            if prev.row() != start.row() {
+                let prev_text = locator.slice_source_code_range(&Range {
+                    location: *prev,
+                    end_location: Location::new(prev.row() + 1, 0),
+                });
+                if prev_text == ","
+                    || ((prev_text != "{" && prev_text != "[" && prev_text != "(")
+                        && (text != "}" || text != "]" || text != ")"))
+                {
+                    logical.push(' ');
+                }
+            } else if prev.column() != start.column() {
+                let prev_text = locator.slice_source_code_range(&Range {
+                    location: *prev,
+                    end_location: **start,
+                });
+                logical.push_str(&prev_text);
+            }
+        }
+        logical.push_str(&text);
+        mapping.push((text.len(), end));
+        prev = Some(end);
+    }
+    logical
+}
+
+pub fn logical_lines(tokens: &[LexResult], locator: &SourceCodeLocator) -> Vec<String> {
+    let mut parens = 0;
+    let mut accumulator = vec![];
+    let mut lines = vec![];
+    for (start, tok, end) in tokens.iter().flatten() {
+        accumulator.push((start, tok, end));
+        if matches!(tok, Tok::Lbrace | Tok::Lpar | Tok::Lsqb) {
+            parens += 1;
+        } else if matches!(tok, Tok::Rbrace | Tok::Rpar | Tok::Rsqb) {
+            parens -= 1;
+        } else if parens == 0 {
+            if matches!(tok, Tok::Newline) {
+                lines.push(build_line(&accumulator, locator));
+                accumulator.drain(..);
+            }
+        }
+    }
+    lines
+}
+
+static OPERATOR_REGEX: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());
+
+pub fn check_logical_lines(tokens: &[LexResult], locator: &SourceCodeLocator) {
+    for line in logical_lines(tokens, locator) {
+        for line_match in OPERATOR_REGEX.captures_iter(&line) {
+            let before = line_match.get(1).unwrap().as_str();
+            let after = line_match.get(2).unwrap().as_str();
+
+            if before.contains('\t') {
+                println!("E223 tab before operator: {line:?}");
+            } else if before.len() > 1 {
+                println!("E221 multiple spaces before operator: {line:?}");
+            }
+
+            if after.contains('\t') {
+                println!("E224 tab after operator: {line:?}");
+            } else if after.len() > 1 {
+                println!("E224 multiple spaces after operator: {line:?}");
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use rustpython_parser::lexer;
+    use rustpython_parser::lexer::LexResult;
+
+    use crate::check_logical_lines::{check_logical_lines, logical_lines};
+    use crate::SourceCodeLocator;
+
+    #[test]
+    fn test_logical_lines() {
+        let contents = "a = 12 + 3";
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        check_logical_lines(&lxr, &locator);
+
+        let contents = "a = 4 +  5";
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        check_logical_lines(&lxr, &locator);
+
+        let contents = "a = 4  + 5";
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        check_logical_lines(&lxr, &locator);
+
+        let contents = "a = 4\t + 5";
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        check_logical_lines(&lxr, &locator);
+
+        let contents = "a = 4 + \t5";
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        check_logical_lines(&lxr, &locator);
+    }
+
+    #[test]
+    fn split_logical_lines() {
+        let contents = "x = 1
+y = 2
+z = x + 1";
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        println!("{:?}", logical_lines(&lxr, &locator));
+
+        let contents = "x = [
+  1,
+  2,
+  3,
+]
+y = 2
+z = x + 1";
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        println!("{:?}", logical_lines(&lxr, &locator));
+
+        let contents = "x = 'abc'";
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        println!("{:?}", logical_lines(&lxr, &locator));
+
+        let contents = "def f():
+  x = 1
+
+f()";
+
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        println!("{:?}", logical_lines(&lxr, &locator));
+
+        let contents = r#"def f():
+  """Docstring goes here."""
+  # Comment goes here.
+  x = 1
+
+f()"#;
+
+        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
+        let locator = SourceCodeLocator::new(contents);
+        println!("{:?}", logical_lines(&lxr, &locator));
+    }
+}
diff --git a/src/check_lines.rs → src/check_physical_lines.rs b/src/check_lines.rs → src/check_physical_lines.rs
diff --git a/src/lib.rs b/src/lib.rs
@@ -29,7 +29,8 @@ pub mod autofix;
 pub mod cache;
 pub mod check_ast;
 mod check_imports;
-mod check_lines;
+pub mod check_logical_lines;
+mod check_physical_lines;
 mod check_tokens;
 pub mod checks;
 pub mod checks_gen;

diff --git a/src/linter.rs b/src/linter.rs
@@ -15,7 +15,8 @@ use crate::autofix::fixer;
 use crate::autofix::fixer::fix_file;
 use crate::check_ast::check_ast;
 use crate::check_imports::check_imports;
-use crate::check_lines::check_lines;
+use crate::check_logical_lines::check_logical_lines;
+use crate::check_physical_lines::check_lines;
 use crate::check_tokens::check_tokens;
 use crate::checks::{Check, CheckCode, CheckKind, LintSource};
 use crate::code_gen::SourceGenerator;
@@ -70,6 +71,8 @@ pub(crate) fn check_path(
         checks.extend(check_tokens(locator, &tokens, settings, autofix));
     }
 
+    check_logical_lines(&tokens, locator);
+
     // Run the AST-based checks.
     let use_ast = settings
         .enabled