From aa9b5a4f21794ee3bcb616d07bc099374734babf Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 31 Jan 2023 18:16:37 -0500 Subject: [PATCH] Implement pycodestyle's logical line detection --- src/checkers/logical_lines.rs | 184 +++++++++++++++++++ src/checkers/mod.rs | 3 +- src/checkers/{lines.rs => physical_lines.rs} | 0 src/linter.rs | 6 +- 4 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 src/checkers/logical_lines.rs rename src/checkers/{lines.rs => physical_lines.rs} (100%) diff --git a/src/checkers/logical_lines.rs b/src/checkers/logical_lines.rs new file mode 100644 index 00000000000000..ac27b3ffae7039 --- /dev/null +++ b/src/checkers/logical_lines.rs @@ -0,0 +1,184 @@ +use once_cell::sync::Lazy; +use regex::Regex; +use rustpython_ast::Location; +use rustpython_parser::lexer::{LexResult, Tok}; + +use crate::ast::types::Range; +use crate::source_code::Locator; + +fn build_line(tokens: &[(&Location, &Tok, &Location)], locator: &Locator) -> String { + let mut logical = String::new(); + let mut mapping = Vec::new(); + let mut prev: Option<&Location> = None; + for (start, tok, end) in tokens { + if matches!( + tok, + Tok::Newline | Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent | Tok::Comment { .. } + ) { + continue; + } + if mapping.is_empty() { + mapping.push((0, start)); + } + + // TODO(charlie): "Mute" strings. + let text = if let Tok::String { .. 
} = tok { + "\"\"" + } else { + locator.slice_source_code_range(&Range { + location: **start, + end_location: **end, + }) + }; + + if let Some(prev) = prev { + if prev.row() != start.row() { + let prev_text = locator.slice_source_code_range(&Range { + location: *prev, + end_location: Location::new(prev.row() + 1, 0), + }); + if prev_text == "," + || ((prev_text != "{" && prev_text != "[" && prev_text != "(") + && (text != "}" && text != "]" && text != ")")) + { + logical.push(' '); + } + } else if prev.column() != start.column() { + let prev_text = locator.slice_source_code_range(&Range { + location: *prev, + end_location: **start, + }); + logical.push_str(&prev_text); + } + } + logical.push_str(&text); + mapping.push((text.len(), end)); + prev = Some(end); + } + logical +} + +fn iter_logical_lines(tokens: &[LexResult], locator: &Locator) -> Vec<String> { + let mut parens = 0; + let mut accumulator = vec![]; + let mut lines = vec![]; + for (start, tok, end) in tokens.iter().flatten() { + accumulator.push((start, tok, end)); + if matches!(tok, Tok::Lbrace | Tok::Lpar | Tok::Lsqb) { + parens += 1; + } else if matches!(tok, Tok::Rbrace | Tok::Rpar | Tok::Rsqb) { + parens -= 1; + } else if parens == 0 { + if matches!(tok, Tok::Newline) { + lines.push(build_line(&accumulator, locator)); + accumulator.drain(..); + } + } + } + lines +} + +static OPERATOR_REGEX: Lazy<Regex> = + Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap()); + +pub fn check_logical_lines(tokens: &[LexResult], locator: &Locator) { + for line in iter_logical_lines(tokens, locator) { + for line_match in OPERATOR_REGEX.captures_iter(&line) { + let before = line_match.get(1).unwrap().as_str(); + let after = line_match.get(2).unwrap().as_str(); + + if before.contains('\t') { + println!("E223 tab before operator: {line:?}"); + } else if before.len() > 1 { + println!("E221 multiple spaces before operator: {line:?}"); + } + + if after.contains('\t') { + println!("E224 tab after operator: {line:?}"); 
+ } else if after.len() > 1 { + println!("E222 multiple spaces after operator: {line:?}"); + } + } + } +} + +#[cfg(test)] +mod tests { + use rustpython_parser::lexer; + use rustpython_parser::lexer::LexResult; + + use crate::checkers::logical_lines::{check_logical_lines, iter_logical_lines}; + use crate::source_code::Locator; + + #[test] + fn test_logical_lines() { + let contents = "a = 12 + 3"; + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator = Locator::new(contents); + check_logical_lines(&lxr, &locator); + + let contents = "a = 4 + 5"; + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator = Locator::new(contents); + check_logical_lines(&lxr, &locator); + + let contents = "a = 4 + 5"; + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator = Locator::new(contents); + check_logical_lines(&lxr, &locator); + + let contents = "a = 4\t + 5"; + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator = Locator::new(contents); + check_logical_lines(&lxr, &locator); + + let contents = "a = 4 + \t5"; + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator = Locator::new(contents); + check_logical_lines(&lxr, &locator); + } + + #[test] + fn split_logical_lines() { + let contents = "x = 1 +y = 2 +z = x + 1"; + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator = Locator::new(contents); + println!("{:?}", iter_logical_lines(&lxr, &locator)); + + let contents = "x = [ + 1, + 2, + 3, +] +y = 2 +z = x + 1"; + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator = Locator::new(contents); + println!("{:?}", iter_logical_lines(&lxr, &locator)); + + let contents = "x = 'abc'"; + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator = Locator::new(contents); + println!("{:?}", iter_logical_lines(&lxr, &locator)); + + let contents = "def f(): + x = 1 +f()"; + + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator 
= Locator::new(contents); + println!("{:?}", iter_logical_lines(&lxr, &locator)); + + let contents = r#"def f(): + """Docstring goes here.""" + # Comment goes here. + x = 1 +f()"#; + + let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect(); + let locator = Locator::new(contents); + println!("{:?}", iter_logical_lines(&lxr, &locator)); + } +} diff --git a/src/checkers/mod.rs b/src/checkers/mod.rs index cec54e376bdffa..0befee33cef527 100644 --- a/src/checkers/mod.rs +++ b/src/checkers/mod.rs @@ -1,6 +1,7 @@ pub mod ast; pub mod filesystem; pub mod imports; -pub mod lines; +pub mod logical_lines; pub mod noqa; +pub mod physical_lines; pub mod tokens; diff --git a/src/checkers/lines.rs b/src/checkers/physical_lines.rs similarity index 100% rename from src/checkers/lines.rs rename to src/checkers/physical_lines.rs diff --git a/src/linter.rs b/src/linter.rs index fff8aebfb27918..11ae02533c64c1 100644 --- a/src/linter.rs +++ b/src/linter.rs @@ -9,8 +9,9 @@ use crate::autofix::fix_file; use crate::checkers::ast::check_ast; use crate::checkers::filesystem::check_file_path; use crate::checkers::imports::check_imports; -use crate::checkers::lines::check_lines; +use crate::checkers::logical_lines::check_logical_lines; use crate::checkers::noqa::check_noqa; +use crate::checkers::physical_lines::check_lines; use crate::checkers::tokens::check_tokens; use crate::directives::Directives; use crate::doc_lines::{doc_lines_from_ast, doc_lines_from_tokens}; @@ -71,6 +72,9 @@ pub fn check_path( diagnostics.extend(check_file_path(path, package, settings)); } + // Run the logical line-based rules. + check_logical_lines(&tokens, locator); + // Run the AST-based rules. let use_ast = settings .rules