Skip to content

Commit

Permalink
Implement pycodestyle's logical line detection
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Dec 7, 2022
1 parent 528416f commit 6458aa3
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 2 deletions.
4 changes: 4 additions & 0 deletions foo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Fixture exercising backslash continuation: the two physical lines below
# form a single logical line, which the logical-line detector must join.
x = \
1

print(x)
185 changes: 185 additions & 0 deletions src/check_logical_lines.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
use std::borrow::Cow;

use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_ast::Location;
use rustpython_parser::lexer::{LexResult, Tok};

use crate::ast::types::Range;
use crate::source_code_locator::SourceCodeLocator;

/// Build a single logical line from a run of tokens, mirroring pycodestyle's
/// `build_tokens_line`: formatting-only tokens are dropped, string contents
/// are "muted", and inter-token whitespace is normalized to at most one space.
fn build_line(tokens: &[(&Location, &Tok, &Location)], locator: &SourceCodeLocator) -> String {
    let mut logical = String::new();
    // Maps offsets within `logical` back to source locations. Currently
    // unused by the checks below; retained so diagnostics can later report
    // positions in the original source (as pycodestyle's mapping does).
    let mut mapping = Vec::new();
    let mut prev: Option<&Location> = None;
    for (start, tok, end) in tokens {
        // These tokens carry no logical-line content.
        if matches!(tok, Tok::Newline | Tok::Indent | Tok::Dedent | Tok::Comment) {
            continue;
        }
        if mapping.is_empty() {
            mapping.push((0, start));
        }

        // "Mute" strings so operator/whitespace checks never fire on
        // characters inside a string literal.
        let text = if let Tok::String { .. } = tok {
            Cow::from("\"\"")
        } else {
            locator.slice_source_code_range(&Range {
                location: **start,
                end_location: **end,
            })
        };

        if let Some(prev) = prev {
            if prev.row() != start.row() {
                // The previous token ended on an earlier physical line:
                // join with a single space, except immediately after an
                // opening bracket or immediately before a closing bracket.
                let prev_text = locator.slice_source_code_range(&Range {
                    location: *prev,
                    end_location: Location::new(prev.row() + 1, 0),
                });
                // BUGFIX: the closing-bracket test previously used `||`
                // (`text != "}" || text != "]" || text != ")"`), which is
                // always true, so a spurious space was inserted before
                // closing brackets. pycodestyle's condition is
                // `text not in '}])'`, i.e. a conjunction.
                if prev_text == ","
                    || ((prev_text != "{" && prev_text != "[" && prev_text != "(")
                        && (text != "}" && text != "]" && text != ")"))
                {
                    logical.push(' ');
                }
            } else if prev.column() != start.column() {
                // Same physical line: preserve the original whitespace run
                // between the two tokens verbatim.
                let prev_text = locator.slice_source_code_range(&Range {
                    location: *prev,
                    end_location: **start,
                });
                logical.push_str(&prev_text);
            }
        }
        logical.push_str(&text);
        mapping.push((text.len(), end));
        prev = Some(end);
    }
    logical
}

/// Split a token stream into pycodestyle-style logical lines: a logical line
/// ends at each `Newline` token that occurs outside any open bracket pair.
pub fn logical_lines(tokens: &[LexResult], locator: &SourceCodeLocator) -> Vec<String> {
    let mut lines = Vec::new();
    let mut buffer = Vec::new();
    // Bracket nesting depth; newlines inside brackets never end a line.
    let mut depth = 0;
    for (start, tok, end) in tokens.iter().flatten() {
        buffer.push((start, tok, end));
        match tok {
            Tok::Lbrace | Tok::Lpar | Tok::Lsqb => depth += 1,
            Tok::Rbrace | Tok::Rpar | Tok::Rsqb => depth -= 1,
            Tok::Newline if depth == 0 => {
                lines.push(build_line(&buffer, locator));
                buffer.clear();
            }
            _ => {}
        }
    }
    lines
}

// Matches an operator (or the walrus `:=`) preceded by a non-comma,
// non-whitespace character. Capture group 1 is the whitespace run before the
// operator, group 2 the run after — the E22x checks below inspect both.
static OPERATOR_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[^,\s](\s*)(?:[-+*/|!<=>%&^]+|:=)(\s*)").unwrap());

/// Run pycodestyle's whitespace-around-operator checks (E221–E224) over the
/// logical lines derived from `tokens`.
///
/// NOTE(review): results are currently printed to stdout rather than being
/// collected as diagnostics — presumably a scaffold to be wired into the
/// check machinery later.
pub fn check_logical_lines(tokens: &[LexResult], locator: &SourceCodeLocator) {
    for line in logical_lines(tokens, locator) {
        for line_match in OPERATOR_REGEX.captures_iter(&line) {
            // Group 1: whitespace before the operator; group 2: after it.
            let before = line_match.get(1).unwrap().as_str();
            let after = line_match.get(2).unwrap().as_str();

            if before.contains('\t') {
                println!("E223 tab before operator: {line:?}");
            } else if before.len() > 1 {
                println!("E221 multiple spaces before operator: {line:?}");
            }

            if after.contains('\t') {
                println!("E224 tab after operator: {line:?}");
            } else if after.len() > 1 {
                // BUGFIX: this previously reported E224, duplicating the tab
                // check above. In pycodestyle, multiple spaces after an
                // operator is E222; E224 is reserved for tabs.
                println!("E222 multiple spaces after operator: {line:?}");
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use rustpython_parser::lexer;
    use rustpython_parser::lexer::LexResult;

    use crate::check_logical_lines::{check_logical_lines, logical_lines};
    use crate::SourceCodeLocator;

    // NOTE(review): both tests below only print their results and contain no
    // assertions — they exercise the code paths but cannot fail on wrong
    // output. Consider asserting on the returned logical lines instead.
    #[test]
    fn test_logical_lines() {
        // Tab before the operator (should surface E223).
        let contents = "a = 12 + 3";
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        check_logical_lines(&lxr, &locator);

        // NOTE(review): the next two cases are byte-identical; the scrape may
        // have collapsed runs of spaces that originally distinguished them
        // (e.g. multiple spaces before vs. after the operator) — TODO confirm
        // against the repository.
        let contents = "a = 4 + 5";
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        check_logical_lines(&lxr, &locator);

        let contents = "a = 4 + 5";
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        check_logical_lines(&lxr, &locator);

        // Tab adjacent to the operator on the before side.
        let contents = "a = 4\t + 5";
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        check_logical_lines(&lxr, &locator);

        // Tab adjacent to the operator on the after side.
        let contents = "a = 4 + \t5";
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        check_logical_lines(&lxr, &locator);
    }

    #[test]
    fn split_logical_lines() {
        // Three simple statements: three logical lines.
        let contents = "x = 1
y = 2
z = x + 1";
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        println!("{:?}", logical_lines(&lxr, &locator));

        // A bracketed expression spanning physical lines must still form a
        // single logical line.
        let contents = "x = [
1,
2,
3,
]
y = 2
z = x + 1";
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        println!("{:?}", logical_lines(&lxr, &locator));

        // String contents should be muted in the logical line.
        let contents = "x = 'abc'";
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        println!("{:?}", logical_lines(&lxr, &locator));

        // NOTE(review): the function bodies in the literals below appear at
        // column 0, which is not valid Python after `def f():` — the scrape
        // likely stripped the indentation inside these string literals; TODO
        // confirm the original fixture text against the repository.
        let contents = "def f():
x = 1
f()";

        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        println!("{:?}", logical_lines(&lxr, &locator));

        // Docstrings and comments must not leak into logical lines.
        let contents = r#"def f():
"""Docstring goes here."""
# Comment goes here.
x = 1
f()"#;

        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let locator = SourceCodeLocator::new(contents);
        println!("{:?}", logical_lines(&lxr, &locator));
    }
}
File renamed without changes.
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ pub mod autofix;
pub mod cache;
pub mod check_ast;
mod check_imports;
mod check_lines;
pub mod check_logical_lines;
mod check_physical_lines;
mod check_tokens;
pub mod checks;
pub mod checks_gen;
Expand Down
5 changes: 4 additions & 1 deletion src/linter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ use crate::autofix::fixer;
use crate::autofix::fixer::fix_file;
use crate::check_ast::check_ast;
use crate::check_imports::check_imports;
use crate::check_lines::check_lines;
use crate::check_logical_lines::check_logical_lines;
use crate::check_physical_lines::check_lines;
use crate::check_tokens::check_tokens;
use crate::checks::{Check, CheckCode, CheckKind, LintSource};
use crate::code_gen::SourceGenerator;
Expand Down Expand Up @@ -70,6 +71,8 @@ pub(crate) fn check_path(
checks.extend(check_tokens(locator, &tokens, settings, autofix));
}

check_logical_lines(&tokens, locator);

// Run the AST-based checks.
let use_ast = settings
.enabled
Expand Down

0 comments on commit 6458aa3

Please sign in to comment.