diff --git a/crates/ruff/resources/test/fixtures/ruff/RUF017.py b/crates/ruff/resources/test/fixtures/ruff/RUF017.py new file mode 100644 index 0000000000000..f7c062d894778 --- /dev/null +++ b/crates/ruff/resources/test/fixtures/ruff/RUF017.py @@ -0,0 +1,14 @@ +x = [1, 2, 3] +y = [4, 5, 6] + +# RUF017 +sum([x, y], start=[]) +sum([x, y], []) +sum([[1, 2, 3], [4, 5, 6]], start=[]) +sum([[1, 2, 3], [4, 5, 6]], []) +sum([[1, 2, 3], [4, 5, 6]], + []) + +# OK +sum([x, y]) +sum([[1, 2, 3], [4, 5, 6]]) diff --git a/crates/ruff/src/checkers/ast/analyze/expression.rs b/crates/ruff/src/checkers/ast/analyze/expression.rs index 3ab6021c1a5d8..14278acc53fa9 100644 --- a/crates/ruff/src/checkers/ast/analyze/expression.rs +++ b/crates/ruff/src/checkers/ast/analyze/expression.rs @@ -873,6 +873,9 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) { if checker.enabled(Rule::UnsupportedMethodCallOnAll) { flake8_pyi::rules::unsupported_method_call_on_all(checker, func); } + if checker.enabled(Rule::QuadraticListSummation) { + ruff::rules::quadratic_list_summation(checker, call); + } } Expr::Dict(ast::ExprDict { keys, diff --git a/crates/ruff/src/codes.rs b/crates/ruff/src/codes.rs index 8d011aafa196e..d7f115bb29065 100644 --- a/crates/ruff/src/codes.rs +++ b/crates/ruff/src/codes.rs @@ -816,6 +816,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> { (Ruff, "014") => (RuleGroup::Nursery, rules::ruff::rules::UnreachableCode), (Ruff, "015") => (RuleGroup::Unspecified, rules::ruff::rules::UnnecessaryIterableAllocationForFirstElement), (Ruff, "016") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidIndexType), + (Ruff, "017") => (RuleGroup::Nursery, rules::ruff::rules::QuadraticListSummation), (Ruff, "100") => (RuleGroup::Unspecified, rules::ruff::rules::UnusedNOQA), (Ruff, "200") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidPyprojectToml), diff --git a/crates/ruff/src/rules/ruff/mod.rs b/crates/ruff/src/rules/ruff/mod.rs index e49a97709ee9c..be54ab37f2f7c 100644 --- a/crates/ruff/src/rules/ruff/mod.rs +++ b/crates/ruff/src/rules/ruff/mod.rs @@ -40,6 +40,7 @@ mod tests { feature = "unreachable-code", test_case(Rule::UnreachableCode, Path::new("RUF014.py")) )] + #[test_case(Rule::QuadraticListSummation, Path::new("RUF017.py"))] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff/src/rules/ruff/rules/mod.rs b/crates/ruff/src/rules/ruff/rules/mod.rs index e4d39feefd825..8ffa324791c30 100644 --- a/crates/ruff/src/rules/ruff/rules/mod.rs +++ b/crates/ruff/src/rules/ruff/rules/mod.rs @@ -40,3 +40,6 @@ pub(crate) enum Context { Docstring, Comment, } +pub(crate) use quadratic_list_summation::*; + +mod quadratic_list_summation; diff --git a/crates/ruff/src/rules/ruff/rules/quadratic_list_summation.rs b/crates/ruff/src/rules/ruff/rules/quadratic_list_summation.rs new file mode 100644 index 0000000000000..21966e5778777 --- /dev/null +++ b/crates/ruff/src/rules/ruff/rules/quadratic_list_summation.rs @@ -0,0 +1,133 @@ +use anyhow::Result; + +use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; +use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::{self as ast, Arguments, Expr, Ranged}; +use ruff_python_semantic::SemanticModel; + +use crate::importer::ImportRequest; +use crate::{checkers::ast::Checker, registry::Rule}; + +/// ## What it does +/// Checks for the use of `sum()` to flatten lists of lists, which has +/// quadratic complexity. +/// +/// ## Why is this bad? +/// The use of `sum()` to flatten lists of lists is quadratic in the number of +/// lists, as `sum()` creates a new list for each element in the summation. +/// +/// Instead, consider using another method of flattening lists to avoid +/// quadratic complexity. The following methods are all linear in the number of +/// lists: +/// +/// - `functools.reduce(operator.iconcat, lists, [])` +/// - `list(itertools.chain.from_iterable(lists)` +/// - `[item for sublist in lists for item in sublist]` +/// +/// ## Example +/// ```python +/// lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] +/// joined = sum(lists, []) +/// ``` +/// +/// Use instead: +/// ```python +/// import functools +/// import operator +/// +/// +/// lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] +/// functools.reduce(operator.iconcat, lists, []) +/// ``` +/// +/// ## References +/// - [_How Not to Flatten a List of Lists in Python_](https://mathieularose.com/how-not-to-flatten-a-list-of-lists-in-python) +/// - [_How do I make a flat list out of a list of lists?_](https://stackoverflow.com/questions/952914/how-do-i-make-a-flat-list-out-of-a-list-of-lists/953097#953097) +#[violation] +pub struct QuadraticListSummation; + +impl AlwaysAutofixableViolation for QuadraticListSummation { + #[derive_message_formats] + fn message(&self) -> String { + format!("Avoid quadratic list summation") + } + + fn autofix_title(&self) -> String { + format!("Replace with `functools.reduce`") + } +} + +/// RUF017 +pub(crate) fn quadratic_list_summation(checker: &mut Checker, call: &ast::ExprCall) { + let ast::ExprCall { + func, + arguments, + range, + } = call; + + if !func_is_builtin(func, "sum", checker.semantic()) { + return; + } + + if !start_is_empty_list(arguments, checker.semantic()) { + return; + }; + + let Some(iterable) = arguments.args.first() else { + return; + }; + + let mut diagnostic = Diagnostic::new(QuadraticListSummation, *range); + if checker.patch(Rule::QuadraticListSummation) { + diagnostic.try_set_fix(|| convert_to_reduce(iterable, call, checker)); + } + checker.diagnostics.push(diagnostic); +} + +/// Generate a [`Fix`] to convert a `sum()` call to a `functools.reduce()` call. +fn convert_to_reduce(iterable: &Expr, call: &ast::ExprCall, checker: &Checker) -> Result { + let (reduce_edit, reduce_binding) = checker.importer().get_or_import_symbol( + &ImportRequest::import("functools", "reduce"), + call.start(), + checker.semantic(), + )?; + + let (iadd_edit, iadd_binding) = checker.importer().get_or_import_symbol( + &ImportRequest::import("operator", "iadd"), + iterable.start(), + checker.semantic(), + )?; + + let iterable = checker.locator().slice(iterable.range()); + + Ok(Fix::suggested_edits( + Edit::range_replacement( + format!("{reduce_binding}({iadd_binding}, {iterable}, [])"), + call.range(), + ), + [reduce_edit, iadd_edit], + )) +} + +/// Check if a function is a builtin with a given name. +fn func_is_builtin(func: &Expr, name: &str, semantic: &SemanticModel) -> bool { + let Expr::Name(ast::ExprName { id, .. }) = func else { + return false; + }; + id == name && semantic.is_builtin(id) +} + +/// Returns `true` if the `start` argument to a `sum()` call is an empty list. +fn start_is_empty_list(arguments: &Arguments, semantic: &SemanticModel) -> bool { + let Some(keyword) = arguments.find_keyword("start") else { + return false; + }; + + match &keyword.value { + Expr::Call(ast::ExprCall { + func, arguments, .. + }) => arguments.is_empty() && func_is_builtin(func, "list", semantic), + Expr::List(ast::ExprList { elts, ctx, .. }) => elts.is_empty() && ctx.is_load(), + _ => false, + } +} diff --git a/crates/ruff/src/rules/ruff/snapshots/ruff__rules__ruff__tests__RUF017_RUF017.py.snap b/crates/ruff/src/rules/ruff/snapshots/ruff__rules__ruff__tests__RUF017_RUF017.py.snap new file mode 100644 index 0000000000000..161fa644c1986 --- /dev/null +++ b/crates/ruff/src/rules/ruff/snapshots/ruff__rules__ruff__tests__RUF017_RUF017.py.snap @@ -0,0 +1,53 @@ +--- +source: crates/ruff/src/rules/ruff/mod.rs +--- +RUF017.py:5:1: RUF017 [*] Avoid quadratic list summation + | +4 | # RUF017 +5 | sum([x, y], start=[]) + | ^^^^^^^^^^^^^^^^^^^^^ RUF017 +6 | sum([x, y], []) +7 | sum([[1, 2, 3], [4, 5, 6]], start=[]) + | + = help: Replace with `functools.reduce` + +ℹ Suggested fix + 1 |+import functools + 2 |+import operator +1 3 | x = [1, 2, 3] +2 4 | y = [4, 5, 6] +3 5 | +4 6 | # RUF017 +5 |-sum([x, y], start=[]) + 7 |+functools.reduce(operator.iadd, [x, y], []) +6 8 | sum([x, y], []) +7 9 | sum([[1, 2, 3], [4, 5, 6]], start=[]) +8 10 | sum([[1, 2, 3], [4, 5, 6]], []) + +RUF017.py:7:1: RUF017 [*] Avoid quadratic list summation + | +5 | sum([x, y], start=[]) +6 | sum([x, y], []) +7 | sum([[1, 2, 3], [4, 5, 6]], start=[]) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ RUF017 +8 | sum([[1, 2, 3], [4, 5, 6]], []) +9 | sum([[1, 2, 3], [4, 5, 6]], + | + = help: Replace with `functools.reduce` + +ℹ Suggested fix + 1 |+import functools + 2 |+import operator +1 3 | x = [1, 2, 3] +2 4 | y = [4, 5, 6] +3 5 | +4 6 | # RUF017 +5 7 | sum([x, y], start=[]) +6 8 | sum([x, y], []) +7 |-sum([[1, 2, 3], [4, 5, 6]], start=[]) + 9 |+functools.reduce(operator.iadd, [[1, 2, 3], [4, 5, 6]], []) +8 10 | sum([[1, 2, 3], [4, 5, 6]], []) +9 11 | sum([[1, 2, 3], [4, 5, 6]], +10 12 | []) + + diff --git a/ruff.schema.json b/ruff.schema.json index 04ccd6b10fb99..74a05d9fadf78 100644 --- a/ruff.schema.json +++ b/ruff.schema.json @@ -2471,6 +2471,7 @@ "RUF013", "RUF015", "RUF016", + "RUF017", "RUF1", "RUF10", "RUF100",