From ed471c7e8ee8dfb17cc9f6cf438e38a5e0b1b82e Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Mon, 28 Jul 2025 20:41:01 -0400 Subject: [PATCH 1/2] Add `DiagnosticReporter` trait, deduplicate Unicode checks Summary -- This PR is stacked on #19608 and of more dubious value. It bothered me to see that `Candidate::into_diagnostic` and `Candidate::report_diagnostic` were essentially the same, differing only in taking a `LintContext` instead of a `Checker`. This was also the only reason we needed to collect a `Vec` of `Candidate`s in `ambiguous_unicode_character` instead of reporting them directly. This PR fixes those two concerns, but at the cost of introducing a trait implemented by `LintContext` and `Checker`. I'm leaning towards it not being worth it, unless we think the trait will be useful elsewhere. If we do want to keep the trait, we could obviously move `report_diagnostic` into it and move the actual implementations into the trait impls. I held off doing that for now to avoid a big import diff, especially if we didn't want this change at all. Test Plan -- Existing tests --- crates/ruff_linter/src/checkers/ast/mod.rs | 33 +++++++ .../ruff/rules/ambiguous_unicode_character.rs | 99 +++++++------------ 2 files changed, 67 insertions(+), 65 deletions(-) diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index f2587a6597936..f0a4746947b18 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -3326,3 +3326,36 @@ impl Drop for DiagnosticGuard<'_, '_> { } } } + +pub(crate) trait DiagnosticReporter<'a> { + /// Return a [`DiagnosticGuard`] for reporting a diagnostic if the corresponding rule is + /// enabled. + /// + /// The guard derefs to a [`Diagnostic`], so it can be used to further modify the diagnostic + /// before it is added to the collection in the context on `Drop`. + fn report_diagnostic_if_enabled<'chk, T: Violation>( + &'chk self, + kind: T, + range: TextRange, + ) -> Option>; +} + +impl<'a> DiagnosticReporter<'a> for Checker<'a> { + fn report_diagnostic_if_enabled<'chk, T: Violation>( + &'chk self, + kind: T, + range: TextRange, + ) -> Option> { + self.report_diagnostic_if_enabled(kind, range) + } +} + +impl<'a> DiagnosticReporter<'a> for LintContext<'a> { + fn report_diagnostic_if_enabled<'chk, T: Violation>( + &'chk self, + kind: T, + range: TextRange, + ) -> Option> { + self.report_diagnostic_if_enabled(kind, range) + } +} diff --git a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs index 4d599d40f794c..b5c4384c8b3e4 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs @@ -8,6 +8,7 @@ use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::Locator; use crate::Violation; +use crate::checkers::ast::DiagnosticReporter; use crate::checkers::ast::{Checker, LintContext}; use crate::preview::is_unicode_to_unicode_confusables_enabled; use crate::rules::ruff::rules::Context; @@ -180,9 +181,7 @@ pub(crate) fn ambiguous_unicode_character_comment( range: TextRange, ) { let text = locator.slice(range); - for candidate in ambiguous_unicode_character(text, range, context.settings()) { - candidate.into_diagnostic(Context::Comment, context); - } + ambiguous_unicode_character(text, range, context.settings(), Context::Comment, context); } /// RUF001, RUF002 @@ -203,38 +202,42 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like: match part { ast::StringLikePart::String(string_literal) => { let text = checker.locator().slice(string_literal); - for candidate in - ambiguous_unicode_character(text, string_literal.range(), checker.settings()) - { - candidate.report_diagnostic(checker, context); - } + ambiguous_unicode_character( + text, + string_literal.range(), + checker.settings(), + context, + checker, + ); } ast::StringLikePart::Bytes(_) => {} ast::StringLikePart::FString(FString { elements, .. }) | ast::StringLikePart::TString(TString { elements, .. }) => { for literal in elements.literals() { let text = checker.locator().slice(literal); - for candidate in - ambiguous_unicode_character(text, literal.range(), checker.settings()) - { - candidate.report_diagnostic(checker, context); - } + ambiguous_unicode_character( + text, + literal.range(), + checker.settings(), + context, + checker, + ); } } } } } -fn ambiguous_unicode_character( +fn ambiguous_unicode_character<'a>( text: &str, range: TextRange, settings: &LinterSettings, -) -> Vec { - let mut candidates = Vec::new(); - + context: Context, + reporter: &impl DiagnosticReporter<'a>, +) { // Most of the time, we don't need to check for ambiguous unicode characters at all. if text.is_ascii() { - return candidates; + return; } // Iterate over the "words" in the text. @@ -246,7 +249,7 @@ fn ambiguous_unicode_character( if !word_candidates.is_empty() { if word_flags.is_candidate_word() { for candidate in word_candidates.drain(..) { - candidates.push(candidate); + candidate.into_diagnostic(context, reporter, settings); } } word_candidates.clear(); @@ -264,7 +267,7 @@ fn ambiguous_unicode_character( current_char, representant, ); - candidates.push(candidate); + candidate.into_diagnostic(context, reporter, settings); } } } else if current_char.is_ascii() { @@ -289,13 +292,11 @@ fn ambiguous_unicode_character( if !word_candidates.is_empty() { if word_flags.is_candidate_word() { for candidate in word_candidates.drain(..) { - candidates.push(candidate); + candidate.into_diagnostic(context, reporter, settings); } } word_candidates.clear(); } - - candidates } bitflags! { @@ -341,62 +342,30 @@ impl Candidate { } } - fn into_diagnostic(self, context: Context, lint_context: &LintContext) { - if !lint_context - .settings() - .allowed_confusables - .contains(&self.confusable) - { - let char_range = TextRange::at(self.offset, self.confusable.text_len()); - match context { - Context::String => lint_context.report_diagnostic_if_enabled( - AmbiguousUnicodeCharacterString { - confusable: self.confusable, - representant: self.representant, - }, - char_range, - ), - Context::Docstring => lint_context.report_diagnostic_if_enabled( - AmbiguousUnicodeCharacterDocstring { - confusable: self.confusable, - representant: self.representant, - }, - char_range, - ), - Context::Comment => lint_context.report_diagnostic_if_enabled( - AmbiguousUnicodeCharacterComment { - confusable: self.confusable, - representant: self.representant, - }, - char_range, - ), - }; - } - } - - fn report_diagnostic(self, checker: &Checker, context: Context) { - if !checker - .settings() - .allowed_confusables - .contains(&self.confusable) - { + fn into_diagnostic<'a>( + self, + context: Context, + reporter: &impl DiagnosticReporter<'a>, + settings: &LinterSettings, + ) { + if !settings.allowed_confusables.contains(&self.confusable) { let char_range = TextRange::at(self.offset, self.confusable.text_len()); match context { - Context::String => checker.report_diagnostic_if_enabled( + Context::String => reporter.report_diagnostic_if_enabled( AmbiguousUnicodeCharacterString { confusable: self.confusable, representant: self.representant, }, char_range, ), - Context::Docstring => checker.report_diagnostic_if_enabled( + Context::Docstring => reporter.report_diagnostic_if_enabled( AmbiguousUnicodeCharacterDocstring { confusable: self.confusable, representant: self.representant, }, char_range, ), - Context::Comment => checker.report_diagnostic_if_enabled( + Context::Comment => reporter.report_diagnostic_if_enabled( AmbiguousUnicodeCharacterComment { confusable: self.confusable, representant: self.representant, From 49c5d57b593491559c0bedea71c391b40e2c086d Mon Sep 17 00:00:00 2001 From: Brent Westbrook Date: Tue, 29 Jul 2025 15:15:48 -0400 Subject: [PATCH 2/2] use Checker::context instead of a trait --- crates/ruff_linter/src/checkers/ast/mod.rs | 43 ++++------------ .../ruff/rules/ambiguous_unicode_character.rs | 51 ++++++++----------- 2 files changed, 31 insertions(+), 63 deletions(-) diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index f0a4746947b18..8f9e5511fc352 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -590,6 +590,16 @@ impl<'a> Checker<'a> { member, }) } + + /// Return the [`LintContext`] for the current analysis. + /// + /// Note that you should always prefer calling methods like `settings`, `report_diagnostic`, or + /// `is_rule_enabled` directly on [`Checker`] when possible. This method exists only for the + /// rare cases where rules or helper functions need to be accessed by both a `Checker` and a + /// `LintContext` in different analysis phases. + pub(crate) const fn context(&self) -> &'a LintContext<'a> { + self.context + } } pub(crate) struct TypingImporter<'a, 'b> { @@ -3326,36 +3336,3 @@ impl Drop for DiagnosticGuard<'_, '_> { } } } - -pub(crate) trait DiagnosticReporter<'a> { - /// Return a [`DiagnosticGuard`] for reporting a diagnostic if the corresponding rule is - /// enabled. - /// - /// The guard derefs to a [`Diagnostic`], so it can be used to further modify the diagnostic - /// before it is added to the collection in the context on `Drop`. - fn report_diagnostic_if_enabled<'chk, T: Violation>( - &'chk self, - kind: T, - range: TextRange, - ) -> Option>; -} - -impl<'a> DiagnosticReporter<'a> for Checker<'a> { - fn report_diagnostic_if_enabled<'chk, T: Violation>( - &'chk self, - kind: T, - range: TextRange, - ) -> Option> { - self.report_diagnostic_if_enabled(kind, range) - } -} - -impl<'a> DiagnosticReporter<'a> for LintContext<'a> { - fn report_diagnostic_if_enabled<'chk, T: Violation>( - &'chk self, - kind: T, - range: TextRange, - ) -> Option> { - self.report_diagnostic_if_enabled(kind, range) - } -} diff --git a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs index b5c4384c8b3e4..9ee9352c64889 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs @@ -8,12 +8,10 @@ use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use crate::Locator; use crate::Violation; -use crate::checkers::ast::DiagnosticReporter; use crate::checkers::ast::{Checker, LintContext}; use crate::preview::is_unicode_to_unicode_confusables_enabled; use crate::rules::ruff::rules::Context; use crate::rules::ruff::rules::confusables::confusable; -use crate::settings::LinterSettings; /// ## What it does /// Checks for ambiguous Unicode characters in strings. @@ -181,7 +179,7 @@ pub(crate) fn ambiguous_unicode_character_comment( range: TextRange, ) { let text = locator.slice(range); - ambiguous_unicode_character(text, range, context.settings(), Context::Comment, context); + ambiguous_unicode_character(text, range, Context::Comment, context); } /// RUF001, RUF002 @@ -205,9 +203,8 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like: ambiguous_unicode_character( text, string_literal.range(), - checker.settings(), context, - checker, + checker.context(), ); } ast::StringLikePart::Bytes(_) => {} @@ -215,25 +212,18 @@ pub(crate) fn ambiguous_unicode_character_string(checker: &Checker, string_like: | ast::StringLikePart::TString(TString { elements, .. }) => { for literal in elements.literals() { let text = checker.locator().slice(literal); - ambiguous_unicode_character( - text, - literal.range(), - checker.settings(), - context, - checker, - ); + ambiguous_unicode_character(text, literal.range(), context, checker.context()); } } } } } -fn ambiguous_unicode_character<'a>( +fn ambiguous_unicode_character( text: &str, range: TextRange, - settings: &LinterSettings, context: Context, - reporter: &impl DiagnosticReporter<'a>, + lint_context: &LintContext, ) { // Most of the time, we don't need to check for ambiguous unicode characters at all. if text.is_ascii() { @@ -249,7 +239,7 @@ fn ambiguous_unicode_character<'a>( if !word_candidates.is_empty() { if word_flags.is_candidate_word() { for candidate in word_candidates.drain(..) { - candidate.into_diagnostic(context, reporter, settings); + candidate.into_diagnostic(context, lint_context); } } word_candidates.clear(); @@ -260,21 +250,23 @@ fn ambiguous_unicode_character<'a>( // case, it's always included as a diagnostic. if !current_char.is_ascii() { if let Some(representant) = confusable(current_char as u32).filter(|representant| { - is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii() + is_unicode_to_unicode_confusables_enabled(lint_context.settings()) + || representant.is_ascii() }) { let candidate = Candidate::new( TextSize::try_from(relative_offset).unwrap() + range.start(), current_char, representant, ); - candidate.into_diagnostic(context, reporter, settings); + candidate.into_diagnostic(context, lint_context); } } } else if current_char.is_ascii() { // The current word contains at least one ASCII character. word_flags |= WordFlags::ASCII; } else if let Some(representant) = confusable(current_char as u32).filter(|representant| { - is_unicode_to_unicode_confusables_enabled(settings) || representant.is_ascii() + is_unicode_to_unicode_confusables_enabled(lint_context.settings()) + || representant.is_ascii() }) { // The current word contains an ambiguous unicode character. word_candidates.push(Candidate::new( @@ -292,7 +284,7 @@ fn ambiguous_unicode_character<'a>( if !word_candidates.is_empty() { if word_flags.is_candidate_word() { for candidate in word_candidates.drain(..) { - candidate.into_diagnostic(context, reporter, settings); + candidate.into_diagnostic(context, lint_context); } } word_candidates.clear(); @@ -342,30 +334,29 @@ impl Candidate { } } - fn into_diagnostic<'a>( - self, - context: Context, - reporter: &impl DiagnosticReporter<'a>, - settings: &LinterSettings, - ) { - if !settings.allowed_confusables.contains(&self.confusable) { + fn into_diagnostic(self, context: Context, lint_context: &LintContext) { + if !lint_context + .settings() + .allowed_confusables + .contains(&self.confusable) + { let char_range = TextRange::at(self.offset, self.confusable.text_len()); match context { - Context::String => reporter.report_diagnostic_if_enabled( + Context::String => lint_context.report_diagnostic_if_enabled( AmbiguousUnicodeCharacterString { confusable: self.confusable, representant: self.representant, }, char_range, ), - Context::Docstring => reporter.report_diagnostic_if_enabled( + Context::Docstring => lint_context.report_diagnostic_if_enabled( AmbiguousUnicodeCharacterDocstring { confusable: self.confusable, representant: self.representant, }, char_range, ), - Context::Comment => reporter.report_diagnostic_if_enabled( + Context::Comment => lint_context.report_diagnostic_if_enabled( AmbiguousUnicodeCharacterComment { confusable: self.confusable, representant: self.representant,