Skip to content

Commit

Permalink
Rollup merge of #129899 - veera-sivarajan:fix-97793-pr-final, r=cheny…
Browse files Browse the repository at this point in the history
…ukang

Add Suggestions for Misspelled Keywords

Fixes #97793

This PR detects misspelled keywords using two heuristics:

1. Lowercasing the unexpected identifier.
2. Using edit distance to find a keyword similar to the unexpected identifier.

However, it does not detect each and every misspelled keyword to
minimize false positives and ambiguities. More details about the
implementation can be found in the comments.
  • Loading branch information
compiler-errors authored Sep 7, 2024
2 parents 6dd07e4 + 14e86eb commit d6a4298
Show file tree
Hide file tree
Showing 59 changed files with 702 additions and 12 deletions.
6 changes: 6 additions & 0 deletions compiler/rustc_parse/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ parse_invalid_char_in_escape_msg = invalid character in {$is_hex ->
*[false] unicode
} escape
parse_invalid_comparison_operator = invalid comparison operator `{$invalid}`
.use_instead = `{$invalid}` is not a valid comparison operator, use `{$correct}`
.spaceship_operator_invalid = `<=>` is not a valid comparison operator, use `std::cmp::Ordering`
Expand Down Expand Up @@ -581,6 +582,11 @@ parse_missing_trait_in_trait_impl = missing trait in a trait impl
.suggestion_add_trait = add a trait here
.suggestion_remove_for = for an inherent impl, drop this `for`
parse_misspelled_kw = {$is_incorrect_case ->
[true] write keyword `{$similar_kw}` in lowercase
*[false] there is a keyword `{$similar_kw}` with a similar name
}
parse_modifier_lifetime = `{$modifier}` may only modify trait bounds, not lifetime bounds
.suggestion = remove the `{$modifier}`
Expand Down
85 changes: 82 additions & 3 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ use rustc_errors::{
Subdiagnostic,
};
use rustc_session::errors::ExprParenthesesNeeded;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::source_map::Spanned;
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::symbol::{kw, sym, AllKeywords, Ident};
use rustc_span::{BytePos, Span, SpanSnippetError, Symbol, DUMMY_SP};
use thin_vec::{thin_vec, ThinVec};
use tracing::{debug, trace};
Expand Down Expand Up @@ -203,6 +204,37 @@ impl std::fmt::Display for UnaryFixity {
}
}

#[derive(Debug, rustc_macros::Subdiagnostic)]
#[suggestion(
parse_misspelled_kw,
applicability = "machine-applicable",
code = "{similar_kw}",
style = "verbose"
)]
struct MisspelledKw {
similar_kw: String,
#[primary_span]
span: Span,
is_incorrect_case: bool,
}

/// Checks if the given `lookup` identifier is similar to any keyword symbol in `candidates`.
fn find_similar_kw(lookup: Ident, candidates: &[Symbol]) -> Option<MisspelledKw> {
let lowercase = lookup.name.as_str().to_lowercase();
let lowercase_sym = Symbol::intern(&lowercase);
if candidates.contains(&lowercase_sym) {
Some(MisspelledKw { similar_kw: lowercase, span: lookup.span, is_incorrect_case: true })
} else if let Some(similar_sym) = find_best_match_for_name(candidates, lookup.name, None) {
Some(MisspelledKw {
similar_kw: similar_sym.to_string(),
span: lookup.span,
is_incorrect_case: false,
})
} else {
None
}
}

struct MultiSugg {
msg: String,
patches: Vec<(Span, String)>,
Expand Down Expand Up @@ -638,9 +670,9 @@ impl<'a> Parser<'a> {
let concat = Symbol::intern(&format!("{prev}{cur}"));
let ident = Ident::new(concat, DUMMY_SP);
if ident.is_used_keyword() || ident.is_reserved() || ident.is_raw_guess() {
let span = self.prev_token.span.to(self.token.span);
let concat_span = self.prev_token.span.to(self.token.span);
err.span_suggestion_verbose(
span,
concat_span,
format!("consider removing the space to spell keyword `{concat}`"),
concat,
Applicability::MachineApplicable,
Expand Down Expand Up @@ -741,9 +773,55 @@ impl<'a> Parser<'a> {
err.span_label(sp, label_exp);
err.span_label(self.token.span, "unexpected token");
}

// Check for misspelled keywords if there are no suggestions added to the diagnostic.
if err.suggestions.as_ref().is_ok_and(|code_suggestions| code_suggestions.is_empty()) {
self.check_for_misspelled_kw(&mut err, &expected);
}
Err(err)
}

/// Checks if the current token or the previous token are misspelled keywords
/// and adds a helpful suggestion.
fn check_for_misspelled_kw(&self, err: &mut Diag<'_>, expected: &[TokenType]) {
let Some((curr_ident, _)) = self.token.ident() else {
return;
};
let expected_tokens: &[TokenType] =
expected.len().checked_sub(10).map_or(&expected, |index| &expected[index..]);
let expected_keywords: Vec<Symbol> = expected_tokens
.iter()
.filter_map(|token| if let TokenType::Keyword(kw) = token { Some(*kw) } else { None })
.collect();

// When there are a few keywords in the last ten elements of `self.expected_tokens` and the current
// token is an identifier, it's probably a misspelled keyword.
// This handles code like `async Move {}`, misspelled `if` in match guard, misspelled `else` in `if`-`else`
// and mispelled `where` in a where clause.
if !expected_keywords.is_empty()
&& !curr_ident.is_used_keyword()
&& let Some(misspelled_kw) = find_similar_kw(curr_ident, &expected_keywords)
{
err.subdiagnostic(misspelled_kw);
} else if let Some((prev_ident, _)) = self.prev_token.ident()
&& !prev_ident.is_used_keyword()
{
// We generate a list of all keywords at runtime rather than at compile time
// so that it gets generated only when the diagnostic needs it.
// Also, it is unlikely that this list is generated multiple times because the
// parser halts after execution hits this path.
let all_keywords = AllKeywords::new().collect_used(|| prev_ident.span.edition());

// Otherwise, check the previous token with all the keywords as possible candidates.
// This handles code like `Struct Human;` and `While a < b {}`.
// We check the previous token only when the current token is an identifier to avoid false
// positives like suggesting keyword `for` for `extern crate foo {}`.
if let Some(misspelled_kw) = find_similar_kw(prev_ident, &all_keywords) {
err.subdiagnostic(misspelled_kw);
}
}
}

/// The user has written `#[attr] expr` which is unsupported. (#106020)
pub(super) fn attr_on_non_tail_expr(&self, expr: &Expr) -> ErrorGuaranteed {
// Missing semicolon typo error.
Expand Down Expand Up @@ -846,6 +924,7 @@ impl<'a> Parser<'a> {
);
}
}

err.emit()
}

Expand Down
42 changes: 41 additions & 1 deletion compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ mod tests;

// The proc macro code for this is in `compiler/rustc_macros/src/symbols.rs`.
symbols! {
// If you modify this list, adjust `is_special` and `is_used_keyword`/`is_unused_keyword`.
// If you modify this list, adjust `is_special`, `is_used_keyword`/`is_unused_keyword`
// and `AllKeywords`.
// But this should rarely be necessary if the keywords are kept in alphabetic order.
Keywords {
// Special reserved identifiers used internally for elided lifetimes,
Expand Down Expand Up @@ -2579,3 +2580,42 @@ impl Ident {
self.name.can_be_raw() && self.is_reserved()
}
}

/// An iterator over all the keywords in Rust.
#[derive(Copy, Clone)]
pub struct AllKeywords {
curr_idx: u32,
end_idx: u32,
}

impl AllKeywords {
/// Initialize a new iterator over all the keywords.
///
/// *Note:* Please update this if a new keyword is added beyond the current
/// range.
pub fn new() -> Self {
AllKeywords { curr_idx: kw::Empty.as_u32(), end_idx: kw::Yeet.as_u32() }
}

/// Collect all the keywords in a given edition into a vector.
pub fn collect_used(&self, edition: impl Copy + FnOnce() -> Edition) -> Vec<Symbol> {
self.filter(|&keyword| {
keyword.is_used_keyword_always() || keyword.is_used_keyword_conditional(edition)
})
.collect()
}
}

impl Iterator for AllKeywords {
type Item = Symbol;

fn next(&mut self) -> Option<Self::Item> {
if self.curr_idx <= self.end_idx {
let keyword = Symbol::new(self.curr_idx);
self.curr_idx += 1;
Some(keyword)
} else {
None
}
}
}
5 changes: 5 additions & 0 deletions tests/ui/parser/extern-crate-unexpected-token.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ error: expected one of `crate` or `{`, found `crte`
|
LL | extern crte foo;
| ^^^^ expected one of `crate` or `{`
|
help: there is a keyword `crate` with a similar name
|
LL | extern crate foo;
| ~~~~~

error: aborting due to 1 previous error

Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,16 @@ error: expected one of `:`, `@`, or `|`, found keyword `Self`
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:4:17
|
LL | fn foo(&mur Self) {}
| -----^^^^
| | |
| | expected one of `:`, `@`, or `|`
| help: declare the type after the parameter binding: `<identifier>: <type>`
| ^^^^ expected one of `:`, `@`, or `|`
|
help: there is a keyword `mut` with a similar name
|
LL | fn foo(&mut Self) {}
| ~~~
help: declare the type after the parameter binding
|
LL | fn foo(<identifier>: <type>) {}
| ~~~~~~~~~~~~~~~~~~~~

error: unexpected lifetime `'static` in pattern
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:8:13
Expand All @@ -35,16 +41,27 @@ error: expected one of `:`, `@`, or `|`, found keyword `Self`
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:8:25
|
LL | fn bar(&'static mur Self) {}
| -------------^^^^
| | |
| | expected one of `:`, `@`, or `|`
| help: declare the type after the parameter binding: `<identifier>: <type>`
| ^^^^ expected one of `:`, `@`, or `|`
|
help: there is a keyword `mut` with a similar name
|
LL | fn bar(&'static mut Self) {}
| ~~~
help: declare the type after the parameter binding
|
LL | fn bar(<identifier>: <type>) {}
| ~~~~~~~~~~~~~~~~~~~~

error: expected one of `:`, `@`, or `|`, found keyword `Self`
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:14:17
|
LL | fn baz(&mur Self @ _) {}
| ^^^^ expected one of `:`, `@`, or `|`
|
help: there is a keyword `mut` with a similar name
|
LL | fn baz(&mut Self @ _) {}
| ~~~

error[E0533]: expected unit struct, found self constructor `Self`
--> $DIR/issue-70549-resolve-after-recovered-self-ctor.rs:4:17
Expand Down
6 changes: 6 additions & 0 deletions tests/ui/parser/misspelled-keywords/assoc-type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
trait Animal {
Type Result = u8;
//~^ ERROR expected one of
}

fn main() {}
18 changes: 18 additions & 0 deletions tests/ui/parser/misspelled-keywords/assoc-type.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
error: expected one of `!` or `::`, found `Result`
--> $DIR/assoc-type.rs:2:10
|
LL | trait Animal {
| - while parsing this item list starting here
LL | Type Result = u8;
| ^^^^^^ expected one of `!` or `::`
LL |
LL | }
| - the item list ends here
|
help: write keyword `type` in lowercase
|
LL | type Result = u8;
| ~~~~

error: aborting due to 1 previous error

6 changes: 6 additions & 0 deletions tests/ui/parser/misspelled-keywords/async-move.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
//@ edition: 2018

fn main() {
async Move {}
//~^ ERROR expected one of
}
13 changes: 13 additions & 0 deletions tests/ui/parser/misspelled-keywords/async-move.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
error: expected one of `move`, `|`, or `||`, found `Move`
--> $DIR/async-move.rs:4:11
|
LL | async Move {}
| ^^^^ expected one of `move`, `|`, or `||`
|
help: write keyword `move` in lowercase
|
LL | async move {}
| ~~~~

error: aborting due to 1 previous error

5 changes: 5 additions & 0 deletions tests/ui/parser/misspelled-keywords/const-fn.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cnst fn code() {}
//~^ ERROR expected one of

fn main() {
}
13 changes: 13 additions & 0 deletions tests/ui/parser/misspelled-keywords/const-fn.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
error: expected one of `!` or `::`, found keyword `fn`
--> $DIR/const-fn.rs:1:6
|
LL | cnst fn code() {}
| ^^ expected one of `!` or `::`
|
help: there is a keyword `const` with a similar name
|
LL | const fn code() {}
| ~~~~~

error: aborting due to 1 previous error

4 changes: 4 additions & 0 deletions tests/ui/parser/misspelled-keywords/const-generics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
fn foo<consta N: usize>(_arr: [i32; N]) {}
//~^ ERROR expected one of

fn main() {}
13 changes: 13 additions & 0 deletions tests/ui/parser/misspelled-keywords/const-generics.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
error: expected one of `,`, `:`, `=`, or `>`, found `N`
--> $DIR/const-generics.rs:1:15
|
LL | fn foo<consta N: usize>(_arr: [i32; N]) {}
| ^ expected one of `,`, `:`, `=`, or `>`
|
help: there is a keyword `const` with a similar name
|
LL | fn foo<const N: usize>(_arr: [i32; N]) {}
| ~~~~~

error: aborting due to 1 previous error

4 changes: 4 additions & 0 deletions tests/ui/parser/misspelled-keywords/const.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
cons A: u8 = 10;
//~^ ERROR expected one of

fn main() {}
13 changes: 13 additions & 0 deletions tests/ui/parser/misspelled-keywords/const.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
error: expected one of `!` or `::`, found `A`
--> $DIR/const.rs:1:6
|
LL | cons A: u8 = 10;
| ^ expected one of `!` or `::`
|
help: there is a keyword `const` with a similar name
|
LL | const A: u8 = 10;
| ~~~~~

error: aborting due to 1 previous error

4 changes: 4 additions & 0 deletions tests/ui/parser/misspelled-keywords/for-loop.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
fn main() {
form i in 1..10 {}
//~^ ERROR expected one of
}
13 changes: 13 additions & 0 deletions tests/ui/parser/misspelled-keywords/for-loop.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `i`
--> $DIR/for-loop.rs:2:10
|
LL | form i in 1..10 {}
| ^ expected one of 8 possible tokens
|
help: there is a keyword `for` with a similar name
|
LL | for i in 1..10 {}
| ~~~

error: aborting due to 1 previous error

Loading

0 comments on commit d6a4298

Please sign in to comment.