From 0c00d9fb469cec6500ee6e9331b9b779886773fe Mon Sep 17 00:00:00 2001 From: Marcel Hellwig Date: Tue, 16 Jul 2019 15:49:55 +0200 Subject: [PATCH] improved error reporting on lexer with raw literals The lexer now emits a suggestion for `cargo fix` --- src/libsyntax/parse/lexer/mod.rs | 105 ++++++++++++------ .../ui/parser/raw/raw-byte-string-eof.stderr | 8 +- .../raw/raw-byte-string-literals.stderr | 7 +- src/test/ui/parser/raw/raw-str-delim.stderr | 7 +- .../ui/parser/raw/raw-str-in-macro-call.rs | 19 ++++ .../parser/raw/raw-str-in-macro-call.stderr | 20 ++++ src/test/ui/parser/raw/raw-str-long.rs | 21 ++++ src/test/ui/parser/raw/raw-str-long.stderr | 12 ++ .../ui/parser/raw/raw-str-unterminated.stderr | 4 +- src/test/ui/parser/raw/raw-str.stderr | 8 +- 10 files changed, 162 insertions(+), 49 deletions(-) create mode 100644 src/test/ui/parser/raw/raw-str-in-macro-call.rs create mode 100644 src/test/ui/parser/raw/raw-str-in-macro-call.stderr create mode 100644 src/test/ui/parser/raw/raw-str-long.rs create mode 100644 src/test/ui/parser/raw/raw-str-long.stderr diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index d0c4e8d6a5634..50a0be98431b7 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -4,7 +4,7 @@ use crate::symbol::{sym, Symbol}; use crate::parse::unescape; use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char}; -use errors::{FatalError, Diagnostic, DiagnosticBuilder}; +use errors::{Applicability, FatalError, Diagnostic, DiagnosticBuilder}; use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION}; use core::unicode::property::Pattern_White_Space; @@ -145,19 +145,64 @@ impl<'a> StringReader<'a> { self.ch.is_none() } - fn fail_unterminated_raw_string(&self, pos: BytePos, hash_count: u16) -> ! 
{ - let mut err = self.struct_span_fatal(pos, pos, "unterminated raw string"); - err.span_label(self.mk_sp(pos, pos), "unterminated raw string"); + fn fail_unterminated_raw_string(&self, start: Span, hash_count: u16, spans: Vec<Span>) -> ! { + const SPAN_THRESHOLD: usize = 3; + const MSG_STR: &str = "you might have meant to end the raw string here"; + let hash_str = format!("\"{}", "#".repeat(hash_count as usize)); + let spans_len = spans.len(); - if hash_count > 0 { - err.note(&format!("this raw string should be terminated with `\"{}`", - "#".repeat(hash_count as usize))); + let mut err = self.sess.span_diagnostic.struct_span_fatal(start, "unterminated raw string"); + err.span_label(start, "unterminated raw string"); + + for s in spans { + if spans_len < SPAN_THRESHOLD { + err.span_suggestion( + s, + MSG_STR, + hash_str.clone(), + Applicability::MaybeIncorrect + ); + } else { + err.tool_only_span_suggestion( + s, + MSG_STR, + hash_str.clone(), + Applicability::MaybeIncorrect + ); + } + } + + if hash_count > 0 && spans_len >= SPAN_THRESHOLD { + err.note(&format!("this raw string should be terminated with `{}`", hash_str)); } err.emit(); FatalError.raise(); } + fn fail_incorrect_raw_string_delimiter(&mut self, start: BytePos) -> ! 
{ + loop { + match self.ch { + None | Some('#') | Some('"') => break, + _ => self.bump(), + } + } + let end = self.pos; + let span = self.mk_sp(start, end); + let mut err = self.sess.span_diagnostic.struct_span_fatal( + span, + "found invalid character; only `#` is allowed in raw string delimitation", + ); + err.span_suggestion_hidden( + span, + "replace with `#`", + "#".repeat((end.0 - start.0) as usize), + Applicability::MachineApplicable, + ); + err.emit(); + FatalError.raise(); + } + crate fn emit_fatal_errors(&mut self) { for err in &mut self.fatal_errs { err.emit(); @@ -202,16 +247,6 @@ impl<'a> StringReader<'a> { self.err_span(self.mk_sp(from_pos, to_pos), m) } - /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an - /// escaped character to the error message - fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError { - let mut m = m.to_string(); - m.push_str(": "); - push_escaped_char(&mut m, c); - - self.fatal_span_(from_pos, to_pos, &m[..]) - } - fn struct_span_fatal(&self, from_pos: BytePos, to_pos: BytePos, m: &str) -> DiagnosticBuilder<'a> { @@ -945,6 +980,7 @@ impl<'a> StringReader<'a> { Ok(TokenKind::lit(token::Char, symbol, suffix)) } 'b' => { + let start_bpos = self.pos; self.bump(); let (kind, symbol) = match self.ch { Some('\'') => { @@ -963,7 +999,7 @@ impl<'a> StringReader<'a> { (token::ByteStr, symbol) }, Some('r') => { - let (start, end, hash_count) = self.scan_raw_string(); + let (start, end, hash_count) = self.scan_raw_string(start_bpos); let symbol = self.symbol_from_to(start, end); self.validate_raw_byte_str_escape(start, end); @@ -984,7 +1020,7 @@ impl<'a> StringReader<'a> { Ok(TokenKind::lit(token::Str, symbol, suffix)) } 'r' => { - let (start, end, hash_count) = self.scan_raw_string(); + let (start, end, hash_count) = self.scan_raw_string(self.pos); let symbol = self.symbol_from_to(start, end); self.validate_raw_str_escape(start, end); let suffix = 
self.scan_optional_raw_name(); @@ -1145,8 +1181,7 @@ impl<'a> StringReader<'a> { /// Scans a raw (byte) string, returning byte position range for `""` /// (including quotes) along with `#` character count in `(b)r##...""##...`; - fn scan_raw_string(&mut self) -> (BytePos, BytePos, u16) { - let start_bpos = self.pos; + fn scan_raw_string(&mut self, start_bpos: BytePos) -> (BytePos, BytePos, u16) { self.bump(); let mut hash_count: u16 = 0; while self.ch_is('#') { @@ -1161,30 +1196,32 @@ impl<'a> StringReader<'a> { hash_count += 1; } - if self.is_eof() { - self.fail_unterminated_raw_string(start_bpos, hash_count); - } else if !self.ch_is('"') { - let last_bpos = self.pos; - let curr_char = self.ch.unwrap(); - self.fatal_span_char(start_bpos, - last_bpos, - "found invalid character; only `#` is allowed \ - in raw string delimitation", - curr_char).raise(); + let bpos_span = self.mk_sp(start_bpos, self.pos); + + match self.ch { + None => self.fail_unterminated_raw_string( + bpos_span, + hash_count, + vec![self.mk_sp(self.pos, self.pos)] + ), + Some('"') => (), + Some(_) => self.fail_incorrect_raw_string_delimiter(self.pos), } + self.bump(); let content_start_bpos = self.pos; let mut content_end_bpos; + let mut spans = vec![]; + 'outer: loop { match self.ch { - None => { - self.fail_unterminated_raw_string(start_bpos, hash_count); - } + None => self.fail_unterminated_raw_string(bpos_span, hash_count, spans), Some('"') => { content_end_bpos = self.pos; for _ in 0..hash_count { self.bump(); if !self.ch_is('#') { + spans.push(self.mk_sp(content_end_bpos, self.pos)); continue 'outer; } } diff --git a/src/test/ui/parser/raw/raw-byte-string-eof.stderr b/src/test/ui/parser/raw/raw-byte-string-eof.stderr index 2ba50e8fb2a34..2d04bd9244938 100644 --- a/src/test/ui/parser/raw/raw-byte-string-eof.stderr +++ b/src/test/ui/parser/raw/raw-byte-string-eof.stderr @@ -1,10 +1,12 @@ error: unterminated raw string - --> $DIR/raw-byte-string-eof.rs:2:6 + --> $DIR/raw-byte-string-eof.rs:2:5 
| LL | br##"a"#; - | ^ unterminated raw string + | ^^^^ unterminated raw string +help: you might have meant to end the raw string here | - = note: this raw string should be terminated with `"##` +LL | br##"a'##; + | ^^^ error: aborting due to previous error diff --git a/src/test/ui/parser/raw/raw-byte-string-literals.stderr b/src/test/ui/parser/raw/raw-byte-string-literals.stderr index 4880d1fdbe8a7..8a118e191e7b5 100644 --- a/src/test/ui/parser/raw/raw-byte-string-literals.stderr +++ b/src/test/ui/parser/raw/raw-byte-string-literals.stderr @@ -10,11 +10,12 @@ error: raw byte string must be ASCII LL | br"é"; | ^ -error: found invalid character; only `#` is allowed in raw string delimitation: ~ - --> $DIR/raw-byte-string-literals.rs:6:6 +error: found invalid character; only `#` is allowed in raw string delimitation + --> $DIR/raw-byte-string-literals.rs:6:9 | LL | br##~"a"~##; - | ^^^ + | ^ + = help: replace with `#` error: aborting due to 3 previous errors diff --git a/src/test/ui/parser/raw/raw-str-delim.stderr b/src/test/ui/parser/raw/raw-str-delim.stderr index b86b9e90e73ad..4a9e4f7720d0f 100644 --- a/src/test/ui/parser/raw/raw-str-delim.stderr +++ b/src/test/ui/parser/raw/raw-str-delim.stderr @@ -1,8 +1,9 @@ -error: found invalid character; only `#` is allowed in raw string delimitation: ~ - --> $DIR/raw-str-delim.rs:2:5 +error: found invalid character; only `#` is allowed in raw string delimitation + --> $DIR/raw-str-delim.rs:2:7 | LL | r#~"#"~# - | ^^ + | ^ + = help: replace with `#` error: aborting due to previous error diff --git a/src/test/ui/parser/raw/raw-str-in-macro-call.rs b/src/test/ui/parser/raw/raw-str-in-macro-call.rs new file mode 100644 index 0000000000000..2647c9cfe6b7c --- /dev/null +++ b/src/test/ui/parser/raw/raw-str-in-macro-call.rs @@ -0,0 +1,19 @@ +// check-pass + +macro_rules! m1 { + ($tt:tt #) => () +} + +macro_rules! m2 { + ($tt:tt) => () +} + +macro_rules! 
m3 { + ($tt:tt #) => () +} + +fn main() { + m1!(r#"abc"##); + m2!(r#"abc"##); + m3!(r#"abc"#); +} diff --git a/src/test/ui/parser/raw/raw-str-in-macro-call.stderr b/src/test/ui/parser/raw/raw-str-in-macro-call.stderr new file mode 100644 index 0000000000000..010d4093130d1 --- /dev/null +++ b/src/test/ui/parser/raw/raw-str-in-macro-call.stderr @@ -0,0 +1,20 @@ +error: no rules expected the token `#` + --> $DIR/raw-str-in-macro-call.rs:17:17 + | +LL | macro_rules! m2 { + | --------------- when calling this macro +... +LL | m2!(r#"abc"##); + | ^ no rules expected this token in macro call + +error: unexpected end of macro invocation + --> $DIR/raw-str-in-macro-call.rs:18:5 + | +LL | macro_rules! m3 { + | --------------- when calling this macro +... +LL | m3!(r#"abc"#); + | ^^^^^^^^^^^^^^ missing tokens in macro arguments + +error: aborting due to 2 previous errors + diff --git a/src/test/ui/parser/raw/raw-str-long.rs b/src/test/ui/parser/raw/raw-str-long.rs new file mode 100644 index 0000000000000..fa3f3c68fde0d --- /dev/null +++ b/src/test/ui/parser/raw/raw-str-long.rs @@ -0,0 +1,21 @@ +fn main() { + let a = r##"This //~ ERROR unterminated raw string + is + a + very + long + string + which + goes + over + a + b + c + d + e + f + g + h + lines + "#; +} diff --git a/src/test/ui/parser/raw/raw-str-long.stderr b/src/test/ui/parser/raw/raw-str-long.stderr new file mode 100644 index 0000000000000..121fc861594e3 --- /dev/null +++ b/src/test/ui/parser/raw/raw-str-long.stderr @@ -0,0 +1,12 @@ +error: unterminated raw string + --> $DIR/raw-str-long.rs:2:13 + | +LL | let a = r##"This + | ^^^ unterminated raw string +help: you might have meant to end the raw string here + | +LL | "##; + | ^^^ + +error: aborting due to previous error + diff --git a/src/test/ui/parser/raw/raw-str-unterminated.stderr b/src/test/ui/parser/raw/raw-str-unterminated.stderr index 67792eb91e5ca..33eb1b76fef4d 100644 --- a/src/test/ui/parser/raw/raw-str-unterminated.stderr +++ 
b/src/test/ui/parser/raw/raw-str-unterminated.stderr @@ -2,9 +2,7 @@ error: unterminated raw string --> $DIR/raw-str-unterminated.rs:2:5 | LL | r#" string literal goes on - | ^ unterminated raw string - | - = note: this raw string should be terminated with `"#` + | ^^ unterminated raw string error: aborting due to previous error diff --git a/src/test/ui/parser/raw/raw-str.stderr b/src/test/ui/parser/raw/raw-str.stderr index 5572511881d57..edb1f269f9ffe 100644 --- a/src/test/ui/parser/raw/raw-str.stderr +++ b/src/test/ui/parser/raw/raw-str.stderr @@ -1,10 +1,12 @@ error: unterminated raw string - --> $DIR/raw_string.rs:2:13 + --> $DIR/raw-str.rs:2:13 | LL | let x = r##"lol"#; - | ^ unterminated raw string + | ^^^ unterminated raw string +help: you might have meant to end the raw string here | - = note: this raw string should be terminated with `"##` +LL | let x = r##"lol'##; + | ^^^ error: aborting due to previous error