Skip to content

Commit 8f57684

Browse files
committed
Reserve guarded string literals (RFC 3593)
1 parent f92d49b commit 8f57684

24 files changed

+876
-10
lines changed

compiler/rustc_lexer/src/cursor.rs

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use std::str::Chars;
44
///
55
/// Next characters can be peeked via `first` method,
66
/// and position can be shifted forward via `bump` method.
7+
#[derive(Clone)]
78
pub struct Cursor<'a> {
89
len_remaining: usize,
910
/// Iterator over chars. Slightly faster than a &str.

compiler/rustc_lexer/src/lib.rs

+84-8
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ pub mod unescape;
2929
#[cfg(test)]
3030
mod tests;
3131

32+
use std::num::NonZeroU8;
33+
3234
pub use crate::cursor::Cursor;
3335

3436
use self::LiteralKind::*;
@@ -179,24 +181,27 @@ pub enum DocStyle {
179181
/// `rustc_ast::ast::LitKind`).
180182
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
181183
pub enum LiteralKind {
182-
/// "12_u8", "0o100", "0b120i99", "1f32".
184+
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
183185
Int { base: Base, empty_int: bool },
184-
/// "12.34f32", "1e3", but not "1f32".
186+
/// `12.34f32`, `1e3`, but not `1f32`.
185187
Float { base: Base, empty_exponent: bool },
186-
/// "'a'", "'\\'", "'''", "';"
188+
/// `'a'`, `'\\'`, `'''`, `';`
187189
Char { terminated: bool },
188-
/// "b'a'", "b'\\'", "b'''", "b';"
190+
/// `b'a'`, `b'\\'`, `b'''`, `b';`
189191
Byte { terminated: bool },
190-
/// ""abc"", ""abc"
192+
/// `"abc"`, `"abc`
191193
Str { terminated: bool },
192-
/// "b"abc"", "b"abc"
194+
/// `b"abc"`, `b"abc`
193195
ByteStr { terminated: bool },
194196
/// `c"abc"`, `c"abc`
195197
CStr { terminated: bool },
196-
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
198+
/// `#"abc"#`, `#"a`, `##"a"#`. `n_start_hashes` is `None` if there is no closing quote or more than 255 opening hashes.
199+
/// Allows fewer hashes to close the string to support older editions.
200+
GuardedStr { n_start_hashes: Option<NonZeroU8>, n_end_hashes: u8 },
201+
/// `r"abc"`, `r#"abc"#`, `r####"ab"###"c"####`, `r#"a`. `None` indicates
197202
/// an invalid literal.
198203
RawStr { n_hashes: Option<u8> },
199-
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
204+
/// `br"abc"`, `br#"abc"#`, `br####"ab"###"c"####`, `br#"a`. `None`
200205
/// indicates an invalid literal.
201206
RawByteStr { n_hashes: Option<u8> },
202207
/// `cr"abc"`, `cr#"abc"#`, `cr#"a`. `None` indicates an invalid literal.
@@ -365,6 +370,49 @@ impl Cursor<'_> {
365370
_ => self.ident_or_unknown_prefix(),
366371
},
367372

373+
// Guarded string literal (reserved syntax).
374+
'#' if matches!(self.first(), '"' | '#') => {
375+
// Create a backup to restore later if this
376+
// turns out to not be a guarded literal.
377+
let backup = self.clone();
378+
379+
let mut n_start_hashes: u32 = 1; // Already captured one `#`.
380+
while self.first() == '#' {
381+
n_start_hashes += 1;
382+
self.bump();
383+
}
384+
385+
if self.first() == '"' {
386+
self.bump();
387+
388+
let res = self.guarded_double_quoted_string(n_start_hashes);
389+
let suffix_start = self.pos_within_token();
390+
391+
if let (Ok(n_end_hashes), Ok(n)) = (res, u8::try_from(n_start_hashes)) {
392+
self.eat_literal_suffix();
393+
394+
Literal {
395+
kind: GuardedStr {
396+
n_start_hashes: NonZeroU8::new(n),
397+
// Always succeeds because `n_end_hashes <= n`
398+
n_end_hashes: n_end_hashes.try_into().unwrap(),
399+
},
400+
suffix_start,
401+
}
402+
} else {
403+
Literal {
404+
kind: GuardedStr { n_start_hashes: None, n_end_hashes: 0 },
405+
suffix_start,
406+
}
407+
}
408+
} else {
409+
// Not a guarded string, so restore old state.
410+
*self = backup;
411+
// Return a pound token.
412+
Pound
413+
}
414+
}
415+
368416
// Byte literal, byte string literal, raw byte string literal or identifier.
369417
'b' => self.c_or_byte_string(
370418
|terminated| ByteStr { terminated },
@@ -758,6 +806,34 @@ impl Cursor<'_> {
758806
false
759807
}
760808

809+
/// Eats the guarded double-quoted string and returns the number of closing hashes consumed, or an error if the string is unterminated.
810+
fn guarded_double_quoted_string(&mut self, n_start_hashes: u32) -> Result<u32, RawStrError> {
811+
debug_assert!(self.prev() == '"');
812+
813+
// Lex the string itself as a normal string literal
814+
// so we can recover that for older editions later.
815+
if !self.double_quoted_string() {
816+
return Err(RawStrError::NoTerminator {
817+
expected: n_start_hashes,
818+
found: 0,
819+
possible_terminator_offset: None,
820+
});
821+
}
822+
823+
// Consume closing '#' symbols.
824+
// Note that this will not consume extra trailing `#` characters:
825+
// `###"abcde"####` is lexed as a `GuardedStr { n_end_hashes: 3, .. }`
826+
// followed by a `#` token.
827+
let mut n_end_hashes = 0;
828+
while self.first() == '#' && n_end_hashes < n_start_hashes {
829+
n_end_hashes += 1;
830+
self.bump();
831+
}
832+
833+
// Handle `n_end_hashes < n_start_hashes` later.
834+
Ok(n_end_hashes)
835+
}
836+
761837
/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
762838
fn raw_double_quoted_string(&mut self, prefix_len: u32) -> Result<u8, RawStrError> {
763839
// Wrap the actual function to handle the error with too many hashes.

compiler/rustc_lint/src/context/diagnostics.rs

+14
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,20 @@ pub(super) fn builtin(sess: &Session, diagnostic: BuiltinLintDiag, diag: &mut Di
157157
Applicability::MachineApplicable,
158158
);
159159
}
160+
BuiltinLintDiag::ReservedGuardedString(space_span) => {
161+
if let Some(space_span) = space_span {
162+
diag.span_suggestion_verbose(
163+
space_span,
164+
"insert whitespace here to avoid this being parsed as guarded string in Rust 2024",
165+
" ",
166+
Applicability::MachineApplicable,
167+
);
168+
} else {
169+
diag.help(
170+
"insert whitespace between the `#`s and the opening quote to avoid this being parsed as guarded string in Rust 2024",
171+
);
172+
}
173+
}
160174
BuiltinLintDiag::UnusedBuiltinAttribute { attr_name, macro_name, invoc_span } => {
161175
diag.span_note(
162176
invoc_span,

compiler/rustc_lint_defs/src/builtin.rs

+41
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ declare_lint_pass! {
8989
RUST_2021_INCOMPATIBLE_OR_PATTERNS,
9090
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX,
9191
RUST_2021_PRELUDE_COLLISIONS,
92+
RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
9293
SEMICOLON_IN_EXPRESSIONS_FROM_MACROS,
9394
SINGLE_USE_LIFETIMES,
9495
SOFT_UNSTABLE,
@@ -4807,3 +4808,43 @@ declare_lint! {
48074808
reference: "issue #124559 <https://github.com/rust-lang/rust/issues/124559>",
48084809
};
48094810
}
4811+
4812+
declare_lint! {
4813+
/// The `rust_2024_guarded_string_incompatible_syntax` lint detects `#` tokens
4814+
/// that will be parsed as part of a guarded string literal in Rust 2024.
4815+
///
4816+
/// ### Example
4817+
///
4818+
/// ```rust,edition2021,compile_fail
4819+
/// #![deny(rust_2024_guarded_string_incompatible_syntax)]
4820+
///
4821+
/// macro_rules! m {
4822+
/// (# $x:expr #) => ();
4823+
/// (# $x:expr) => ();
4824+
/// }
4825+
///
4826+
/// m!(#"hey"#);
4827+
/// m!(#"hello");
4828+
/// ```
4829+
///
4830+
/// {{produces}}
4831+
///
4832+
/// ### Explanation
4833+
///
4834+
/// Prior to Rust 2024, `#"hey"#` is three tokens: the first `#`
4835+
/// followed by the string literal `"hey"` then the final `#`.
4836+
/// In Rust 2024, the whole sequence is considered a single token.
4837+
///
4838+
/// This lint suggests adding whitespace between the leading `#`
4839+
/// and the string to keep them separated in Rust 2024.
4840+
// Allow this lint -- rustdoc doesn't yet support threading edition into this lint's parser.
4841+
#[allow(rustdoc::invalid_rust_codeblocks)]
4842+
pub RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
4843+
Allow,
4844+
"will be parsed as a guarded string in Rust 2024",
4845+
@future_incompatible = FutureIncompatibleInfo {
4846+
reason: FutureIncompatibilityReason::EditionError(Edition::Edition2024),
4847+
reference: "issue #123735 <https://github.com/rust-lang/rust/issues/123735>",
4848+
};
4849+
crate_level_only
4850+
}

compiler/rustc_lint_defs/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,7 @@ pub enum BuiltinLintDiag {
590590
ProcMacroBackCompat(String),
591591
OrPatternsBackCompat(Span, String),
592592
ReservedPrefix(Span),
593+
ReservedGuardedString(Option<Span>),
593594
TrailingMacro(bool, Ident),
594595
BreakWithLabelAndLoop(Span),
595596
NamedAsmLabel(String),

compiler/rustc_parse/messages.ftl

+4
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,10 @@ parse_require_colon_after_labeled_expression = labeled expression must be follow
672672
.label = the label
673673
.suggestion = add `:` after the label
674674
675+
parse_reserved_guarded_string = invalid string literal
676+
.note = unprefixed guarded string literals are reserved for future use since Rust 2024
677+
.suggestion_whitespace = consider inserting whitespace here
678+
675679
parse_return_types_use_thin_arrow = return types are denoted using `->`
676680
.suggestion = use `->` instead
677681

compiler/rustc_parse/src/errors.rs

+18
Original file line numberDiff line numberDiff line change
@@ -2009,6 +2009,24 @@ pub enum UnknownPrefixSugg {
20092009
},
20102010
}
20112011

2012+
#[derive(Diagnostic)]
2013+
#[diag(parse_reserved_guarded_string)]
2014+
#[note]
2015+
pub struct ReservedGuardedString {
2016+
#[primary_span]
2017+
pub span: Span,
2018+
#[subdiagnostic]
2019+
pub sugg: Option<GuardedStringSugg>,
2020+
}
2021+
#[derive(Subdiagnostic)]
2022+
#[suggestion(
2023+
parse_suggestion_whitespace,
2024+
code = " ",
2025+
applicability = "maybe-incorrect",
2026+
style = "verbose"
2027+
)]
2028+
pub struct GuardedStringSugg(#[primary_span] pub Span);
2029+
20122030
#[derive(Diagnostic)]
20132031
#[diag(parse_too_many_hashes)]
20142032
pub struct TooManyHashes {

compiler/rustc_parse/src/lexer/mod.rs

+63-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ use rustc_lexer::unescape::{self, EscapeError, Mode};
1212
use rustc_lexer::{Base, DocStyle, RawStrError};
1313
use rustc_lexer::{Cursor, LiteralKind};
1414
use rustc_session::lint::builtin::{
15-
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
15+
RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
16+
TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
1617
};
1718
use rustc_session::lint::BuiltinLintDiag;
1819
use rustc_session::parse::ParseSess;
@@ -241,6 +242,40 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
241242
let prefix_span = self.mk_sp(start, lit_start);
242243
return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
243244
}
245+
rustc_lexer::TokenKind::Literal {
246+
kind: rustc_lexer::LiteralKind::GuardedStr { n_start_hashes, .. },
247+
suffix_start: _
248+
} if !self.mk_sp(start, self.pos).edition().at_least_rust_2024() => {
249+
// Check if previous char was `#`, so we don't
250+
// lint for each `#` before the string.
251+
if !(
252+
start > self.start_pos &&
253+
self.src.as_bytes()[self.src_index(start) - 1] == b'#'
254+
) {
255+
let span = self.mk_sp(start, self.pos);
256+
let space_span = n_start_hashes.map(|n_hashes| {
257+
let space_pos = start + BytePos(n_hashes.get().into());
258+
self.mk_sp(space_pos, space_pos)
259+
});
260+
261+
// Before Rust 2024, only emit a lint for migration.
262+
self.psess.buffer_lint_with_diagnostic(
263+
RUST_2024_GUARDED_STRING_INCOMPATIBLE_SYNTAX,
264+
span,
265+
ast::CRATE_NODE_ID,
266+
"will be parsed as a guarded string in Rust 2024",
267+
BuiltinLintDiag::ReservedGuardedString(space_span),
268+
);
269+
}
270+
271+
// Reset the state so that only the first `#` is consumed.
272+
let next = start + BytePos(1);
273+
self.pos = next;
274+
self.cursor = Cursor::new(&str_before[1..]);
275+
276+
let pound_span = self.mk_sp(start, next);
277+
return (Token::new(TokenKind::Pound, pound_span), preceded_by_whitespace);
278+
}
244279
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
245280
let suffix_start = start + BytePos(suffix_start);
246281
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
@@ -490,6 +525,33 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
490525
self.report_raw_str_error(start, 1);
491526
}
492527
}
528+
// RFC 3593 reserved this syntax for future use. As of Rust 2024,
529+
// using this syntax produces an error. In earlier editions, however, it
530+
// only results in an (allowed by default) lint, and is treated as
531+
// separate tokens.
532+
rustc_lexer::LiteralKind::GuardedStr { n_start_hashes, n_end_hashes } => {
533+
let span = self.mk_sp(start, self.pos);
534+
535+
if let Some(n_start_hashes) = n_start_hashes {
536+
let n = u32::from(n_start_hashes.get());
537+
let e = u32::from(n_end_hashes);
538+
let expn_data = span.ctxt().outer_expn_data();
539+
540+
let space_pos = start + BytePos(n);
541+
let space_span = self.mk_sp(space_pos, space_pos);
542+
543+
let sugg = if expn_data.is_root() {
544+
Some(errors::GuardedStringSugg(space_span))
545+
} else {
546+
None
547+
};
548+
549+
self.dcx().emit_err(errors::ReservedGuardedString { span, sugg });
550+
self.cook_unicode(token::Str, Mode::Str, start, end, 1 + n, 1 + e) // ##" "##
551+
} else {
552+
self.dcx().emit_fatal(errors::ReservedGuardedString { span, sugg: None });
553+
}
554+
}
493555
rustc_lexer::LiteralKind::RawByteStr { n_hashes } => {
494556
if let Some(n_hashes) = n_hashes {
495557
let n = u32::from(n_hashes);

src/librustdoc/html/highlight.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -850,7 +850,8 @@ impl<'src> Classifier<'src> {
850850
| LiteralKind::RawStr { .. }
851851
| LiteralKind::RawByteStr { .. }
852852
| LiteralKind::CStr { .. }
853-
| LiteralKind::RawCStr { .. } => Class::String,
853+
| LiteralKind::RawCStr { .. }
854+
| LiteralKind::GuardedStr { .. } => Class::String,
854855
// Number literals.
855856
LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number,
856857
},

src/tools/rust-analyzer/crates/parser/src/lexed_str.rs

+4
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,10 @@ impl<'a> Converter<'a> {
331331
}
332332
C_STRING
333333
}
334+
rustc_lexer::LiteralKind::GuardedStr { .. } => {
335+
err = "Invalid string literal";
336+
STRING
337+
}
334338
};
335339

336340
let err = if err.is_empty() { None } else { Some(err) };

src/tools/rust-analyzer/crates/proc-macro-srv/src/server/rust_analyzer_span.rs

+1
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,7 @@ impl server::FreeFunctions for RaSpanServer {
120120
3 + n_hashes.unwrap_or_default() as usize,
121121
1 + n_hashes.unwrap_or_default() as usize,
122122
),
123+
LiteralKind::GuardedStr { .. } => return Err(()),
123124
};
124125

125126
let (lit, suffix) = s.split_at(suffix_start as usize);

src/tools/rust-analyzer/crates/proc-macro-srv/src/server/token_id.rs

+1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ impl server::FreeFunctions for TokenIdServer {
113113
3 + n_hashes.unwrap_or_default() as usize,
114114
1 + n_hashes.unwrap_or_default() as usize,
115115
),
116+
LiteralKind::GuardedStr { .. } => return Err(()),
116117
};
117118

118119
let (lit, suffix) = s.split_at(suffix_start as usize);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//@ force-host
2+
//@ edition:2021
3+
//@ no-prefer-dynamic
4+
5+
#![crate_type = "proc-macro"]
6+
7+
extern crate proc_macro;
8+
9+
use proc_macro::TokenStream;
10+
use std::str::FromStr;
11+
12+
#[proc_macro]
13+
pub fn number_of_tokens_in_a_guarded_string_literal(_: TokenStream) -> TokenStream {
14+
TokenStream::from_str("#\"abc\"#").unwrap().into_iter().count().to_string().parse().unwrap()
15+
}
16+
17+
#[proc_macro]
18+
pub fn number_of_tokens_in_a_guarded_unterminated_string_literal(_: TokenStream) -> TokenStream {
19+
TokenStream::from_str("#\"abc\"").unwrap().into_iter().count().to_string().parse().unwrap()
20+
}

0 commit comments

Comments
 (0)