Skip to content

Commit 8f12cac

Browse files
committed
[xt] XXX: delay unescaping
Note that `from_token_lit` was looking for errors but never finding them!
- issue-62913.rs: The structure and output changed a bit. Issue #62913 was about an ICE due to an unterminated string literal, so the new version should be good enough.
- literals-are-validated-before-expansion.rs: this tests exactly the behaviour that has been changed. XXX: insert a new test covering more of that.
- XXX: explain the tests that needed to be split.
- XXX: tests/ui/parser/unicode-control-codepoints.rs: just reordered errors.
- XXX: tests/rustdoc-ui/ignore-block-help.rs: relies on a parsing error occurring. The error present was an unescaping error, which is now delayed to after parsing. So the commit changes it to an "unterminated character literal" error which still occurs during parsing.
1 parent b810472 commit 8f12cac

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+562
-410
lines changed

compiler/rustc_ast/src/attr/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ impl AttrArgsEq {
240240
match self {
241241
AttrArgsEq::Ast(expr) => match expr.kind {
242242
ExprKind::Lit(token_lit) => {
243-
LitKind::from_token_lit(token_lit).ok().and_then(|lit| lit.str())
243+
LitKind::from_token_lit(token_lit).0.ok().and_then(|lit| lit.str())
244244
}
245245
_ => None,
246246
},
@@ -426,6 +426,7 @@ impl MetaItemKind {
426426
ExprKind::Lit(token_lit) => {
427427
// Turn failures to `None`, we'll get parse errors elsewhere.
428428
MetaItemLit::from_token_lit(token_lit, expr.span)
429+
.0
429430
.ok()
430431
.map(|lit| MetaItemKind::NameValue(lit))
431432
}

compiler/rustc_ast/src/util/literal.rs

+192-97
Large diffs are not rendered by default.

compiler/rustc_ast_lowering/src/expr.rs

+6-5
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,14 @@ impl<'hir> LoweringContext<'_, 'hir> {
118118
hir::ExprKind::Unary(op, ohs)
119119
}
120120
ExprKind::Lit(token_lit) => {
121-
let lit_kind = match LitKind::from_token_lit(*token_lit) {
121+
let (result, errs) = LitKind::from_token_lit(*token_lit);
122+
let lit_kind = match result {
122123
Ok(lit_kind) => lit_kind,
123-
Err(err) => {
124-
report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span);
125-
LitKind::Err
126-
}
124+
Err(()) => LitKind::Err,
127125
};
126+
for err in errs {
127+
report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span);
128+
}
128129
let lit = self.arena.alloc(respan(self.lower_span(e.span), lit_kind));
129130
hir::ExprKind::Lit(lit)
130131
}

compiler/rustc_ast_lowering/src/format.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,11 @@ fn inline_literals(mut fmt: Cow<'_, FormatArgs>) -> Cow<'_, FormatArgs> {
127127
&& let ExprKind::Lit(lit) = arg.kind
128128
{
129129
if let token::LitKind::Str | token::LitKind::StrRaw(_) = lit.kind
130-
&& let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit)
130+
&& let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit).0
131131
{
132132
literal = Some(s);
133133
} else if let token::LitKind::Integer = lit.kind
134-
&& let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit)
134+
&& let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit).0
135135
{
136136
literal = Some(Symbol::intern(&n.to_string()));
137137
}

compiler/rustc_ast_lowering/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -945,7 +945,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
945945
// In valid code the value always ends up as a single literal. Otherwise, a dummy
946946
// literal suffices because the error is handled elsewhere.
947947
let lit = if let ExprKind::Lit(token_lit) = expr.kind
948-
&& let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span)
948+
&& let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span).0
949949
{
950950
lit
951951
} else {

compiler/rustc_builtin_macros/src/concat.rs

+35-29
Original file line numberDiff line numberDiff line change
@@ -19,48 +19,54 @@ pub fn expand_concat(
1919
let mut has_errors = false;
2020
for e in es {
2121
match e.kind {
22-
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
23-
Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => {
24-
accumulator.push_str(s.as_str());
25-
}
26-
Ok(ast::LitKind::Char(c)) => {
27-
accumulator.push(c);
28-
}
29-
Ok(ast::LitKind::Int(i, _)) => {
30-
accumulator.push_str(&i.to_string());
31-
}
32-
Ok(ast::LitKind::Bool(b)) => {
33-
accumulator.push_str(&b.to_string());
34-
}
35-
Ok(ast::LitKind::CStr(..)) => {
36-
cx.emit_err(errors::ConcatCStrLit { span: e.span });
37-
has_errors = true;
38-
}
39-
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
40-
cx.emit_err(errors::ConcatBytestr { span: e.span });
41-
has_errors = true;
42-
}
43-
Ok(ast::LitKind::Err) => {
44-
has_errors = true;
22+
ast::ExprKind::Lit(token_lit) => {
23+
let (res, errs) = ast::LitKind::from_token_lit(token_lit);
24+
match res {
25+
Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => {
26+
accumulator.push_str(s.as_str());
27+
}
28+
Ok(ast::LitKind::Char(c)) => {
29+
accumulator.push(c);
30+
}
31+
Ok(ast::LitKind::Int(i, _)) => {
32+
accumulator.push_str(&i.to_string());
33+
}
34+
Ok(ast::LitKind::Bool(b)) => {
35+
accumulator.push_str(&b.to_string());
36+
}
37+
Ok(ast::LitKind::CStr(..)) => {
38+
cx.emit_err(errors::ConcatCStrLit { span: e.span });
39+
has_errors = true;
40+
}
41+
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
42+
cx.emit_err(errors::ConcatBytestr { span: e.span });
43+
has_errors = true;
44+
}
45+
Ok(ast::LitKind::Err) | Err(()) => {
46+
has_errors = true;
47+
}
4548
}
46-
Err(err) => {
49+
// njn: what happens if I remove some of these non-lowering ones?
50+
for err in errs {
4751
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
48-
has_errors = true;
4952
}
50-
},
53+
}
5154
// We also want to allow negative numeric literals.
5255
ast::ExprKind::Unary(ast::UnOp::Neg, ref expr)
5356
if let ast::ExprKind::Lit(token_lit) = expr.kind =>
5457
{
55-
match ast::LitKind::from_token_lit(token_lit) {
58+
let (res, errs) = ast::LitKind::from_token_lit(token_lit);
59+
match res {
5660
Ok(ast::LitKind::Int(i, _)) => accumulator.push_str(&format!("-{i}")),
5761
Ok(ast::LitKind::Float(f, _)) => accumulator.push_str(&format!("-{f}")),
58-
Err(err) => {
59-
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
62+
Err(()) => {
6063
has_errors = true;
6164
}
6265
_ => missing_literal.push(e.span),
6366
}
67+
for err in errs {
68+
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
69+
}
6470
}
6571
ast::ExprKind::IncludedBytes(..) => {
6672
cx.emit_err(errors::ConcatBytestr { span: e.span });

compiler/rustc_builtin_macros/src/concat_bytes.rs

+11-7
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ fn invalid_type_err(
1717
ConcatBytesInvalid, ConcatBytesInvalidSuggestion, ConcatBytesNonU8, ConcatBytesOob,
1818
};
1919
let snippet = cx.sess.source_map().span_to_snippet(span).ok();
20-
match ast::LitKind::from_token_lit(token_lit) {
20+
let (res, errs) = ast::LitKind::from_token_lit(token_lit);
21+
match res {
2122
Ok(ast::LitKind::CStr(_, _)) => {
2223
// Avoid ambiguity in handling of terminal `NUL` by refusing to
2324
// concatenate C string literals as bytes.
@@ -60,9 +61,11 @@ fn invalid_type_err(
6061
cx.emit_err(ConcatBytesNonU8 { span });
6162
}
6263
Ok(ast::LitKind::ByteStr(..) | ast::LitKind::Byte(_)) => unreachable!(),
63-
Err(err) => {
64-
report_lit_error(&cx.sess.parse_sess, err, token_lit, span);
65-
}
64+
Err(()) => {}
65+
}
66+
for err in errs {
67+
// njn: change to report_lit_error*s*
68+
report_lit_error(&cx.sess.parse_sess, err, token_lit, span);
6669
}
6770
}
6871

@@ -80,7 +83,8 @@ fn handle_array_element(
8083
*has_errors = true;
8184
None
8285
}
83-
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
86+
// njn: e.g. why doesn't this one have report_lit_error?
87+
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 {
8488
Ok(ast::LitKind::Int(
8589
val,
8690
ast::LitIntType::Unsuffixed | ast::LitIntType::Unsigned(ast::UintTy::U8),
@@ -141,7 +145,7 @@ pub fn expand_concat_bytes(
141145
ast::ExprKind::Repeat(expr, count) => {
142146
if let ast::ExprKind::Lit(token_lit) = count.value.kind
143147
&& let Ok(ast::LitKind::Int(count_val, _)) =
144-
ast::LitKind::from_token_lit(token_lit)
148+
ast::LitKind::from_token_lit(token_lit).0
145149
{
146150
if let Some(elem) =
147151
handle_array_element(cx, &mut has_errors, &mut missing_literals, expr)
@@ -154,7 +158,7 @@ pub fn expand_concat_bytes(
154158
cx.emit_err(errors::ConcatBytesBadRepeat { span: count.value.span });
155159
}
156160
}
157-
&ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
161+
&ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 {
158162
Ok(ast::LitKind::Byte(val)) => {
159163
accumulator.push(val);
160164
}

compiler/rustc_expand/src/base.rs

+22-18
Original file line numberDiff line numberDiff line change
@@ -1235,26 +1235,30 @@ pub fn expr_to_spanned_string<'a>(
12351235
let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr();
12361236

12371237
Err(match expr.kind {
1238-
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
1239-
Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)),
1240-
Ok(ast::LitKind::ByteStr(..)) => {
1241-
let mut err = cx.struct_span_err(expr.span, err_msg);
1242-
let span = expr.span.shrink_to_lo();
1243-
err.span_suggestion(
1244-
span.with_hi(span.lo() + BytePos(1)),
1245-
"consider removing the leading `b`",
1246-
"",
1247-
Applicability::MaybeIncorrect,
1248-
);
1249-
Some((err, true))
1250-
}
1251-
Ok(ast::LitKind::Err) => None,
1252-
Err(err) => {
1238+
ast::ExprKind::Lit(token_lit) => {
1239+
let (lit_kind, errs) = ast::LitKind::from_token_lit(token_lit);
1240+
let res = match lit_kind {
1241+
Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)),
1242+
Ok(ast::LitKind::ByteStr(..)) => {
1243+
let mut err = cx.struct_span_err(expr.span, err_msg);
1244+
let span = expr.span.shrink_to_lo();
1245+
err.span_suggestion(
1246+
span.with_hi(span.lo() + BytePos(1)),
1247+
"consider removing the leading `b`",
1248+
"",
1249+
Applicability::MaybeIncorrect,
1250+
);
1251+
Some((err, true))
1252+
}
1253+
Ok(ast::LitKind::Err) => None,
1254+
Err(()) => None,
1255+
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
1256+
};
1257+
for err in errs {
12531258
parser::report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span);
1254-
None
12551259
}
1256-
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
1257-
},
1260+
res
1261+
}
12581262
ast::ExprKind::Err => None,
12591263
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
12601264
})

compiler/rustc_expand/src/mbe/metavar_expr.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ fn parse_depth<'sess>(
119119
.span_diagnostic
120120
.struct_span_err(span, "meta-variable expression depth must be a literal"));
121121
};
122-
if let Ok(lit_kind) = LitKind::from_token_lit(*lit)
122+
if let Ok(lit_kind) = LitKind::from_token_lit(*lit).0
123123
&& let LitKind::Int(n_u128, LitIntType::Unsuffixed) = lit_kind
124124
&& let Ok(n_usize) = usize::try_from(n_u128)
125125
{

compiler/rustc_lexer/src/unescape.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ where
346346
// them in the range computation.
347347
while let Some(c) = chars.next() {
348348
let start = src.len() - chars.as_str().len() - c.len_utf8();
349-
let res = match c {
349+
let res: Result<T, EscapeError> = match c {
350350
'\\' => {
351351
match chars.clone().next() {
352352
Some('\n') => {
@@ -362,7 +362,7 @@ where
362362
_ => scan_escape::<T>(&mut chars, mode),
363363
}
364364
}
365-
'"' => Err(EscapeError::EscapeOnlyChar),
365+
'"' => Err(EscapeError::EscapeOnlyChar), // njn: is this ever hit?
366366
'\r' => Err(EscapeError::BareCarriageReturn),
367367
_ => ascii_check(c, chars_should_be_ascii).map(Into::into),
368368
};

compiler/rustc_parse/src/lexer/mod.rs

+24-24
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ use rustc_span::{edition::Edition, BytePos, Pos, Span};
2121

2222
mod diagnostics;
2323
mod tokentrees;
24-
mod unescape_error_reporting;
24+
pub(crate) mod unescape_error_reporting;
2525
mod unicode_chars;
2626

27-
use unescape_error_reporting::{emit_unescape_error, escaped_char};
27+
use unescape_error_reporting::escaped_char;
2828

2929
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
3030
//
@@ -696,47 +696,47 @@ impl<'a> StringReader<'a> {
696696
fn cook_common(
697697
&self,
698698
kind: token::LitKind,
699-
mode: Mode,
699+
mode: Mode, // njn: remove
700700
start: BytePos,
701701
end: BytePos,
702702
prefix_len: u32,
703703
postfix_len: u32,
704704
unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
705705
) -> (token::LitKind, Symbol) {
706-
let mut has_fatal_err = false;
707706
let content_start = start + BytePos(prefix_len);
708707
let content_end = end - BytePos(postfix_len);
709708
let lit_content = self.str_from_to(content_start, content_end);
709+
#[allow(unused)]
710710
unescape(lit_content, mode, &mut |range, result| {
711711
// Here we only check for errors. The actual unescaping is done later.
712+
// njn: temp for comparison, remove eventually
712713
if let Err(err) = result {
714+
// `span` is substring expressed as a span
715+
// `range` is substring expressed as indices
713716
let span_with_quotes = self.mk_sp(start, end);
714717
let (start, end) = (range.start as u32, range.end as u32);
715718
let lo = content_start + BytePos(start);
716719
let hi = lo + BytePos(end - start);
717720
let span = self.mk_sp(lo, hi);
718-
if err.is_fatal() {
719-
has_fatal_err = true;
720-
}
721-
emit_unescape_error(
722-
&self.sess.span_diagnostic,
723-
lit_content,
724-
span_with_quotes,
725-
span,
726-
mode,
727-
range,
728-
err,
729-
);
721+
//if err.is_fatal() {
722+
// //has_fatal_err = true;
723+
//}
724+
// eprintln!(
725+
// "earl_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}, {:?}",
726+
// lit_content, span_with_quotes, span, mode, range, err
727+
// );
728+
// crate::lexer::unescape_error_reporting::emit_unescape_error(
729+
// &self.sess.span_diagnostic,
730+
// lit_content,
731+
// span_with_quotes,
732+
// span,
733+
// mode,
734+
// range,
735+
// err,
736+
// );
730737
}
731738
});
732-
733-
// We normally exclude the quotes for the symbol, but for errors we
734-
// include it because it results in clearer error messages.
735-
if !has_fatal_err {
736-
(kind, Symbol::intern(lit_content))
737-
} else {
738-
(token::Err, self.symbol_from_to(start, end))
739-
}
739+
(kind, Symbol::intern(lit_content))
740740
}
741741

742742
fn cook_quoted(

compiler/rustc_parse/src/lexer/unescape_error_reporting.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ pub(crate) fn emit_unescape_error(
1515
lit: &str,
1616
// full span of the literal, including quotes
1717
span_with_quotes: Span,
18-
// interior span of the literal, without quotes
18+
// interior span of the literal, without quotes // njn: is that wrong?
1919
span: Span,
2020
mode: Mode,
2121
// range of the error inside `lit`
2222
range: Range<usize>,
2323
error: EscapeError,
2424
) {
2525
debug!(
26-
"emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}",
27-
lit, span_with_quotes, mode, range, error
26+
"emit_unescape_error: {:?}, {:?}, {:?}, {:?}, {:?}, {:?}",
27+
lit, span_with_quotes, span, mode, range, error
2828
);
2929
let last_char = || {
3030
let c = lit[range.clone()].chars().next_back().unwrap();

0 commit comments

Comments
 (0)