Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rollup of 7 pull requests #104192

Merged
merged 25 commits into from
Nov 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9e36fd9
stabilize `int_log`
Oct 26, 2022
8498e3a
Add examples for `pointer::mask`
WaffleLapkin Oct 28, 2022
f32e678
Rename some variables.
nnethercote Nov 3, 2022
84ca2c3
Clarify range calculations.
nnethercote Nov 3, 2022
34b32b0
Use `Mode` less.
nnethercote Nov 3, 2022
7dbf2c0
Make non-ASCII errors more consistent.
nnethercote Nov 3, 2022
a21c045
Improve comments.
nnethercote Nov 3, 2022
d963686
Refactor `cook_lexer_literal`.
nnethercote Nov 3, 2022
a203482
Inline and remove `validate_int_literal`.
nnethercote Nov 3, 2022
f8e2cef
Move intra-doc link checks to a separate function.
ehuss Nov 4, 2022
57b2290
Remove reference from the intra-doc link checker.
ehuss Nov 4, 2022
971a146
Promote {aarch64,i686,x86_64}-unknown-uefi to Tier 2
nicholasbishop Nov 3, 2022
a838952
Remove `unescape_byte_literal`.
nnethercote Nov 4, 2022
43d21b5
Rename some `result` variables as `res`, for consistency.
nnethercote Nov 4, 2022
f67ee43
rustdoc: Add mutable to the description
yancyribbens Nov 7, 2022
d6c97a3
Simplify `unescape_{char,byte}`.
nnethercote Nov 8, 2022
f665847
const Compare Tuples
onestacked Nov 7, 2022
b6c05eb
Cleanup fn trait ref test
onestacked Nov 9, 2022
1db7f69
Rollup merge of #103570 - lukas-code:stabilize-ilog, r=scottmcm
Dylan-DPC Nov 9, 2022
b457d70
Rollup merge of #103694 - WaffleLapkin:mask_doc_example, r=scottmcm
Dylan-DPC Nov 9, 2022
4b50fb3
Rollup merge of #103919 - nnethercote:unescaping-cleanups, r=matklad
Dylan-DPC Nov 9, 2022
2313d32
Rollup merge of #103933 - nicholasbishop:bishop-uefi-tier-2, r=JohnTitor
Dylan-DPC Nov 9, 2022
1d262cd
Rollup merge of #103952 - ehuss:dont-intra-linkcheck-reference, r=Mar…
Dylan-DPC Nov 9, 2022
64e737c
Rollup merge of #104111 - yancyribbens:add-mutable-to-the-description…
Dylan-DPC Nov 9, 2022
062f2fc
Rollup merge of #104125 - ink-feather-org:const_cmp_tuples, r=fee1-dead
Dylan-DPC Nov 9, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 11 additions & 18 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@

use crate::ast::{self, Lit, LitKind};
use crate::token::{self, Token};

use rustc_lexer::unescape::{unescape_byte, unescape_char};
use rustc_lexer::unescape::{unescape_byte_literal, unescape_literal, Mode};
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;

use std::ascii;

pub enum LitError {
Expand Down Expand Up @@ -109,13 +106,11 @@ impl LitKind {
let s = symbol.as_str();
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
unescape_literal(&s, Mode::ByteStr, &mut |_, c| match c {
Ok(c) => buf.push(byte_from_char(c)),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
Expand All @@ -127,13 +122,11 @@ impl LitKind {
let bytes = if s.contains('\r') {
let mut buf = Vec::with_capacity(s.len());
let mut error = Ok(());
unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
unescape_literal(&s, Mode::RawByteStr, &mut |_, c| match c {
Ok(c) => buf.push(byte_from_char(c)),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
});
Expand Down
10 changes: 5 additions & 5 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,13 @@ pub enum RawStrError {
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Base {
/// Literal starts with "0b".
Binary,
Binary = 2,
/// Literal starts with "0o".
Octal,
/// Literal starts with "0x".
Hexadecimal,
Octal = 8,
/// Literal doesn't contain a prefix.
Decimal,
Decimal = 10,
/// Literal starts with "0x".
Hexadecimal = 16,
}

/// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
Expand Down
130 changes: 51 additions & 79 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,8 @@ pub enum EscapeError {

/// Unicode escape code in byte literal.
UnicodeEscapeInByte,
/// Non-ascii character in byte literal.
/// Non-ascii character in byte literal, byte string literal, or raw byte string literal.
NonAsciiCharInByte,
/// Non-ascii character in byte string literal.
NonAsciiCharInByteString,

/// After a line ending with '\', the next line contains whitespace
/// characters that are not skipped.
Expand All @@ -78,54 +76,33 @@ impl EscapeError {
/// Takes a contents of a literal (without quotes) and produces a
/// sequence of escaped characters or errors.
/// Values are returned through invoking of the provided callback.
pub fn unescape_literal<F>(literal_text: &str, mode: Mode, callback: &mut F)
pub fn unescape_literal<F>(src: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
match mode {
Mode::Char | Mode::Byte => {
let mut chars = literal_text.chars();
let result = unescape_char_or_byte(&mut chars, mode);
// The Chars iterator moved forward.
callback(0..(literal_text.len() - chars.as_str().len()), result);
let mut chars = src.chars();
let res = unescape_char_or_byte(&mut chars, mode == Mode::Byte);
callback(0..(src.len() - chars.as_str().len()), res);
}
Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(literal_text, mode, callback),
// NOTE: Raw strings do not perform any explicit character escaping, here we
// only translate CRLF to LF and produce errors on bare CR.
Mode::Str | Mode::ByteStr => unescape_str_or_byte_str(src, mode == Mode::ByteStr, callback),
Mode::RawStr | Mode::RawByteStr => {
unescape_raw_str_or_raw_byte_str(literal_text, mode, callback)
unescape_raw_str_or_raw_byte_str(src, mode == Mode::RawByteStr, callback)
}
}
}

/// Takes a contents of a byte, byte string or raw byte string (without quotes)
/// and produces a sequence of bytes or errors.
/// Values are returned through invoking of the provided callback.
pub fn unescape_byte_literal<F>(literal_text: &str, mode: Mode, callback: &mut F)
where
F: FnMut(Range<usize>, Result<u8, EscapeError>),
{
debug_assert!(mode.is_bytes());
unescape_literal(literal_text, mode, &mut |range, result| {
callback(range, result.map(byte_from_char));
})
}

/// Takes a contents of a char literal (without quotes), and returns an
/// unescaped char or an error
pub fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
let mut chars = literal_text.chars();
unescape_char_or_byte(&mut chars, Mode::Char)
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
/// unescaped char or an error.
pub fn unescape_char(src: &str) -> Result<char, EscapeError> {
unescape_char_or_byte(&mut src.chars(), false)
}

/// Takes a contents of a byte literal (without quotes), and returns an
/// unescaped byte or an error.
pub fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
let mut chars = literal_text.chars();
unescape_char_or_byte(&mut chars, Mode::Byte)
.map(byte_from_char)
.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
unescape_char_or_byte(&mut src.chars(), true).map(byte_from_char)
}

/// What kind of literal do we parse.
Expand All @@ -147,20 +124,17 @@ impl Mode {
}
}

pub fn is_bytes(self) -> bool {
pub fn is_byte(self) -> bool {
match self {
Mode::Byte | Mode::ByteStr | Mode::RawByteStr => true,
Mode::Char | Mode::Str | Mode::RawStr => false,
}
}
}

fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
fn scan_escape(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
// Previous character was '\\', unescape what follows.

let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;

let res = match second_char {
let res = match chars.next().ok_or(EscapeError::LoneSlash)? {
'"' => '"',
'n' => '\n',
'r' => '\r',
Expand All @@ -181,7 +155,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
let value = hi * 16 + lo;

// For a non-byte literal verify that it is within ASCII range.
if !mode.is_bytes() && !is_ascii(value) {
if !is_byte && !is_ascii(value) {
return Err(EscapeError::OutOfRangeHexEscape);
}
let value = value as u8;
Expand Down Expand Up @@ -217,7 +191,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {

// Incorrect syntax has higher priority for error reporting
// than unallowed value for a literal.
if mode.is_bytes() {
if is_byte {
return Err(EscapeError::UnicodeEscapeInByte);
}

Expand Down Expand Up @@ -249,23 +223,22 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
}

#[inline]
fn ascii_check(first_char: char, mode: Mode) -> Result<char, EscapeError> {
if mode.is_bytes() && !first_char.is_ascii() {
fn ascii_check(c: char, is_byte: bool) -> Result<char, EscapeError> {
if is_byte && !c.is_ascii() {
// Byte literal can't be a non-ascii character.
Err(EscapeError::NonAsciiCharInByte)
} else {
Ok(first_char)
Ok(c)
}
}

fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
debug_assert!(mode == Mode::Char || mode == Mode::Byte);
let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
let res = match first_char {
'\\' => scan_escape(chars, mode),
fn unescape_char_or_byte(chars: &mut Chars<'_>, is_byte: bool) -> Result<char, EscapeError> {
let c = chars.next().ok_or(EscapeError::ZeroChars)?;
let res = match c {
'\\' => scan_escape(chars, is_byte),
'\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
'\r' => Err(EscapeError::BareCarriageReturn),
_ => ascii_check(first_char, mode),
_ => ascii_check(c, is_byte),
}?;
if chars.next().is_some() {
return Err(EscapeError::MoreThanOneChar);
Expand All @@ -275,20 +248,20 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca

/// Takes a contents of a string literal (without quotes) and produces a
/// sequence of escaped characters or errors.
fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
fn unescape_str_or_byte_str<F>(src: &str, is_byte: bool, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
debug_assert!(mode == Mode::Str || mode == Mode::ByteStr);
let initial_len = src.len();
let mut chars = src.chars();
while let Some(first_char) = chars.next() {
let start = initial_len - chars.as_str().len() - first_char.len_utf8();

let unescaped_char = match first_char {
// The `start` and `end` computation here is complicated because
// `skip_ascii_whitespace` makes us to skip over chars without counting
// them in the range computation.
while let Some(c) = chars.next() {
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
'\\' => {
let second_char = chars.clone().next();
match second_char {
match chars.clone().next() {
Some('\n') => {
// Rust language specification requires us to skip whitespaces
// if unescaped '\' character is followed by '\n'.
Expand All @@ -297,17 +270,17 @@ where
skip_ascii_whitespace(&mut chars, start, callback);
continue;
}
_ => scan_escape(&mut chars, mode),
_ => scan_escape(&mut chars, is_byte),
}
}
'\n' => Ok('\n'),
'\t' => Ok('\t'),
'"' => Err(EscapeError::EscapeOnlyChar),
'\r' => Err(EscapeError::BareCarriageReturn),
_ => ascii_check(first_char, mode),
_ => ascii_check(c, is_byte),
};
let end = initial_len - chars.as_str().len();
callback(start..end, unescaped_char);
let end = src.len() - chars.as_str().len();
callback(start..end, res);
}

fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
Expand Down Expand Up @@ -340,30 +313,29 @@ where
/// Takes a contents of a string literal (without quotes) and produces a
/// sequence of characters or errors.
/// NOTE: Raw strings do not perform any explicit character escaping, here we
/// only translate CRLF to LF and produce errors on bare CR.
fn unescape_raw_str_or_raw_byte_str<F>(literal_text: &str, mode: Mode, callback: &mut F)
/// only produce errors on bare CR.
fn unescape_raw_str_or_raw_byte_str<F>(src: &str, is_byte: bool, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
debug_assert!(mode == Mode::RawStr || mode == Mode::RawByteStr);
let initial_len = literal_text.len();

let mut chars = literal_text.chars();
while let Some(curr) = chars.next() {
let start = initial_len - chars.as_str().len() - curr.len_utf8();
let mut chars = src.chars();

let result = match curr {
// The `start` and `end` computation here matches the one in
// `unescape_str_or_byte_str` for consistency, even though this function
// doesn't have to worry about skipping any chars.
while let Some(c) = chars.next() {
let start = src.len() - chars.as_str().len() - c.len_utf8();
let res = match c {
'\r' => Err(EscapeError::BareCarriageReturnInRawString),
c if mode.is_bytes() && !c.is_ascii() => Err(EscapeError::NonAsciiCharInByteString),
c => Ok(c),
_ => ascii_check(c, is_byte),
};
let end = initial_len - chars.as_str().len();

callback(start..end, result);
let end = src.len() - chars.as_str().len();
callback(start..end, res);
}
}

fn byte_from_char(c: char) -> u8 {
#[inline]
pub fn byte_from_char(c: char) -> u8 {
let res = c as u32;
debug_assert!(res <= u8::MAX as u32, "guaranteed because of Mode::ByteStr");
res as u8
Expand Down
29 changes: 10 additions & 19 deletions compiler/rustc_lexer/src/unescape/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ use super::*;
#[test]
fn test_unescape_char_bad() {
fn check(literal_text: &str, expected_error: EscapeError) {
let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err);
assert_eq!(actual_result, Err(expected_error));
assert_eq!(unescape_char(literal_text), Err(expected_error));
}

check("", EscapeError::ZeroChars);
Expand Down Expand Up @@ -68,8 +67,7 @@ fn test_unescape_char_bad() {
#[test]
fn test_unescape_char_good() {
fn check(literal_text: &str, expected_char: char) {
let actual_result = unescape_char(literal_text);
assert_eq!(actual_result, Ok(expected_char));
assert_eq!(unescape_char(literal_text), Ok(expected_char));
}

check("a", 'a');
Expand Down Expand Up @@ -149,8 +147,7 @@ fn test_unescape_str_good() {
#[test]
fn test_unescape_byte_bad() {
fn check(literal_text: &str, expected_error: EscapeError) {
let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err);
assert_eq!(actual_result, Err(expected_error));
assert_eq!(unescape_byte(literal_text), Err(expected_error));
}

check("", EscapeError::ZeroChars);
Expand Down Expand Up @@ -219,8 +216,7 @@ fn test_unescape_byte_bad() {
#[test]
fn test_unescape_byte_good() {
fn check(literal_text: &str, expected_byte: u8) {
let actual_result = unescape_byte(literal_text);
assert_eq!(actual_result, Ok(expected_byte));
assert_eq!(unescape_byte(literal_text), Ok(expected_byte));
}

check("a", b'a');
Expand All @@ -246,10 +242,10 @@ fn test_unescape_byte_good() {
fn test_unescape_byte_str_good() {
fn check(literal_text: &str, expected: &[u8]) {
let mut buf = Ok(Vec::with_capacity(literal_text.len()));
unescape_byte_literal(literal_text, Mode::ByteStr, &mut |range, c| {
unescape_literal(literal_text, Mode::ByteStr, &mut |range, c| {
if let Ok(b) = &mut buf {
match c {
Ok(c) => b.push(c),
Ok(c) => b.push(byte_from_char(c)),
Err(e) => buf = Err((range, e)),
}
}
Expand Down Expand Up @@ -280,18 +276,13 @@ fn test_unescape_raw_str() {

#[test]
fn test_unescape_raw_byte_str() {
fn check(literal: &str, expected: &[(Range<usize>, Result<u8, EscapeError>)]) {
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
let mut unescaped = Vec::with_capacity(literal.len());
unescape_byte_literal(literal, Mode::RawByteStr, &mut |range, res| {
unescaped.push((range, res))
});
unescape_literal(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res)));
assert_eq!(unescaped, expected);
}

check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]);
check(
"🦀a",
&[(0..4, Err(EscapeError::NonAsciiCharInByteString)), (4..5, Ok(byte_from_char('a')))],
);
check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]);
check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]);
}
Loading