From 38e0ae590caab982a4305da58a0a62385c2dd880 Mon Sep 17 00:00:00 2001 From: Grisha Vartanyan Date: Wed, 23 Mar 2022 22:13:55 +0100 Subject: [PATCH 1/4] Reduce max hash in raw strings from u16 to u8 --- compiler/rustc_lexer/src/lib.rs | 14 +++++++------- compiler/rustc_lexer/src/tests.rs | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 5b8300ab530f9..a41e0374f410a 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -161,15 +161,15 @@ pub enum LiteralKind { /// "b"abc"", "b"abc" ByteStr { terminated: bool }, /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a" - RawStr { n_hashes: u16, err: Option }, + RawStr { n_hashes: u8, err: Option }, /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a" - RawByteStr { n_hashes: u16, err: Option }, + RawByteStr { n_hashes: u8, err: Option }, } /// Error produced validating a raw string. Represents cases like: /// - `r##~"abcde"##`: `InvalidStarter` /// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)` -/// - Too many `#`s (>65535): `TooManyDelimiters` +/// - Too many `#`s (>255): `TooManyDelimiters` // perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum RawStrError { @@ -178,7 +178,7 @@ pub enum RawStrError { /// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they /// may have intended to terminate it. NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option }, - /// More than 65535 `#`s exist. + /// More than 255 `#`s exist. TooManyDelimiters { found: usize }, } @@ -698,12 +698,12 @@ impl Cursor<'_> { } /// Eats the double-quoted string and returns `n_hashes` and an error if encountered. - fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u16, Option) { + fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u8, Option) { // Wrap the actual function to handle the error with too many hashes. // This way, it eats the whole raw string. let (n_hashes, err) = self.raw_string_unvalidated(prefix_len); - // Only up to 65535 `#`s are allowed in raw strings - match u16::try_from(n_hashes) { + // Only up to 255 `#`s are allowed in raw strings + match u8::try_from(n_hashes) { Ok(num) => (num, err), // We lie about the number of hashes here :P Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })), diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs index 548de67449abf..07daee06f0f86 100644 --- a/compiler/rustc_lexer/src/tests.rs +++ b/compiler/rustc_lexer/src/tests.rs @@ -2,7 +2,7 @@ use super::*; use expect_test::{expect, Expect}; -fn check_raw_str(s: &str, expected_hashes: u16, expected_err: Option) { +fn check_raw_str(s: &str, expected_hashes: u8, expected_err: Option) { let s = &format!("r{}", s); let mut cursor = Cursor::new(s); cursor.bump(); @@ -68,13 +68,13 @@ fn test_unterminated_no_pound() { #[test] fn test_too_many_hashes() { - let max_count = u16::MAX; + let max_count = u8::MAX; let mut hashes: String = "#".repeat(max_count.into()); - // Valid number of hashes (65535 = 2^16 - 1), but invalid string. + // Valid number of hashes (255 = 2^8 - 1 = u8::MAX), but invalid string. check_raw_str(&hashes, max_count, Some(RawStrError::InvalidStarter { bad_char: '\u{0}' })); - // One more hash sign (65536 = 2^16) becomes too many. + // One more hash sign (256 = 2^8) becomes too many. hashes.push('#'); check_raw_str( &hashes, From b51f20eaf5cc311ca601643306ec2ee0c8a714e8 Mon Sep 17 00:00:00 2001 From: Grisha Vartanyan Date: Wed, 23 Mar 2022 23:07:39 +0100 Subject: [PATCH 2/4] Update syntax tree definition --- compiler/rustc_ast/src/ast.rs | 2 +- compiler/rustc_ast/src/token.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index f5c4affdce2d1..75ccbc92be1f6 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -1616,7 +1616,7 @@ pub enum StrStyle { /// A raw string, like `r##"foo"##`. /// /// The value is the number of `#` symbols used. - Raw(u16), + Raw(u8), } /// An AST literal. diff --git a/compiler/rustc_ast/src/token.rs b/compiler/rustc_ast/src/token.rs index c367573de8a94..b860612f43096 100644 --- a/compiler/rustc_ast/src/token.rs +++ b/compiler/rustc_ast/src/token.rs @@ -60,9 +60,9 @@ pub enum LitKind { Integer, Float, Str, - StrRaw(u16), // raw string delimited by `n` hash symbols + StrRaw(u8), // raw string delimited by `n` hash symbols ByteStr, - ByteStrRaw(u16), // raw byte string delimited by `n` hash symbols + ByteStrRaw(u8), // raw byte string delimited by `n` hash symbols Err, } From c3840c9ea142c80107067869143192498beec3d2 Mon Sep 17 00:00:00 2001 From: Grisha Vartanyan Date: Fri, 25 Mar 2022 15:05:27 +0100 Subject: [PATCH 3/4] Update clippy helper function types --- src/tools/clippy/clippy_lints/src/regex.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/clippy/clippy_lints/src/regex.rs b/src/tools/clippy/clippy_lints/src/regex.rs index b6d04334de9e5..a92097e1d24ca 100644 --- a/src/tools/clippy/clippy_lints/src/regex.rs +++ b/src/tools/clippy/clippy_lints/src/regex.rs @@ -81,7 +81,7 @@ impl<'tcx> LateLintPass<'tcx> for Regex { #[allow(clippy::cast_possible_truncation)] // truncation very unlikely here #[must_use] -fn str_span(base: Span, c: regex_syntax::ast::Span, offset: u16) -> Span { +fn str_span(base: Span, c: regex_syntax::ast::Span, offset: u8) -> Span { let offset = u32::from(offset); let end = base.lo() + BytePos(u32::try_from(c.end.offset).expect("offset too large") + offset); let start = base.lo() + BytePos(u32::try_from(c.start.offset).expect("offset too large") + offset); From 759d1e6af8610cdfdbf091a5d3d825b05c1fc6a2 Mon Sep 17 00:00:00 2001 From: Grisha Vartanyan Date: Wed, 30 Mar 2022 18:20:30 +0200 Subject: [PATCH 4/4] Update error message & remove outdated test comment --- compiler/rustc_parse/src/lexer/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 601a39e69ab9d..d688d37c1cf3d 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -589,15 +589,13 @@ impl<'a> StringReader<'a> { } } - /// Note: It was decided to not add a test case, because it would be too big. - /// fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! { self.fatal_span_( start, self.pos, &format!( "too many `#` symbols: raw strings may be delimited \ - by up to 65535 `#` symbols, but found {}", + by up to 255 `#` symbols, but found {}", found ), )