From b0a89441208e56f85b5c51eccde89275f667f787 Mon Sep 17 00:00:00 2001 From: Pavel Grigorenko Date: Thu, 1 Aug 2024 00:57:38 +0300 Subject: [PATCH 1/3] rustc_errors: enforce OUTPUT_REPLACEMENTS is sorted with a compile-time assertion --- compiler/rustc_errors/src/emitter.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index d673646ace450..0e9c7ef5fb3c4 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -2564,9 +2564,7 @@ fn num_decimal_digits(num: usize) -> usize { // We replace some characters so the CLI output is always consistent and underlines aligned. // Keep the following list in sync with `rustc_span::char_width`. -// ATTENTION: keep lexicografically sorted so that the binary search will work const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ - // tidy-alphabetical-start // In terminals without Unicode support the following will be garbled, but in *all* terminals // the underlying codepoint will be as well. We could gate this replacement behind a "unicode // support" gate. @@ -2579,7 +2577,7 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ ('\u{0006}', "␆"), ('\u{0007}', "␇"), ('\u{0008}', "␈"), - ('\u{0009}', " "), // We do our own tab replacement + ('\t', " "), // We do our own tab replacement ('\u{000b}', "␋"), ('\u{000c}', "␌"), ('\u{000d}', "␍"), @@ -2612,10 +2610,20 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[ ('\u{2067}', "�"), ('\u{2068}', "�"), ('\u{2069}', "�"), - // tidy-alphabetical-end ]; fn normalize_whitespace(s: &str) -> String { + const { + let mut i = 1; + while i < OUTPUT_REPLACEMENTS.len() { + assert!( + OUTPUT_REPLACEMENTS[i - 1].0 < OUTPUT_REPLACEMENTS[i].0, + "The OUTPUT_REPLACEMENTS array must be sorted (for binary search to work) \ + and must contain no duplicate entries" + ); + i += 1; + } + } // Scan the input string for a character in the ordered table above. If it's present, replace // it with it's alternative string (it can be more than 1 char!). Otherwise, retain the input // char. At the end, allocate all chars into a string in one operation. From 15982b2fcae0fba61816dbd31617bc6604e36af5 Mon Sep 17 00:00:00 2001 From: Pavel Grigorenko Date: Thu, 1 Aug 2024 01:02:25 +0300 Subject: [PATCH 2/3] rustc_errors: fix inaccurate comment --- compiler/rustc_errors/src/emitter.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs index 0e9c7ef5fb3c4..ef9194022b529 100644 --- a/compiler/rustc_errors/src/emitter.rs +++ b/compiler/rustc_errors/src/emitter.rs @@ -2624,9 +2624,9 @@ fn normalize_whitespace(s: &str) -> String { i += 1; } } - // Scan the input string for a character in the ordered table above. If it's present, replace - // it with it's alternative string (it can be more than 1 char!). Otherwise, retain the input - // char. At the end, allocate all chars into a string in one operation. + // Scan the input string for a character in the ordered table above. + // If it's present, replace it with its alternative string (it can be more than 1 char!). + // Otherwise, retain the input char. s.chars().fold(String::with_capacity(s.len()), |mut s, c| { match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) { Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1), From 7869400e589ca4e88cebb1da80a420f8e9042cdf Mon Sep 17 00:00:00 2001 From: Zachary S Date: Wed, 7 Aug 2024 13:04:43 -0500 Subject: [PATCH 3/3] Update E0517 message to reflect RFC 2195. --- .../src/error_codes/E0517.md | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/compiler/rustc_error_codes/src/error_codes/E0517.md b/compiler/rustc_error_codes/src/error_codes/E0517.md index ae802245bd1d7..5354a08bf31a7 100644 --- a/compiler/rustc_error_codes/src/error_codes/E0517.md +++ b/compiler/rustc_error_codes/src/error_codes/E0517.md @@ -25,14 +25,17 @@ impl Foo { These attributes do not work on typedefs, since typedefs are just aliases. Representations like `#[repr(u8)]`, `#[repr(i64)]` are for selecting the -discriminant size for enums with no data fields on any of the variants, e.g. -`enum Color {Red, Blue, Green}`, effectively setting the size of the enum to -the size of the provided type. Such an enum can be cast to a value of the same -type as well. In short, `#[repr(u8)]` makes the enum behave like an integer -with a constrained set of allowed values. +discriminant size for enums. For enums with no data fields on any of the +variants, e.g. `enum Color {Red, Blue, Green}`, this effectively sets the size +of the enum to the size of the provided type. Such an enum can be cast to a +value of the same type as well. In short, `#[repr(u8)]` makes a field-less enum +behave like an integer with a constrained set of allowed values. -Only field-less enums can be cast to numerical primitives, so this attribute -will not apply to structs. +For a description of how `#[repr(C)]` and representations like `#[repr(u8)]` +affect the layout of enums with data fields, see [RFC 2195][rfc2195]. + +Only field-less enums can be cast to numerical primitives. Representations like +`#[repr(u8)]` will not apply to structs. `#[repr(packed)]` reduces padding to make the struct size smaller. The representation of enums isn't strictly defined in Rust, and this attribute @@ -42,3 +45,5 @@ won't work on enums. types (i.e., `u8`, `i32`, etc) a representation that permits vectorization via SIMD. This doesn't make much sense for enums since they don't consist of a single list of data. + +[rfc2195]: https://github.com/rust-lang/rfcs/blob/master/text/2195-really-tagged-unions.md