From 789baed88afab66e04ec9d2182a3c892f553053a Mon Sep 17 00:00:00 2001
From: Pavel Grigorenko
Date: Thu, 1 Aug 2024 00:33:45 +0300
Subject: [PATCH] rustc_errors: use perfect hashing for character replacements

---
 Cargo.lock                           | 15 +++++
 compiler/rustc_errors/Cargo.toml     |  1 +
 compiler/rustc_errors/src/emitter.rs | 96 +++++++++++++---------------
 src/tools/tidy/src/deps.rs           |  5 ++
 4 files changed, 67 insertions(+), 50 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a4b4e49f82c2e..98a87a8c8f42b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2712,6 +2712,7 @@ version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
 dependencies = [
+ "phf_macros",
  "phf_shared 0.11.2",
 ]
 
@@ -2745,6 +2746,19 @@ dependencies = [
  "rand",
 ]
 
+[[package]]
+name = "phf_macros"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
+dependencies = [
+ "phf_generator 0.11.2",
+ "phf_shared 0.11.2",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.67",
+]
+
 [[package]]
 name = "phf_shared"
 version = "0.10.0"
@@ -3653,6 +3667,7 @@ version = "0.0.0"
 dependencies = [
  "annotate-snippets 0.10.2",
  "derive_setters",
+ "phf",
  "rustc_ast",
  "rustc_ast_pretty",
  "rustc_data_structures",
diff --git a/compiler/rustc_errors/Cargo.toml b/compiler/rustc_errors/Cargo.toml
index 2fff9f2de50fb..e6d88ba48b44b 100644
--- a/compiler/rustc_errors/Cargo.toml
+++ b/compiler/rustc_errors/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 # tidy-alphabetical-start
 annotate-snippets = "0.10"
 derive_setters = "0.1.6"
+phf = { version = "0.11.2", features = ["macros"] }
 rustc_ast = { path = "../rustc_ast" }
 rustc_ast_pretty = { path = "../rustc_ast_pretty" }
 rustc_data_structures = { path = "../rustc_data_structures" }
diff --git a/compiler/rustc_errors/src/emitter.rs b/compiler/rustc_errors/src/emitter.rs
index d673646ace450..f632f518682a9 100644
--- a/compiler/rustc_errors/src/emitter.rs
+++ b/compiler/rustc_errors/src/emitter.rs
@@ -2563,56 +2563,52 @@ fn num_decimal_digits(num: usize) -> usize {
 }
 
 // We replace some characters so the CLI output is always consistent and underlines aligned.
-// Keep the following list in sync with `rustc_span::char_width`.
-// ATTENTION: keep lexicografically sorted so that the binary search will work
-const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
-    // tidy-alphabetical-start
+const OUTPUT_REPLACEMENTS: phf::Map<char, &'static str> = phf::phf_map![
     // In terminals without Unicode support the following will be garbled, but in *all* terminals
     // the underlying codepoint will be as well. We could gate this replacement behind a "unicode
     // support" gate.
-    ('\0', "␀"),
-    ('\u{0001}', "␁"),
-    ('\u{0002}', "␂"),
-    ('\u{0003}', "␃"),
-    ('\u{0004}', "␄"),
-    ('\u{0005}', "␅"),
-    ('\u{0006}', "␆"),
-    ('\u{0007}', "␇"),
-    ('\u{0008}', "␈"),
-    ('\u{0009}', "    "), // We do our own tab replacement
-    ('\u{000b}', "␋"),
-    ('\u{000c}', "␌"),
-    ('\u{000d}', "␍"),
-    ('\u{000e}', "␎"),
-    ('\u{000f}', "␏"),
-    ('\u{0010}', "␐"),
-    ('\u{0011}', "␑"),
-    ('\u{0012}', "␒"),
-    ('\u{0013}', "␓"),
-    ('\u{0014}', "␔"),
-    ('\u{0015}', "␕"),
-    ('\u{0016}', "␖"),
-    ('\u{0017}', "␗"),
-    ('\u{0018}', "␘"),
-    ('\u{0019}', "␙"),
-    ('\u{001a}', "␚"),
-    ('\u{001b}', "␛"),
-    ('\u{001c}', "␜"),
-    ('\u{001d}', "␝"),
-    ('\u{001e}', "␞"),
-    ('\u{001f}', "␟"),
-    ('\u{007f}', "␡"),
-    ('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters.
-    ('\u{202a}', "�"), // The following unicode text flow control characters are inconsistently
-    ('\u{202b}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
-    ('\u{202c}', "�"), // not corresponding to the visible source code, so we replace them always.
-    ('\u{202d}', "�"),
-    ('\u{202e}', "�"),
-    ('\u{2066}', "�"),
-    ('\u{2067}', "�"),
-    ('\u{2068}', "�"),
-    ('\u{2069}', "�"),
-    // tidy-alphabetical-end
+    '\0' => "␀",
+    '\t' => "    ", // We do our own tab replacement
+    '\r' => "␍",
+    '\u{0001}' => "␁",
+    '\u{0002}' => "␂",
+    '\u{0003}' => "␃",
+    '\u{0004}' => "␄",
+    '\u{0005}' => "␅",
+    '\u{0006}' => "␆",
+    '\u{0007}' => "␇",
+    '\u{0008}' => "␈",
+    '\u{000b}' => "␋",
+    '\u{000c}' => "␌",
+    '\u{000e}' => "␎",
+    '\u{000f}' => "␏",
+    '\u{0010}' => "␐",
+    '\u{0011}' => "␑",
+    '\u{0012}' => "␒",
+    '\u{0013}' => "␓",
+    '\u{0014}' => "␔",
+    '\u{0015}' => "␕",
+    '\u{0016}' => "␖",
+    '\u{0017}' => "␗",
+    '\u{0018}' => "␘",
+    '\u{0019}' => "␙",
+    '\u{001a}' => "␚",
+    '\u{001b}' => "␛",
+    '\u{001c}' => "␜",
+    '\u{001d}' => "␝",
+    '\u{001e}' => "␞",
+    '\u{001f}' => "␟",
+    '\u{007f}' => "␡",
+    '\u{200d}' => "", // Replace ZWJ for consistent terminal output of grapheme clusters.
+    '\u{202a}' => "�", // The following unicode text flow control characters are inconsistently
+    '\u{202b}' => "�", // supported across CLIs and can cause confusion due to the bytes on disk
+    '\u{202c}' => "�", // not corresponding to the visible source code, so we replace them always.
+    '\u{202d}' => "�",
+    '\u{202e}' => "�",
+    '\u{2066}' => "�",
+    '\u{2067}' => "�",
+    '\u{2068}' => "�",
+    '\u{2069}' => "�",
 ];
 
 fn normalize_whitespace(s: &str) -> String {
@@ -2620,9 +2616,9 @@ fn normalize_whitespace(s: &str) -> String {
     // it with it's alternative string (it can be more than 1 char!). Otherwise, retain the input
     // char. At the end, allocate all chars into a string in one operation.
     s.chars().fold(String::with_capacity(s.len()), |mut s, c| {
-        match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) {
-            Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1),
-            _ => s.push(c),
+        match OUTPUT_REPLACEMENTS.get(&c) {
+            Some(r) => s.push_str(r),
+            None => s.push(c),
         }
         s
     })
diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs
index e23e931b0eb04..2a2cfde1d0496 100644
--- a/src/tools/tidy/src/deps.rs
+++ b/src/tools/tidy/src/deps.rs
@@ -345,6 +345,10 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
     "parking_lot_core",
     "pathdiff",
     "perf-event-open-sys",
+    "phf",
+    "phf_generator",
+    "phf_macros",
+    "phf_shared",
     "pin-project-lite",
     "polonius-engine",
     "portable-atomic", // dependency for platforms doesn't support `AtomicU64` in std
@@ -386,6 +390,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
     "sha2",
     "sharded-slab",
     "shlex",
+    "siphasher",
     "smallvec",
     "snap",
     "stable_deref_trait",