diff --git a/Cargo.lock b/Cargo.lock
index 470b38d5a91cb..2d23541b43149 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3809,6 +3809,7 @@ dependencies = [
 name = "rustc_driver_impl"
 version = "0.0.0"
 dependencies = [
+ "anstyle",
  "ctrlc",
  "jiff",
  "libc",
@@ -3834,6 +3835,7 @@ dependencies = [
  "rustc_index",
  "rustc_infer",
  "rustc_interface",
+ "rustc_lexer",
  "rustc_lint",
  "rustc_log",
  "rustc_macros",
diff --git a/compiler/rustc_driver_impl/Cargo.toml b/compiler/rustc_driver_impl/Cargo.toml
index 531b9e0c8ff72..c160240a18a77 100644
--- a/compiler/rustc_driver_impl/Cargo.toml
+++ b/compiler/rustc_driver_impl/Cargo.toml
@@ -5,6 +5,7 @@ edition = "2024"
 
 [dependencies]
 # tidy-alphabetical-start
+anstyle = "1.0.13"
 jiff = { version = "0.2.5", default-features = false, features = ["std"] }
 rustc_abi = { path = "../rustc_abi" }
 rustc_ast = { path = "../rustc_ast" }
@@ -28,6 +29,7 @@ rustc_incremental = { path = "../rustc_incremental" }
 rustc_index = { path = "../rustc_index" }
 rustc_infer = { path = "../rustc_infer" }
 rustc_interface = { path = "../rustc_interface" }
+rustc_lexer = { path = "../rustc_lexer" }
 rustc_lint = { path = "../rustc_lint" }
 rustc_log = { path = "../rustc_log" }
 rustc_macros = { path = "../rustc_macros" }
diff --git a/compiler/rustc_driver_impl/src/highlighter.rs b/compiler/rustc_driver_impl/src/highlighter.rs
new file mode 100644
index 0000000000000..70b73cc29b723
--- /dev/null
+++ b/compiler/rustc_driver_impl/src/highlighter.rs
@@ -0,0 +1,174 @@
+//! This module provides a syntax highlighter for Rust code.
+//! It is used by the `rustc --explain` command.
+//!
+//! The syntax highlighter runs `rustc_lexer`'s `tokenize`
+//! function over the Rust code and walks the resulting tokens
+//! in order. The text of each token is sliced out of the code
+//! string, wrapped in an ANSI style picked from the kind of the
+//! token, and written to the output buffer.
+
+use std::io::{self, Write};
+
+use anstyle::{AnsiColor, Color, Effects, Style};
+use rustc_lexer::{LiteralKind, TokenKind, strip_shebang, tokenize};
+
+const PRIMITIVE_TYPES: &[&str] = &[
+    "i8", "i16", "i32", "i64", "i128", "isize", // signed integers
+    "u8", "u16", "u32", "u64", "u128", "usize", // unsigned integers
+    "f32", "f64", // floating point
+    "char", "bool", // others
+];
+
+// `Self` is not listed: identifiers starting with an uppercase
+// letter are always colored as types.
+const KEYWORDS: &[&str] = &[
+    "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern",
+    "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub",
+    "ref", "return", "self", "static", "struct", "super", "trait", "true", "type", "unsafe", "use",
+    "where", "while",
+];
+
+const STR_LITERAL_COLOR: AnsiColor = AnsiColor::Green;
+const OTHER_LITERAL_COLOR: AnsiColor = AnsiColor::BrightRed;
+const DERIVE_COLOR: AnsiColor = AnsiColor::BrightRed;
+const KEYWORD_COLOR: AnsiColor = AnsiColor::BrightMagenta;
+const TYPE_COLOR: AnsiColor = AnsiColor::Yellow;
+const FUNCTION_COLOR: AnsiColor = AnsiColor::Blue;
+const USE_COLOR: AnsiColor = AnsiColor::BrightMagenta;
+const PRIMITIVE_TYPE_COLOR: AnsiColor = AnsiColor::Cyan;
+
+/// Highlight a Rust code string and write the highlighted
+/// output to the buffer. It serves as a wrapper around
+/// `Highlighter::highlight_rustc_lexer`. It is passed to
+/// `write_anstream_buf` in the `lib.rs` file.
+pub fn highlight(code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
+    Highlighter::new().highlight_rustc_lexer(code, buf)
+}
+
+/// A syntax highlighter for Rust code.
+/// It is used by the `rustc --explain` command.
+#[derive(Default)]
+pub struct Highlighter {
+    /// Set once the `fn` keyword has been written. The next
+    /// identifier is then the name of the function and is
+    /// colored as such. Whitespace in between is skipped over.
+    prev_was_special: bool,
+    /// Byte offset into the code string of the next token to
+    /// write. This is used to find the original lexeme for a
+    /// token, as the lexer only reports the length of a token.
+    len_accum: usize,
+}
+
+impl Highlighter {
+    /// Create a new highlighter
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Highlight a Rust code string and write the highlighted
+    /// output to the buffer.
+    ///
+    /// A leading shebang line is skipped and not written. Every
+    /// other token is written with its original text, wrapped in
+    /// the escape sequences of its style.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error of the first failed write to `buf`.
+    pub fn highlight_rustc_lexer(&mut self, code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
+        // Start from a clean state so that a highlighter can be reused
+        self.prev_was_special = false;
+
+        // Remove shebang from code string
+        let stripped_idx = strip_shebang(code).unwrap_or(0);
+        let stripped_code = &code[stripped_idx..];
+        self.len_accum = stripped_idx;
+
+        for token in tokenize(stripped_code, rustc_lexer::FrontmatterAllowed::No) {
+            // The lexer only reports token lengths, so the text of the
+            // token is recovered from the running offset
+            let start = self.len_accum;
+            let end = start + token.len as usize;
+            let token_str = &code[start..end];
+            self.len_accum = end;
+
+            let style = self.style_for(token.kind, token_str, &code[end..]);
+            write!(buf, "{style}{token_str}{style:#}")?;
+        }
+        Ok(())
+    }
+
+    /// Pick the style for a token. `rest` is the code following the
+    /// token and is only used to look at the next character.
+    fn style_for(&mut self, kind: TokenKind, token_str: &str, rest: &str) -> Style {
+        // The first non-whitespace token after `fn` is the function name
+        // if it is an identifier. Anything else, like the `(` of the
+        // `fn(u8) -> u8` pointer type, gets its usual style.
+        if self.prev_was_special && !matches!(kind, TokenKind::Whitespace) {
+            self.prev_was_special = false;
+            if matches!(kind, TokenKind::Ident) {
+                return fg(FUNCTION_COLOR);
+            }
+        }
+
+        match kind {
+            TokenKind::Ident => {
+                if token_str == "fn" {
+                    self.prev_was_special = true;
+                }
+                ident_style(token_str, rest)
+            }
+            TokenKind::Literal { kind, suffix_start: _ } => literal_style(kind),
+            // All other tokens are dimmed
+            _ => fg(AnsiColor::BrightWhite).effects(Effects::DIMMED),
+        }
+    }
+}
+
+/// A style with only the foreground color set
+fn fg(color: AnsiColor) -> Style {
+    Style::new().fg_color(Some(Color::Ansi(color)))
+}
+
+/// Pick the style for an identifier. The first matching rule wins.
+fn ident_style(ident: &str, rest: &str) -> Style {
+    if PRIMITIVE_TYPES.contains(&ident) {
+        fg(PRIMITIVE_TYPE_COLOR)
+    } else if ident.starts_with(char::is_uppercase) {
+        // Heuristic: an identifier starting with an uppercase letter is a type
+        fg(TYPE_COLOR)
+    } else if ident == "derive" {
+        fg(DERIVE_COLOR)
+    } else if ident == "use" {
+        // The `use` keyword has its own color
+        fg(USE_COLOR)
+    } else if KEYWORDS.contains(&ident) {
+        // Keywords are matched before the call heuristic, so that
+        // `pub(crate)` is not mistaken for a function call
+        fg(KEYWORD_COLOR)
+    } else if rest.starts_with('(') {
+        // Heuristic: an identifier directly followed by `(` is a function call.
+        // Looking at `rest` cannot go out of bounds at the end of the code
+        // and cannot split a multi-byte character.
+        fg(FUNCTION_COLOR)
+    } else {
+        Style::new()
+    }
+}
+
+/// Pick the style for a literal
+fn literal_style(kind: LiteralKind) -> Style {
+    match kind {
+        // Strings, chars and bytes (plain, raw, C and byte flavors) -> Green
+        LiteralKind::Str { .. }
+        | LiteralKind::Char { .. }
+        | LiteralKind::Byte { .. }
+        | LiteralKind::ByteStr { .. }
+        | LiteralKind::CStr { .. }
+        | LiteralKind::RawStr { .. }
+        | LiteralKind::RawByteStr { .. }
+        | LiteralKind::RawCStr { .. } => fg(STR_LITERAL_COLOR),
+        // Other literals -> Bright Red (Orange-esque)
+        _ => fg(OTHER_LITERAL_COLOR),
+    }
+}
diff --git a/compiler/rustc_driver_impl/src/lib.rs b/compiler/rustc_driver_impl/src/lib.rs
index 7820198f2dcf2..045292338e580 100644
--- a/compiler/rustc_driver_impl/src/lib.rs
+++ b/compiler/rustc_driver_impl/src/lib.rs
@@ -86,6 +86,7 @@ pub mod args;
 pub mod pretty;
 #[macro_use]
 mod print;
+pub mod highlighter;
 mod session_diagnostics;
 
 // Keep the
OS parts of this `cfg` in sync with the `cfg` on the `libc` @@ -521,7 +522,11 @@ fn show_md_content_with_pager(content: &str, color: ColorConfig) { let mdstream = markdown::MdStream::parse_str(content); let bufwtr = markdown::create_stdout_bufwtr(); let mut mdbuf = Vec::new(); - if mdstream.write_anstream_buf(&mut mdbuf).is_ok() { Some((bufwtr, mdbuf)) } else { None } + if mdstream.write_anstream_buf(&mut mdbuf, Some(&highlighter::highlight)).is_ok() { + Some((bufwtr, mdbuf)) + } else { + None + } }; // Try to print via the pager, pretty output if possible. diff --git a/compiler/rustc_errors/src/markdown/mod.rs b/compiler/rustc_errors/src/markdown/mod.rs index 4f5e2328234d8..9993407b05c0e 100644 --- a/compiler/rustc_errors/src/markdown/mod.rs +++ b/compiler/rustc_errors/src/markdown/mod.rs @@ -18,9 +18,14 @@ impl<'a> MdStream<'a> { parse::entrypoint(s) } - /// Write formatted output to an anstream buffer - pub fn write_anstream_buf(&self, buf: &mut Vec) -> io::Result<()> { - term::entrypoint(self, buf) + /// Write formatted output to a stdout buffer, optionally with + /// a formatter for code blocks + pub fn write_anstream_buf( + &self, + buf: &mut Vec, + formatter: Option<&(dyn Fn(&str, &mut Vec) -> io::Result<()> + 'static)>, + ) -> io::Result<()> { + term::entrypoint(self, buf, formatter) } } diff --git a/compiler/rustc_errors/src/markdown/term.rs b/compiler/rustc_errors/src/markdown/term.rs index b0ce01548f00f..b94cd06b30ef0 100644 --- a/compiler/rustc_errors/src/markdown/term.rs +++ b/compiler/rustc_errors/src/markdown/term.rs @@ -12,29 +12,33 @@ thread_local! 
{ static CURSOR: Cell = const { Cell::new(0) }; /// Width of the terminal static WIDTH: Cell = const { Cell::new(DEFAULT_COLUMN_WIDTH) }; + } -/// Print to terminal output to a buffer -pub(crate) fn entrypoint(stream: &MdStream<'_>, buf: &mut Vec) -> io::Result<()> { - #[cfg(not(test))] - if let Some((w, _)) = termize::dimensions() { - WIDTH.set(std::cmp::min(w, DEFAULT_COLUMN_WIDTH)); - } - write_stream(stream, buf, None, 0)?; +/// Print to the terminal output to a buffer +/// optionally with a formatter for code blocks +pub(crate) fn entrypoint( + stream: &MdStream<'_>, + buf: &mut Vec, + formatter: Option<&(dyn Fn(&str, &mut Vec) -> io::Result<()> + 'static)>, +) -> io::Result<()> { + write_stream(stream, buf, None, 0, formatter)?; buf.write_all(b"\n") } -/// Write the buffer, reset to the default style after each + +/// Write the buffer, reset to the default style after each, +/// optionally with a formatter for code blocks fn write_stream( MdStream(stream): &MdStream<'_>, buf: &mut Vec, + default: Option