Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions crates/ruff_annotate_snippets/src/renderer/display_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
//!
//! The above snippet has been built out of the following structure:
use crate::{Id, snippet};
use std::borrow::Cow;
use std::cmp::{Reverse, max, min};
use std::collections::HashMap;
use std::fmt::Display;
Expand Down Expand Up @@ -1863,12 +1864,21 @@ const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
('\u{2069}', ""),
];

fn normalize_whitespace(str: &str) -> String {
fn normalize_whitespace(str: &str) -> Cow<'_, str> {
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked the upstream implementation to see if they'd done anything similar, but we've diverged a bit already:

https://github.com/rust-lang/annotate-snippets-rs/blob/45424f56bb51049361366c781d0e5acefa7ad677/src/renderer/render.rs#L2538

after rust-lang/annotate-snippets-rs@84d07af specifically.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: You could try the following, but it's not unlikely that it is slower, if the current contains call use SIMD or gets vectorized:

    let mut output = String::new();
    let mut last_index = 0usize;

    for (index, c) in str.char_indices() {
        let replacement = match c {
            '\t' => "    ",   // We do our own tab replacement
            '\u{200D}' => "", // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
            '\u{202A}' => "", // The following unicode text flow control characters are inconsistently
            '\u{202B}' => "", // supported across CLIs and can cause confusion due to the bytes on disk
            '\u{202D}' => "", // not corresponding to the visible source code, so we replace them always.
            '\u{202E}' => "",
            '\u{2066}' => "",
            '\u{2067}' => "",
            '\u{2068}' => "",
            '\u{202C}' => "",
            '\u{2069}' => "",
            _ => continue,
        };

        if output.is_empty() {
            output.reserve(str.len());
        }

        output.push_str(&str[last_index..index]);
        output.push_str(replacement);
        last_index = index + c.len_utf8();
    }

    if output.is_empty() {
        Cow::Borrowed(str)
    } else {
        output.push_str(&str[last_index..]);
        Cow::Owned(output)
    }

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, that did look like a good idea, but it appears to be slightly slower, at least on CPython and on my machine:

Command Mean [s] Min [s] Max [s] Relative
8147494 3.713 ± 0.038 3.665 3.778 1.00
1c764d1 (this commit locally) 3.811 ± 0.064 3.768 3.987 1.03 ± 0.02

I think I'll stick with the current version for now.

// This is an optimization to avoid repeated `str::replace` calls in the typical case of no
// valid replacements. Note that this list needs to be kept in sync with `OUTPUT_REPLACEMENTS`.
if !str.contains([
'\t', '\u{200d}', '\u{202a}', '\u{202b}', '\u{202d}', '\u{202e}', '\u{2066}', '\u{2067}',
'\u{2068}', '\u{202c}', '\u{2069}',
]) {
return Cow::Borrowed(str);
}

let mut s = str.to_owned();
for (c, replacement) in OUTPUT_REPLACEMENTS {
s = s.replace(*c, replacement);
}
s
Cow::Owned(s)
}

fn overlaps(
Expand Down
23 changes: 16 additions & 7 deletions crates/ruff_annotate_snippets/src/renderer/styled_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ impl StyledBuffer {
}

pub(crate) fn render(&self, stylesheet: &Stylesheet) -> Result<String, fmt::Error> {
let mut str = String::new();
let capacity = self.lines.iter().map(|line| line.len()).sum();
let mut str = String::with_capacity(capacity);
for (i, line) in self.lines.iter().enumerate() {
let mut current_style = stylesheet.none;
for ch in line {
Expand All @@ -49,11 +50,11 @@ impl StyledBuffer {
current_style = ch.style;
write!(str, "{}", current_style.render())?;
}
write!(str, "{}", ch.ch)?;
str.push(ch.ch);
}
write!(str, "{}", current_style.render_reset())?;
if i != self.lines.len() - 1 {
writeln!(str)?;
str.push('\n');
}
}
Ok(str)
Expand All @@ -74,10 +75,18 @@ impl StyledBuffer {
/// If `line` does not exist in our buffer, adds empty lines up to the given
/// and fills the last line with unstyled whitespace.
pub(crate) fn puts(&mut self, line: usize, col: usize, string: &str, style: Style) {
let mut n = col;
for c in string.chars() {
self.putc(line, n, c, style);
n += 1;
if string.is_empty() {
return;
}
self.ensure_lines(line);
let char_count = string.chars().count();
let needed = col + char_count;
if needed > self.lines[line].len() {
self.lines[line].resize(needed, StyledChar::SPACE);
}
let line = &mut self.lines[line];
for (i, c) in string.chars().enumerate() {
line[col + i] = StyledChar::new(c, style);
}
}
/// For given `line` inserts `string` with `style` after old content of that line,
Expand Down
Loading