Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

89 changes: 3 additions & 86 deletions src/bun_core/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1746,16 +1746,6 @@ pub fn clear_to_end() {
// </r> - reset
// <r> - reset

/// Lowercase lookup wrapper. The table
/// itself lives in `bun_output_tags` (shared with the `pretty_fmt!` proc-macro
/// so there is exactly one copy).
pub mod color_map {
#[inline]
pub fn get(name: &[u8]) -> Option<&'static str> {
bun_output_tags::color_for_bytes(name)
}
}

pub use ansi::{BOLD, DIM, RESET};
pub use bun_output_tags::{ansi, ansi_b};

Expand Down Expand Up @@ -2057,82 +2047,9 @@ pub fn pretty_fmt_args<A: FmtTuple>(
}

/// Runtime `<tag>` → ANSI rewriter, used for testing the proc-macro and for
/// the rare dynamic case.
///
/// Colour table lives in `bun_output_tags`; the state machine is kept duplicated
/// vs `bun_core_macros::rewrite` because the two intentionally diverge in the
/// `{` arm (proc-macro rewrites specs `{s}`→`{}`; this side copies braces
/// verbatim) and on unknown tags (proc-macro errors; this side emits `""`).
pub fn pretty_fmt_runtime(fmt: &[u8], is_enabled: bool) -> Vec<u8> {
    // Walks `fmt` byte by byte and produces a copy in which every `<tag>` /
    // `</tag>` marker has been replaced by its escape sequence (when
    // `is_enabled`) or removed entirely (when not).
    let len = fmt.len();
    let mut rewritten: Vec<u8> = Vec::with_capacity(len * 4);
    let mut pos = 0usize;

    while let Some(&byte) = fmt.get(pos) {
        match byte {
            b'\\' => {
                // `\<` and `\>` emit the bare bracket; any other escaped byte
                // is copied together with its backslash. A backslash that is
                // the very last byte produces no output.
                pos += 1;
                if let Some(&escaped) = fmt.get(pos) {
                    if !matches!(escaped, b'<' | b'>') {
                        rewritten.push(b'\\');
                    }
                    rewritten.push(escaped);
                    pos += 1;
                }
            }
            // An unescaped `>` (normally the end of a tag) is never emitted.
            b'>' => pos += 1,
            b'{' => {
                // Copy everything from `{` up to, but not including, the next
                // `}`; the `}` itself is emitted by the catch-all arm on the
                // following iteration.
                let span = fmt[pos..]
                    .iter()
                    .position(|&b| b == b'}')
                    .unwrap_or(len - pos);
                rewritten.extend_from_slice(&fmt[pos..pos + span]);
                pos += span;
            }
            b'<' => {
                pos += 1;
                // A leading `/` marks a closing tag, which always resets.
                let closing = fmt.get(pos) == Some(&b'/');
                if closing {
                    pos += 1;
                }
                // The tag name runs up to the next `>` (or end of input); the
                // `>` is left in place for the `b'>'` arm to consume.
                let name_end = fmt[pos..]
                    .iter()
                    .position(|&b| b == b'>')
                    .map_or(len, |offset| pos + offset);
                let tag = &fmt[pos..name_end];
                pos = name_end;

                // Known tags map to their escape sequence. `r` (when it is
                // not in the table) acts as a reset; any other unknown tag
                // contributes nothing unless it was a closing tag.
                let (escape, reset) = match color_map::get(tag) {
                    Some(code) => (code, closing),
                    None => ("", closing || tag == b"r"),
                };
                if is_enabled {
                    rewritten.extend_from_slice(if reset {
                        RESET.as_bytes()
                    } else {
                        escape.as_bytes()
                    });
                }
            }
            other => {
                rewritten.push(other);
                pos += 1;
            }
        }
    }

    rewritten
}
/// the rare dynamic case. The implementation lives in `bun_output_tags` so the
/// `bun_clap_macros` proc-macro crate shares the same state machine.
pub use bun_output_tags::pretty_fmt_runtime;

#[doc(hidden)]
#[inline]
Expand Down
185 changes: 135 additions & 50 deletions src/bun_core/string/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2438,8 +2438,12 @@ pub mod printer {
/// `MutableString`, and any other `crate::io::Write` sink.
pub use crate::io::Write as PrinterWriter;

// PERF: `ascii_only` is a *runtime* arg so the large callers
// (`write_pre_quoted_string_inner`, `bun_js_printer::estimate_length_for_utf8`)
// collapse to a single monomorphization instead of one per
// (ascii_only × quote_char × …) combo — see `write_pre_quoted_string_inner`.
#[inline]
pub(crate) fn can_print_without_escape(c: i32, ascii_only: bool) -> bool {
pub fn can_print_without_escape(c: i32, ascii_only: bool) -> bool {
if c <= LAST_ASCII as i32 {
c >= FIRST_ASCII as i32
&& c != b'\\' as i32
Expand All @@ -2456,9 +2460,20 @@ pub mod printer {
}
}

/// Same algorithm as `bun_js_printer::write_pre_quoted_string`.
/// PERF: (quote_char, ascii_only, json, encoding) are runtime params —
/// profile if it shows up on a hot path.
/// `strings::Encoding` stand-in that derives `ConstParamTy` so it can be
/// used as a const-generic parameter (`const ENCODING: Encoding`). The
/// variant set is identical; convert at the boundary if a
/// `strings::Encoding` is ever needed.
///
/// The per-variant notes below describe how `write_pre_quoted_string_inner`
/// interprets its input bytes for each encoding.
///
/// NOTE(review): `core::marker::ConstParamTy` is an unstable derive —
/// presumably the crate enables the matching nightly feature; confirm.
#[derive(Clone, Copy, Debug, PartialEq, Eq, core::marker::ConstParamTy)]
pub enum Encoding {
// One byte per code unit; the escaper debug-asserts every byte is <= 0x7F.
Ascii,
// Variable-width sequences, decoded with the WTF-8 helpers in `strings`.
Utf8,
// One byte per code unit; the byte value is used directly as the code point.
Latin1,
// Two bytes per code unit, read as little-endian `u16`.
Utf16,
}

/// Runtime-encoding adapter: selects the matching monomorphized
/// [`write_pre_quoted_string_inner`] instance.
pub fn write_pre_quoted_string<W: PrinterWriter + ?Sized>(
text_in: &[u8],
writer: &mut W,
Expand All @@ -2467,68 +2482,119 @@ pub mod printer {
json: bool,
encoding: StrEncoding,
) -> Result<(), crate::Error> {
debug_assert!(!json || quote_char == b'"');
// utf16 view over the same bytes (only used when encoding == Utf16).
// Callers pass 2-byte-aligned even-length input for Utf16; `cast_slice`
// panics (rather than UB) if that contract is violated.
let text16: &[u16] = if encoding == StrEncoding::Utf16 {
crate::cast_slice::<u8, u16>(text_in)
} else {
&[]
};
let n: usize = if encoding == StrEncoding::Utf16 {
text16.len()
} else {
text_in.len()
};
match encoding {
StrEncoding::Ascii => write_pre_quoted_string_inner::<W, { Encoding::Ascii }>(
text_in, writer, quote_char, ascii_only, json,
),
StrEncoding::Utf8 => write_pre_quoted_string_inner::<W, { Encoding::Utf8 }>(
text_in, writer, quote_char, ascii_only, json,
),
StrEncoding::Latin1 => write_pre_quoted_string_inner::<W, { Encoding::Latin1 }>(
text_in, writer, quote_char, ascii_only, json,
),
StrEncoding::Utf16 => write_pre_quoted_string_inner::<W, { Encoding::Utf16 }>(
text_in, writer, quote_char, ascii_only, json,
),
}
}

/// `quote_char` / `ascii_only` / `json` are runtime args: the branches on
/// them are cheap and well-predicted, and collapsing the monomorphizations
/// keeps the hot transpile pages dense. `ENCODING` stays `const` — it
/// changes the code-unit indexing structure of the loop, so a per-encoding
/// copy is genuinely different code.
#[inline(never)]
pub fn write_pre_quoted_string_inner<W, const ENCODING: Encoding>(
text_in: &[u8],
writer: &mut W,
quote_char: u8,
ascii_only: bool,
json: bool,
) -> Result<(), crate::Error>
where
W: PrinterWriter + ?Sized,
{
debug_assert!(
!(json && quote_char != b'"'),
"for json, quote_char must be '\"'"
);

let text = text_in;
let mut i: usize = 0;
let n: usize = match ENCODING {
Encoding::Utf16 => text.len() / 2,
_ => text.len(),
};

macro_rules! code_unit_at {
($idx:expr) => {
match ENCODING {
Encoding::Utf16 => {
let lo = text[$idx * 2];
let hi = text[$idx * 2 + 1];
u16::from_le_bytes([lo, hi]) as i32
}
_ => text[$idx] as i32,
}
};
}

while i < n {
let width: u8 = match encoding {
StrEncoding::Latin1 | StrEncoding::Ascii | StrEncoding::Utf16 => 1,
StrEncoding::Utf8 => strings::wtf8_byte_sequence_length_with_invalid(text_in[i]),
let width: u8 = match ENCODING {
Encoding::Latin1 | Encoding::Ascii => 1,
Encoding::Utf8 => strings::wtf8_byte_sequence_length_with_invalid(text[i]),
Encoding::Utf16 => 1,
};
let clamped_width = (width as usize).min(n.saturating_sub(i));
let c: i32 = match encoding {
StrEncoding::Utf8 => {
let mut buf = [0u8; 4];
buf[..clamped_width].copy_from_slice(&text_in[i..i + clamped_width]);
strings::decode_wtf8_rune_t::<i32>(buf, width, 0)
let c: i32 = match ENCODING {
Encoding::Utf8 => {
let bytes: [u8; 4] = match clamped_width {
1 => [text[i], 0, 0, 0],
2 => [text[i], text[i + 1], 0, 0],
3 => [text[i], text[i + 1], text[i + 2], 0],
4 => [text[i], text[i + 1], text[i + 2], text[i + 3]],
_ => unreachable!(),
};
strings::decode_wtf8_rune_t::<i32>(bytes, width, 0)
}
Encoding::Ascii => {
debug_assert!(text[i] <= 0x7F);
text[i] as i32
}
StrEncoding::Ascii => {
debug_assert!(text_in[i] <= 0x7F);
text_in[i] as i32
Encoding::Latin1 => text[i] as i32,
Encoding::Utf16 => {
// TODO: if this is a part of a surrogate pair, we could parse the whole codepoint in order
// to emit it as a single \u{result} rather than two paired \uLOW\uHIGH.
// eg: "\u{10334}" will convert to "𐌴" without this.
code_unit_at!(i)
}
StrEncoding::Latin1 => text_in[i] as i32,
StrEncoding::Utf16 => text16[i] as i32,
};

if can_print_without_escape(c, ascii_only) {
match encoding {
StrEncoding::Ascii | StrEncoding::Utf8 => {
let remain = &text_in[i + clamped_width..];
match ENCODING {
Encoding::Ascii | Encoding::Utf8 => {
let remain = &text[i + clamped_width..];
if let Some(j) = strings::index_of_needs_escape_for_java_script_string(
remain, quote_char,
) {
writer.write_all(&text_in[i..i + clamped_width])?;
i += clamped_width;
writer.write_all(&remain[..j as usize])?;
i += j as usize;
let j = j as usize;
writer.write_all(&text[i..i + clamped_width + j])?;
i += clamped_width + j;
} else {
writer.write_all(&text_in[i..])?;
writer.write_all(&text[i..])?;
break;
}
}
StrEncoding::Latin1 | StrEncoding::Utf16 => {
let mut cp = [0u8; 4];
let cp_len = strings::encode_wtf8_rune(&mut cp, c as u32);
writer.write_all(&cp[..cp_len])?;
Encoding::Latin1 | Encoding::Utf16 => {
let mut codepoint_bytes = [0u8; 4];
let codepoint_len =
strings::encode_wtf8_rune(&mut codepoint_bytes, c as u32);
writer.write_all(&codepoint_bytes[..codepoint_len])?;
i += clamped_width;
}
}
continue;
}

match c {
0x07 => {
writer.write_all(b"\\x07")?;
Expand Down Expand Up @@ -2572,11 +2638,16 @@ pub mod printer {
}
0x24 => {
if quote_char == b'`' {
let next_is_brace = match encoding {
StrEncoding::Utf16 => i + 1 < n && text16[i + 1] == b'{' as u16,
_ => i + 1 < n && text_in[i + 1] == b'{',
let next = if i + clamped_width < n {
Some(code_unit_at!(i + clamped_width))
} else {
None
};
writer.write_all(if next_is_brace { b"\\$" } else { b"$" })?;
if next == Some(b'{' as i32) {
writer.write_all(b"\\$")?;
} else {
writer.write_all(b"$")?;
}
} else {
writer.write_all(b"$")?;
}
Expand All @@ -2588,6 +2659,7 @@ pub mod printer {
}
_ => {
i += width as usize;

if c <= 0xFF && !json {
let h = hex2_upper(c as u8);
writer.write_all(&[b'\\', b'x', h[0], h[1]])?;
Expand All @@ -2609,9 +2681,22 @@ pub mod printer {
bytes: &mut MutableString,
ascii_only: bool,
) -> Result<(), crate::Error> {
// PERF: consider pre-growing via an estimated UTF-8 length — profile if it shows up on a hot path.
// `ascii_only` is threaded at runtime so
// the heavy escaper isn't monomorphized per ascii_only/quote-char combo.
//
// Heuristic reservation (~12.5% slack) instead of a full
// escaped-length pre-scan, which would do a SIMD scan + per-escape rune
// decode over `text` just to size the buffer — the same work
// `write_pre_quoted_string_inner` repeats immediately below.
// Tab-indented JS (e.g. three.js) has ~9.4% of bytes needing 2-byte
// escapes (tabs + newlines + quotes/backslashes), so 6.25% slack would
// under-shoot and force a 2x doubling memcpy of the whole source. The
// writer still grows on demand if this under-shoots.
Comment thread
robobun marked this conversation as resolved.
Outdated
bytes.grow_if_needed(text.len() + (text.len() >> 3) + 8)?;
bytes.append_char(b'"')?;
write_pre_quoted_string(text, bytes, b'"', ascii_only, true, StrEncoding::Utf8)?;
write_pre_quoted_string_inner::<_, { Encoding::Utf8 }>(
text, bytes, b'"', ascii_only, true,
)?;
bytes.append_char(b'"').expect("unreachable");
Ok(())
}
Expand Down
Loading
Loading