diff --git a/crates/oxc_codegen/src/code_buffer.rs b/crates/oxc_codegen/src/code_buffer.rs index 34ade405f163c..1516d74632652 100644 --- a/crates/oxc_codegen/src/code_buffer.rs +++ b/crates/oxc_codegen/src/code_buffer.rs @@ -75,6 +75,15 @@ impl CodeBuffer { self.buf.len() } + /// Returns the capacity of the buffer in bytes. + /// + /// This is *not* the same as capacity in characters, + /// since non-ASCII characters require multiple bytes. + #[inline] + pub fn capacity(&self) -> usize { + self.buf.capacity() + } + /// Returns `true` if the buffer contains no characters. /// /// # Example @@ -365,6 +374,55 @@ impl CodeBuffer { self.buf.extend(bytes); } + /// Print `n` tab characters into the buffer (indentation). + /// + /// Optimized on assumption that more than 16 levels of indentation is rare. + /// + /// Fast path is to write 16 bytes of tabs in a single load + store, + /// but only advance `len` by `n` bytes. This avoids a `memset` function call. + /// + /// Take alternative slow path if either: + /// 1. `n > 16`. + /// 2. Fewer than 16 bytes of spare capacity in buffer (needs to grow). + /// Both of these cases should be rare. + /// + /// + #[inline] + pub fn print_indent(&mut self, n: usize) { + /// Size of chunks to write indent in. + /// 16 is largest register size (XMM) available on all x86_64 targets. + const CHUNK_SIZE: usize = 16; + + #[cold] + #[inline(never)] + fn write_slow(code_buffer: &mut CodeBuffer, n: usize) { + code_buffer.buf.extend(std::iter::repeat(b'\t').take(n)); + } + + let len = self.len(); + let spare_capacity = self.capacity() - len; + if n > CHUNK_SIZE || spare_capacity < CHUNK_SIZE { + write_slow(self, n); + return; + } + + // Write 16 tabs into buffer. + // On x86_64, this is 1 XMM register load + 1 XMM store (16 byte copy). + // SAFETY: We checked there are at least 16 bytes spare capacity. 
+ unsafe { + let ptr = self.buf.as_mut_ptr().add(len).cast::<[u8; CHUNK_SIZE]>(); + ptr.write([b'\t'; CHUNK_SIZE]); + } + + // Update length of buffer. + // SAFETY: We checked there's at least 16 bytes spare capacity, and `n <= 16`, + // so `len + n` cannot exceed capacity. + // `len` cannot exceed `isize::MAX`, so `len + n` cannot wrap around. + unsafe { + self.buf.set_len(len + n); + } + } + /// Get contents of buffer as a byte slice. /// /// # Example diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs index 5ec657a3790c4..29a478abd04e6 100644 --- a/crates/oxc_codegen/src/lib.rs +++ b/crates/oxc_codegen/src/lib.rs @@ -350,10 +350,7 @@ impl<'a> Codegen<'a> { self.print_next_indent_as_space = false; return; } - // SAFETY: this iterator only yields tabs, which are always valid ASCII characters. - unsafe { - self.code.print_bytes_unchecked(std::iter::repeat(b'\t').take(self.indent as usize)); - } + self.code.print_indent(self.indent as usize); } #[inline]