diff --git a/Cargo.lock b/Cargo.lock index 09b4fed60edb5..fb6817348a005 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1546,6 +1546,7 @@ dependencies = [ "once_cell", "oxc_allocator", "oxc_ast", + "oxc_data_structures", "oxc_index", "oxc_mangler", "oxc_parser", diff --git a/crates/oxc_codegen/Cargo.toml b/crates/oxc_codegen/Cargo.toml index 1ba96c14f8414..55bfb2038610b 100644 --- a/crates/oxc_codegen/Cargo.toml +++ b/crates/oxc_codegen/Cargo.toml @@ -22,6 +22,7 @@ doctest = false [dependencies] oxc_allocator = { workspace = true } oxc_ast = { workspace = true } +oxc_data_structures = { workspace = true } oxc_index = { workspace = true } oxc_mangler = { workspace = true } oxc_sourcemap = { workspace = true } diff --git a/crates/oxc_codegen/src/code_buffer.rs b/crates/oxc_codegen/src/code_buffer.rs index 34ade405f163c..db7674110d539 100644 --- a/crates/oxc_codegen/src/code_buffer.rs +++ b/crates/oxc_codegen/src/code_buffer.rs @@ -1,5 +1,7 @@ use assert_unchecked::assert_unchecked; +use oxc_data_structures::AsciiChar; + /// A string builder for constructing source code. /// /// `CodeBuffer` provides safe abstractions over a byte array. @@ -173,6 +175,9 @@ impl CodeBuffer { /// Push a single ASCII byte into the buffer. /// + /// If `byte` is not statically knowable, consider [`print_ascii_char`] + /// as a more efficient alternative. + /// /// # Panics /// Panics if `byte` is not an ASCII byte (`0 - 0x7F`). /// @@ -187,6 +192,8 @@ impl CodeBuffer { /// let source = code.into_string(); /// assert_eq!(source, "foo"); /// ``` + /// + /// [`print_ascii_char`]: CodeBuffer::print_ascii_char #[inline] pub fn print_ascii_byte(&mut self, byte: u8) { // When this method is inlined, and the value of `byte` is known, this assertion should @@ -197,6 +204,37 @@ impl CodeBuffer { unsafe { self.print_byte_unchecked(byte) } } + /// Push a single `AsciiChar` into the buffer. 
+ /// + /// [`print_ascii_byte`] is usually more ergonomic, and will produce equally efficient code, + /// as long as `ch` is statically knowable. However, if `ch` is dynamic and the compiler + /// cannot statically prove it is an ASCII character, the compiler will be unable to elide an + /// assertion in [`print_ascii_byte`]. + /// + /// `print_ascii_char` is slightly more efficient in such cases, because it is always true that + /// `AsciiChar` is an ASCII character, by definition. + /// + /// # Example + /// ``` + /// use oxc_codegen::CodeBuffer; + /// use oxc_data_structures::AsciiChar; + /// + /// let mut code = CodeBuffer::new(); + /// code.print_ascii_char(AsciiChar::SmallF); + /// code.print_ascii_char(AsciiChar::SmallO); + /// code.print_ascii_char(AsciiChar::SmallO); + /// + /// let source = code.into_string(); + /// assert_eq!(source, "foo"); + /// ``` + /// + /// [`print_ascii_byte`]: CodeBuffer::print_ascii_byte + #[inline] + pub fn print_ascii_char(&mut self, ch: AsciiChar) { + // SAFETY: `AsciiChar` is guaranteed to be an ASCII character + unsafe { self.print_byte_unchecked(ch.to_u8()) } + } + /// Push a byte to the buffer, without checking that the buffer still represents a valid /// UTF-8 string. /// @@ -332,6 +370,26 @@ impl CodeBuffer { } } + /// Push a sequence of `AsciiChar`s into the buffer. + /// + /// # Example + /// ``` + /// use oxc_codegen::CodeBuffer; + /// use oxc_data_structures::AsciiChar; + /// + /// let mut code = CodeBuffer::new(); + /// code.print_ascii_chars( + /// [AsciiChar::SmallF, AsciiChar::SmallO, AsciiChar::SmallO] + /// ); + /// assert_eq!(String::from(code), "foo"); + /// ``` + pub fn print_ascii_chars<I>(&mut self, chars: I) + where + I: IntoIterator<Item = AsciiChar>, + { + self.buf.extend(chars.into_iter().map(AsciiChar::to_u8)); + } + /// Print a sequence of bytes without checking that the buffer still /// represents a valid UTF-8 string.
/// @@ -418,7 +476,7 @@ impl From for String { #[cfg(test)] mod test { - use super::CodeBuffer; + use super::{AsciiChar, CodeBuffer}; #[test] fn empty() { @@ -460,6 +518,19 @@ mod test { assert_eq!(String::from(code), "foo"); } + #[test] + #[allow(clippy::byte_char_slices)] + fn print_ascii_char() { + let mut code = CodeBuffer::new(); + code.print_ascii_char(AsciiChar::SmallF); + code.print_ascii_char(AsciiChar::SmallO); + code.print_ascii_char(AsciiChar::SmallO); + + assert_eq!(code.len(), 3); + assert_eq!(code.as_bytes(), &[b'f', b'o', b'o']); + assert_eq!(String::from(code), "foo"); + } + #[test] #[allow(clippy::byte_char_slices)] fn print_byte_unchecked() { @@ -487,6 +558,17 @@ mod test { assert_eq!(String::from(code), "foo"); } + #[test] + #[allow(clippy::byte_char_slices)] + fn print_ascii_chars() { + let mut code = CodeBuffer::new(); + code.print_ascii_chars([AsciiChar::SmallF, AsciiChar::SmallO, AsciiChar::SmallO]); + + assert_eq!(code.len(), 3); + assert_eq!(code.as_bytes(), &[b'f', b'o', b'o']); + assert_eq!(String::from(code), "foo"); + } + #[test] fn peek_nth_char_back() { let mut code = CodeBuffer::new(); diff --git a/crates/oxc_codegen/src/gen.rs b/crates/oxc_codegen/src/gen.rs index d33f2836831ab..e5a380658d681 100644 --- a/crates/oxc_codegen/src/gen.rs +++ b/crates/oxc_codegen/src/gen.rs @@ -3,6 +3,7 @@ use std::ops::Not; use cow_utils::CowUtils; #[allow(clippy::wildcard_imports)] use oxc_ast::ast::*; +use oxc_data_structures::AsciiChar; use oxc_span::GetSpan; use oxc_syntax::{ identifier::{LS, PS}, @@ -1210,7 +1211,7 @@ impl<'a> Gen for RegExpLiteral<'a> { } } -fn print_unquoted_str(s: &str, quote: u8, p: &mut Codegen) { +fn print_unquoted_str(s: &str, quote: AsciiChar, p: &mut Codegen) { let mut chars = s.chars().peekable(); while let Some(c) = chars.next() { @@ -1250,21 +1251,21 @@ fn print_unquoted_str(s: &str, quote: u8, p: &mut Codegen) { p.print_str("\\\\"); } '\'' => { - if quote == b'\'' { + if quote == AsciiChar::SingleQuote { 
p.print_str("\\'"); } else { p.print_str("'"); } } '\"' => { - if quote == b'"' { + if quote == AsciiChar::DoubleQuote { p.print_str("\\\""); } else { p.print_str("\""); } } '`' => { - if quote == b'`' { + if quote == AsciiChar::GraveAccent { p.print_str("\\`"); } else { p.print_str("`"); @@ -2335,10 +2336,14 @@ impl<'a> Gen for JSXAttributeValue<'a> { Self::Fragment(fragment) => fragment.print(p, ctx), Self::Element(el) => el.print(p, ctx), Self::StringLiteral(lit) => { - let quote = if lit.value.contains('"') { b'\'' } else { b'"' }; - p.print_ascii_byte(quote); + let quote = if lit.value.contains('"') { + AsciiChar::SingleQuote + } else { + AsciiChar::DoubleQuote + }; + p.print_ascii_char(quote); p.print_str(&lit.value); - p.print_ascii_byte(quote); + p.print_ascii_char(quote); } Self::ExpressionContainer(expr_container) => expr_container.print(p, ctx), } diff --git a/crates/oxc_codegen/src/lib.rs b/crates/oxc_codegen/src/lib.rs index 27cca0eb325c3..876a2d81ed409 100644 --- a/crates/oxc_codegen/src/lib.rs +++ b/crates/oxc_codegen/src/lib.rs @@ -16,6 +16,7 @@ use std::{borrow::Cow, path::PathBuf}; use oxc_ast::ast::{ BindingIdentifier, BlockStatement, Expression, IdentifierReference, Program, Statement, }; +use oxc_data_structures::AsciiChar; use oxc_mangler::Mangler; use oxc_span::{GetSpan, Span}; use oxc_syntax::{ @@ -138,7 +139,7 @@ pub struct Codegen<'a> { indent: u32, /// Fast path for [CodegenOptions::single_quote] - quote: u8, + quote: AsciiChar, // Builders sourcemap_builder: Option, @@ -184,14 +185,15 @@ impl<'a> Codegen<'a> { start_of_arrow_expr: 0, start_of_default_export: 0, indent: 0, - quote: b'"', + quote: AsciiChar::DoubleQuote, sourcemap_builder: None, } } #[must_use] pub fn with_options(mut self, options: CodegenOptions) -> Self { - self.quote = if options.single_quote { b'\'' } else { b'"' }; + self.quote = + if options.single_quote { AsciiChar::SingleQuote } else { AsciiChar::DoubleQuote }; self.options = options; self } @@ -204,7 +206,8 @@ 
impl<'a> Codegen<'a> { #[must_use] pub fn build(mut self, program: &Program<'a>) -> CodegenReturn { - self.quote = if self.options.single_quote { b'\'' } else { b'"' }; + self.quote = + if self.options.single_quote { AsciiChar::SingleQuote } else { AsciiChar::DoubleQuote }; self.source_text = program.source_text; self.code.reserve(program.source_text.len()); if self.options.print_annotation_comments() { @@ -234,6 +237,12 @@ impl<'a> Codegen<'a> { self.code.print_ascii_byte(byte); } + /// Push a single `AsciiChar` into the buffer. + #[inline] + pub fn print_ascii_char(&mut self, ch: AsciiChar) { + self.code.print_ascii_char(ch); + } + /// Push str into the buffer #[inline] pub fn print_str(&mut self, s: &str) { @@ -343,10 +352,7 @@ impl<'a> Codegen<'a> { self.print_next_indent_as_space = false; return; } - // SAFETY: this iterator only yields tabs, which are always valid ASCII characters. - unsafe { - self.code.print_bytes_unchecked(std::iter::repeat(b'\t').take(self.indent as usize)); - } + self.code.print_ascii_chars(std::iter::repeat(AsciiChar::Tab).take(self.indent as usize)); } #[inline] @@ -569,10 +575,10 @@ impl<'a> Codegen<'a> { } #[inline] - fn wrap_quote<F: FnMut(&mut Self, u8)>(&mut self, mut f: F) { - self.print_ascii_byte(self.quote); + fn wrap_quote<F: FnMut(&mut Self, AsciiChar)>(&mut self, mut f: F) { + self.print_ascii_char(self.quote); f(self, self.quote); - self.print_ascii_byte(self.quote); + self.print_ascii_char(self.quote); } fn add_source_mapping(&mut self, position: u32) {