diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index a50d2e4903b2b..bdf5e4b4b55db 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -29,11 +29,15 @@ const KIND_MASK: u128 = 0xFF; // 8 bits const BOOL_MASK: u128 = 0xFF; // 8 bits const _: () = { - // Check flags fields are aligned on 8 - assert!(IS_ON_NEW_LINE_SHIFT % 8 == 0); - assert!(ESCAPED_SHIFT % 8 == 0); - assert!(LONE_SURROGATES_SHIFT % 8 == 0); - assert!(HAS_SEPARATOR_SHIFT % 8 == 0); + // Check flags fields are aligned on 8 and in bounds, so can be read via pointers + const fn is_valid_shift(shift: usize) -> bool { + shift % 8 == 0 && shift < u128::BITS as usize + } + + assert!(is_valid_shift(IS_ON_NEW_LINE_SHIFT)); + assert!(is_valid_shift(ESCAPED_SHIFT)); + assert!(is_valid_shift(LONE_SURROGATES_SHIFT)); + assert!(is_valid_shift(HAS_SEPARATOR_SHIFT)); }; #[derive(Debug, Clone, Copy)] @@ -113,15 +117,10 @@ impl Token { #[inline] pub fn is_on_new_line(&self) -> bool { + // Use a pointer read rather than arithmetic as it produces less instructions. // SAFETY: 8 bits starting at `IS_ON_NEW_LINE_SHIFT` are only set in `Token::default` and // `Token::set_is_on_new_line`. Both only set these bits to 0 or 1, so valid to read as a `bool`. - unsafe { - *ptr::from_ref(self) - .cast::() - .add(IS_ON_NEW_LINE_SHIFT / 8) - .as_ref() - .unwrap_unchecked() - } + unsafe { self.read_bool(IS_ON_NEW_LINE_SHIFT) } } #[inline] @@ -132,11 +131,10 @@ impl Token { #[inline] pub fn escaped(&self) -> bool { + // Use a pointer read rather than arithmetic as it produces less instructions. // SAFETY: 8 bits starting at `ESCAPED_SHIFT` are only set in `Token::default` and // `Token::set_escaped`. Both only set these bits to 0 or 1, so valid to read as a `bool`. - unsafe { - *ptr::from_ref(self).cast::().add(ESCAPED_SHIFT / 8).as_ref().unwrap_unchecked() - } + unsafe { self.read_bool(ESCAPED_SHIFT) } } #[inline] @@ -147,15 +145,10 @@ impl Token { #[inline] pub fn lone_surrogates(&self) -> bool { + // Use a pointer read rather than arithmetic as it produces less instructions. // SAFETY: 8 bits starting at `LONE_SURROGATES_SHIFT` are only set in `Token::default` and // `Token::set_lone_surrogates`. Both only set these bits to 0 or 1, so valid to read as a `bool`. - unsafe { - *ptr::from_ref(self) - .cast::() - .add(LONE_SURROGATES_SHIFT / 8) - .as_ref() - .unwrap_unchecked() - } + unsafe { self.read_bool(LONE_SURROGATES_SHIFT) } } #[inline] @@ -166,15 +159,10 @@ impl Token { #[inline] pub fn has_separator(&self) -> bool { + // Use a pointer read rather than arithmetic as it produces less instructions. // SAFETY: 8 bits starting at `HAS_SEPARATOR_SHIFT` are only set in `Token::default` and // `Token::set_has_separator`. Both only set these bits to 0 or 1, so valid to read as a `bool`. - unsafe { - *ptr::from_ref(self) - .cast::() - .add(HAS_SEPARATOR_SHIFT / 8) - .as_ref() - .unwrap_unchecked() - } + unsafe { self.read_bool(HAS_SEPARATOR_SHIFT) } } #[inline] @@ -182,6 +170,25 @@ impl Token { self.0 &= !(BOOL_MASK << HAS_SEPARATOR_SHIFT); // Clear current `has_separator` bits self.0 |= u128::from(value) << HAS_SEPARATOR_SHIFT; } + + /// Read `bool` from 8 bits starting at bit position `shift`. + /// + /// # SAFETY + /// `shift` must be the location of a valid boolean "field" in [`Token`] + /// e.g. `ESCAPED_SHIFT` + #[expect(clippy::inline_always)] + #[inline(always)] // So `shift` is statically known + unsafe fn read_bool(&self, shift: usize) -> bool { + // Byte offset depends on endianness of the system + let offset = if cfg!(target_endian = "little") { shift / 8 } else { 15 - (shift / 8) }; + // SAFETY: Caller guarantees `shift` points to valid `bool`. + // This method borrows `Token`, so valid to read field via a reference - can't be aliased. + unsafe { + let field_ptr = ptr::from_ref(self).cast::().add(offset); + debug_assert!(*field_ptr.cast::() <= 1); + *field_ptr.as_ref().unwrap_unchecked() + } + } } #[cfg(test)]