-
Notifications
You must be signed in to change notification settings - Fork 29.8k
[turbopack] mark rcstr! allocated Rcstr values as 'static' and stop refcounting them
#81994
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
99ee3dc
e13c742
68d720e
cfebbf3
165ee92
71720e4
b0f61cb
6353683
5493b7b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,12 +3,37 @@ use std::{num::NonZeroU8, ptr::NonNull}; | |
| use triomphe::Arc; | ||
|
|
||
| use crate::{ | ||
| INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, RcStr, TAG_MASK, | ||
| INLINE_TAG, INLINE_TAG_INIT, LEN_OFFSET, RcStr, STATIC_TAG, TAG_MASK, | ||
| tagged_value::{MAX_INLINE_LEN, TaggedValue}, | ||
| }; | ||
|
|
||
| pub(crate) struct PrehashedString { | ||
| pub value: String, | ||
| pub enum Payload { | ||
| String(String), | ||
| Ref(&'static str), | ||
| } | ||
|
|
||
| impl Payload { | ||
| pub(crate) fn as_str(&self) -> &str { | ||
| match self { | ||
| Payload::String(s) => s, | ||
| Payload::Ref(s) => s, | ||
| } | ||
| } | ||
| pub(crate) fn into_string(self) -> String { | ||
| match self { | ||
| Payload::String(s) => s, | ||
| Payload::Ref(r) => r.to_string(), | ||
| } | ||
| } | ||
| } | ||
| impl PartialEq for Payload { | ||
| fn eq(&self, other: &Self) -> bool { | ||
| self.as_str() == other.as_str() | ||
| } | ||
| } | ||
|
|
||
| pub struct PrehashedString { | ||
| pub value: Payload, | ||
| /// This is not the actual `fxhash`, but rather it's a value that passed to | ||
| /// `write_u64` of [rustc_hash::FxHasher]. | ||
| pub hash: u64, | ||
|
|
@@ -46,7 +71,7 @@ pub(crate) fn new_atom<T: AsRef<str> + Into<String>>(text: T) -> RcStr { | |
| let hash = hash_bytes(text.as_ref().as_bytes()); | ||
|
|
||
| let entry: Arc<PrehashedString> = Arc::new(PrehashedString { | ||
| value: text.into(), | ||
| value: Payload::String(text.into()), | ||
| hash, | ||
| }); | ||
| let entry = Arc::into_raw(entry); | ||
|
|
@@ -61,6 +86,22 @@ pub(crate) fn new_atom<T: AsRef<str> + Into<String>>(text: T) -> RcStr { | |
| } | ||
| } | ||
|
|
||
| #[inline(always)] | ||
| pub(crate) fn new_static_atom(string: &'static PrehashedString) -> RcStr { | ||
| let mut entry = string as *const PrehashedString; | ||
| debug_assert!(0 == entry as u8 & TAG_MASK); | ||
| // Tag it as a static pointer | ||
| entry = ((entry as usize) | STATIC_TAG as usize) as *mut PrehashedString; | ||
| let ptr: NonNull<PrehashedString> = unsafe { | ||
| // Safety: Box::into_raw returns a non-null pointer | ||
lukesandberg marked this conversation as resolved.
Show resolved
Hide resolved
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The safety comment for View DetailsAnalysisThe safety comment on line 96 is misleading and incorrect. The function The actual safety requirement for
The incorrect comment could mislead future maintainers about the actual safety invariants. RecommendationReplace the incorrect safety comment with an accurate one: // Safety: entry is non-null because it's derived from a valid reference and tagging preserves non-null-ness
NonNull::new_unchecked(entry as *mut _) |
||
| NonNull::new_unchecked(entry as *mut _) | ||
| }; | ||
|
|
||
| RcStr { | ||
| unsafe_data: TaggedValue::new_ptr(ptr), | ||
| } | ||
| } | ||
|
|
||
| /// Attempts to construct an RcStr but only if it can be constructed inline. | ||
| /// This is primarily useful in constant contexts. | ||
| #[doc(hidden)] | ||
|
|
@@ -90,7 +131,7 @@ const SEED2: u64 = 0x13198a2e03707344; | |
| const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0; | ||
|
|
||
| #[inline] | ||
| fn multiply_mix(x: u64, y: u64) -> u64 { | ||
| const fn multiply_mix(x: u64, y: u64) -> u64 { | ||
| #[cfg(target_pointer_width = "64")] | ||
| { | ||
| // We compute the full u64 x u64 -> u128 product, this is a single mul | ||
|
|
@@ -131,6 +172,26 @@ fn multiply_mix(x: u64, y: u64) -> u64 { | |
| } | ||
| } | ||
|
|
||
| // Const compatible helper function to read a u64 from a byte array at a given offset | ||
| const fn read_u64_le(bytes: &[u8], offset: usize) -> u64 { | ||
| (bytes[offset] as u64) | ||
| | ((bytes[offset + 1] as u64) << 8) | ||
| | ((bytes[offset + 2] as u64) << 16) | ||
| | ((bytes[offset + 3] as u64) << 24) | ||
| | ((bytes[offset + 4] as u64) << 32) | ||
| | ((bytes[offset + 5] as u64) << 40) | ||
| | ((bytes[offset + 6] as u64) << 48) | ||
| | ((bytes[offset + 7] as u64) << 56) | ||
| } | ||
|
|
||
| // Const compatible helper function to read a u32 from a byte array at a given offset | ||
| const fn read_u32_le(bytes: &[u8], offset: usize) -> u32 { | ||
| (bytes[offset] as u32) | ||
| | ((bytes[offset + 1] as u32) << 8) | ||
| | ((bytes[offset + 2] as u32) << 16) | ||
| | ((bytes[offset + 3] as u32) << 24) | ||
| } | ||
|
|
||
| /// Copied from `hash_bytes` of `rustc-hash`. | ||
| /// | ||
| /// See: https://github.com/rust-lang/rustc-hash/blob/dc5c33f1283de2da64d8d7a06401d91aded03ad4/src/lib.rs#L252-L297 | ||
|
|
@@ -149,19 +210,20 @@ fn multiply_mix(x: u64, y: u64) -> u64 { | |
| /// We don't bother avalanching here as we'll feed this hash into a | ||
| /// multiplication after which we take the high bits, which avalanches for us. | ||
| #[inline] | ||
| fn hash_bytes(bytes: &[u8]) -> u64 { | ||
| #[doc(hidden)] | ||
| pub const fn hash_bytes(bytes: &[u8]) -> u64 { | ||
| let len = bytes.len(); | ||
| let mut s0 = SEED1; | ||
| let mut s1 = SEED2; | ||
|
|
||
| if len <= 16 { | ||
| // XOR the input into s0, s1. | ||
| if len >= 8 { | ||
| s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap()); | ||
| s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap()); | ||
| s0 ^= read_u64_le(bytes, 0); | ||
| s1 ^= read_u64_le(bytes, len - 8); | ||
| } else if len >= 4 { | ||
| s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64; | ||
| s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64; | ||
| s0 ^= read_u32_le(bytes, 0) as u64; | ||
| s1 ^= read_u32_le(bytes, len - 4) as u64; | ||
| } else if len > 0 { | ||
| let lo = bytes[0]; | ||
| let mid = bytes[len / 2]; | ||
|
|
@@ -173,8 +235,8 @@ fn hash_bytes(bytes: &[u8]) -> u64 { | |
| // Handle bulk (can partially overlap with suffix). | ||
| let mut off = 0; | ||
| while off < len - 16 { | ||
| let x = u64::from_le_bytes(bytes[off..off + 8].try_into().unwrap()); | ||
| let y = u64::from_le_bytes(bytes[off + 8..off + 16].try_into().unwrap()); | ||
| let x = read_u64_le(bytes, off); | ||
| let y = read_u64_le(bytes, off + 8); | ||
|
|
||
| // Replace s1 with a mix of s0, x, and y, and s0 with s1. | ||
| // This ensures the compiler can unroll this loop into two | ||
|
|
@@ -188,9 +250,8 @@ fn hash_bytes(bytes: &[u8]) -> u64 { | |
| off += 16; | ||
| } | ||
|
|
||
| let suffix = &bytes[len - 16..]; | ||
| s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap()); | ||
| s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap()); | ||
| s0 ^= read_u64_le(bytes, len - 16); | ||
| s1 ^= read_u64_le(bytes, len - 8); | ||
| } | ||
|
|
||
| multiply_mix(s0, s1) ^ (len as u64) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,7 @@ use triomphe::Arc; | |
| use turbo_tasks_hash::{DeterministicHash, DeterministicHasher}; | ||
|
|
||
| use crate::{ | ||
| dynamic::{deref_from, new_atom}, | ||
| dynamic::{deref_from, hash_bytes, new_atom}, | ||
| tagged_value::TaggedValue, | ||
| }; | ||
|
|
||
|
|
@@ -71,35 +71,60 @@ pub struct RcStr { | |
| unsafe_data: TaggedValue, | ||
| } | ||
|
|
||
| const _: () = { | ||
| // Enforce that RcStr triggers the non-zero size optimization. | ||
| assert!(std::mem::size_of::<RcStr>() == std::mem::size_of::<Option<RcStr>>()); | ||
| }; | ||
|
|
||
| unsafe impl Send for RcStr {} | ||
| unsafe impl Sync for RcStr {} | ||
|
|
||
| // Marks a payload that is stored in an Arc | ||
| const DYNAMIC_TAG: u8 = 0b_00; | ||
| const DYNAMIC_LOCATION: u8 = 0b_0; | ||
| // Marks a payload that has been leaked since it has a static lifetime | ||
| const STATIC_TAG: u8 = 0b_10; | ||
| // The payload is stored inline | ||
| const INLINE_TAG: u8 = 0b_01; // len in upper nybble | ||
| const INLINE_LOCATION: u8 = 0b_1; | ||
| const INLINE_TAG_INIT: NonZeroU8 = NonZeroU8::new(INLINE_TAG).unwrap(); | ||
| const TAG_MASK: u8 = 0b_11; | ||
| const LOCATION_MASK: u8 = 0b_1; | ||
| // For inline tags the length is stored in the upper 4 bits of the tag byte | ||
| const LEN_OFFSET: usize = 4; | ||
| const LEN_MASK: u8 = 0xf0; | ||
|
|
||
| impl RcStr { | ||
| #[inline(always)] | ||
| fn tag(&self) -> u8 { | ||
| self.unsafe_data.tag() & TAG_MASK | ||
| self.unsafe_data.tag_byte() & TAG_MASK | ||
| } | ||
| #[inline(always)] | ||
| fn location(&self) -> u8 { | ||
| self.unsafe_data.tag_byte() & LOCATION_MASK | ||
| } | ||
|
|
||
| #[inline(never)] | ||
| pub fn as_str(&self) -> &str { | ||
| match self.tag() { | ||
| DYNAMIC_TAG => unsafe { dynamic::deref_from(self.unsafe_data).value.as_str() }, | ||
| INLINE_TAG => { | ||
| let len = (self.unsafe_data.tag() & LEN_MASK) >> LEN_OFFSET; | ||
| let src = self.unsafe_data.data(); | ||
| unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) } | ||
| } | ||
| match self.location() { | ||
| DYNAMIC_LOCATION => self.dynamic_as_str(), | ||
| INLINE_LOCATION => self.inline_as_str(), | ||
| _ => unsafe { debug_unreachable!() }, | ||
| } | ||
| } | ||
|
|
||
| fn inline_as_str(&self) -> &str { | ||
| let len = (self.unsafe_data.tag_byte() & LEN_MASK) >> LEN_OFFSET; | ||
| let src = self.unsafe_data.data(); | ||
| unsafe { std::str::from_utf8_unchecked(&src[..(len as usize)]) } | ||
| } | ||
|
|
||
| // Extract the str reference from a string stored in a dynamic location | ||
| fn dynamic_as_str(&self) -> &str { | ||
| debug_assert!(self.location() == DYNAMIC_LOCATION); | ||
| unsafe { dynamic::deref_from(self.unsafe_data).value.as_str() } | ||
| } | ||
|
|
||
| /// Returns an owned mutable [`String`]. | ||
| /// | ||
| /// This implementation is more efficient than [`ToString::to_string`]: | ||
|
|
@@ -113,31 +138,19 @@ impl RcStr { | |
| // convert `self` into `arc` | ||
| let arc = unsafe { dynamic::restore_arc(ManuallyDrop::new(self).unsafe_data) }; | ||
| match Arc::try_unwrap(arc) { | ||
| Ok(v) => v.value, | ||
| Err(arc) => arc.value.to_string(), | ||
| Ok(v) => v.value.into_string(), | ||
| Err(arc) => arc.value.as_str().to_string(), | ||
| } | ||
| } | ||
| INLINE_TAG => self.as_str().to_string(), | ||
| INLINE_TAG => self.inline_as_str().to_string(), | ||
| STATIC_TAG => self.dynamic_as_str().to_string(), | ||
| _ => unsafe { debug_unreachable!() }, | ||
| } | ||
| } | ||
|
|
||
| pub fn map(self, f: impl FnOnce(String) -> String) -> Self { | ||
| RcStr::from(Cow::Owned(f(self.into_owned()))) | ||
| } | ||
|
|
||
| #[inline] | ||
| pub(crate) fn from_alias(alias: TaggedValue) -> Self { | ||
| if alias.tag() & TAG_MASK == DYNAMIC_TAG { | ||
| unsafe { | ||
| let arc = dynamic::restore_arc(alias); | ||
| forget(arc.clone()); | ||
| forget(arc); | ||
| } | ||
| } | ||
|
|
||
| Self { unsafe_data: alias } | ||
| } | ||
| } | ||
|
|
||
| impl DeterministicHash for RcStr { | ||
|
|
@@ -264,7 +277,16 @@ impl From<RcStr> for PathBuf { | |
| impl Clone for RcStr { | ||
| #[inline(always)] | ||
| fn clone(&self) -> Self { | ||
| Self::from_alias(self.unsafe_data) | ||
| let alias = self.unsafe_data; | ||
| if alias.tag_byte() & TAG_MASK == DYNAMIC_TAG { | ||
| unsafe { | ||
| let arc = dynamic::restore_arc(alias); | ||
| forget(arc.clone()); | ||
| forget(arc); | ||
| } | ||
| } | ||
|
|
||
| RcStr { unsafe_data: alias } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -276,13 +298,20 @@ impl Default for RcStr { | |
|
|
||
| impl PartialEq for RcStr { | ||
| fn eq(&self, other: &Self) -> bool { | ||
| match (self.tag(), other.tag()) { | ||
lukesandberg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| (DYNAMIC_TAG, DYNAMIC_TAG) => { | ||
| // For inline RcStrs this is sufficient and for out of line values it handles a simple | ||
lukesandberg marked this conversation as resolved.
Show resolved
Hide resolved
lukesandberg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // identity cases | ||
| if self.unsafe_data == other.unsafe_data { | ||
lukesandberg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return true; | ||
| } | ||
| // They can still be equal if they are both stored on the heap | ||
| match (self.location(), other.location()) { | ||
lukesandberg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| (DYNAMIC_LOCATION, DYNAMIC_LOCATION) => { | ||
| let l = unsafe { deref_from(self.unsafe_data) }; | ||
| let r = unsafe { deref_from(other.unsafe_data) }; | ||
| l.hash == r.hash && l.value == r.value | ||
| } | ||
| (INLINE_TAG, INLINE_TAG) => self.unsafe_data == other.unsafe_data, | ||
| // NOTE: it is never possible for an inline storage string to compare equal to a dynamic | ||
| // allocated string, the construction routines separate the strings based on length. | ||
| _ => false, | ||
| } | ||
| } | ||
|
|
@@ -304,13 +333,13 @@ impl Ord for RcStr { | |
|
|
||
| impl Hash for RcStr { | ||
| fn hash<H: Hasher>(&self, state: &mut H) { | ||
| match self.tag() { | ||
| DYNAMIC_TAG => { | ||
| match self.location() { | ||
| DYNAMIC_LOCATION => { | ||
| let l = unsafe { deref_from(self.unsafe_data) }; | ||
| state.write_u64(l.hash); | ||
| state.write_u8(0xff); | ||
| } | ||
| INLINE_TAG => { | ||
| INLINE_LOCATION => { | ||
| self.as_str().hash(state); | ||
| } | ||
| _ => unsafe { debug_unreachable!() }, | ||
|
|
@@ -339,27 +368,45 @@ impl Drop for RcStr { | |
| } | ||
| } | ||
|
|
||
| // Exports for our macro | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The View DetailsAnalysisThe Currently, when a static string is dropped, the The issue is that the code doesn't make it clear that static strings should not be processed in the RecommendationUpdate the impl Drop for RcStr {
fn drop(&mut self) {
match self.tag() {
DYNAMIC_TAG => unsafe { drop(dynamic::restore_arc(self.unsafe_data)) },
STATIC_TAG => {
// Static strings are never dropped - they have static lifetime
// No action needed
},
INLINE_TAG => {
// Inline strings are stored directly in the tagged value
// No action needed
},
_ => unsafe { debug_unreachable!() },
}
}
}This makes the behavior explicit and prevents future bugs if tag values change. |
||
| #[doc(hidden)] | ||
| pub const fn inline_atom(s: &str) -> Option<RcStr> { | ||
| dynamic::inline_atom(s) | ||
| } | ||
|
|
||
| #[doc(hidden)] | ||
| #[inline(always)] | ||
| pub fn from_static(s: &'static PrehashedString) -> RcStr { | ||
| dynamic::new_static_atom(s) | ||
| } | ||
| #[doc(hidden)] | ||
| pub use dynamic::PrehashedString; | ||
|
|
||
| #[doc(hidden)] | ||
| pub const fn make_const_prehashed_string(text: &'static str) -> PrehashedString { | ||
| PrehashedString { | ||
| value: dynamic::Payload::Ref(text), | ||
| hash: hash_bytes(text.as_bytes()), | ||
| } | ||
| } | ||
|
|
||
| /// Create an rcstr from a string literal. | ||
| /// allocates the RcStr inline when possible otherwise uses a `LazyLock` to manage the allocation. | ||
| #[macro_export] | ||
| macro_rules! rcstr { | ||
| ($s:expr) => {{ | ||
| const INLINE: core::option::Option<$crate::RcStr> = $crate::inline_atom($s); | ||
| // this condition should be able to be compile time evaluated and inlined. | ||
| // This condition can be compile time evaluated and inlined. | ||
| if INLINE.is_some() { | ||
| INLINE.unwrap() | ||
| } else { | ||
| #[inline(never)] | ||
| fn get_rcstr() -> $crate::RcStr { | ||
| static CACHE: std::sync::LazyLock<$crate::RcStr> = | ||
| std::sync::LazyLock::new(|| $crate::RcStr::from($s)); | ||
|
|
||
| (*CACHE).clone() | ||
| // Allocate static storage for the PrehashedString | ||
| static RCSTR_STORAGE: $crate::PrehashedString = | ||
| $crate::make_const_prehashed_string($s); | ||
| // This basically just tags a bit onto the raw pointer and wraps it in an RcStr | ||
| // should be fast enough to do every time. | ||
| $crate::from_static(&RCSTR_STORAGE) | ||
| } | ||
| get_rcstr() | ||
| } | ||
|
|
@@ -460,6 +507,15 @@ mod tests { | |
| assert_eq!(rcstr!("abcdefghi"), RcStr::from("abcdefghi")); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_static_atom() { | ||
| const LONG: &str = "a very long string that lives forever"; | ||
| let leaked = rcstr!(LONG); | ||
| let not_leaked = RcStr::from(LONG); | ||
| assert_ne!(leaked.tag(), not_leaked.tag()); | ||
| assert_eq!(leaked, not_leaked); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_inline_atom() { | ||
| // This is a silly test, just asserts that we can evaluate this in a constant context. | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.