Skip to content

Commit

Permalink
Use inline for short strings
Browse files Browse the repository at this point in the history
Closes servo#276.
  • Loading branch information
overlookmotel committed Jul 5, 2023
1 parent bd3b942 commit 0ceca1f
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 25 deletions.
3 changes: 3 additions & 0 deletions integration-tests/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ fn main() {
"html",
"head",
"id",
"❤",
"❤💯",
"❤💯❤💯",
])
.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs"))
.unwrap()
Expand Down
19 changes: 11 additions & 8 deletions integration-tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ fn test_types() {
assert!(Atom::from("").is_static());
assert!(Atom::from("defaults").is_static());
assert!(Atom::from("font-weight").is_static());
assert!(Atom::from("id").is_static());
assert!(Atom::from("body").is_static());
assert!(Atom::from("a").is_static());
assert!(Atom::from("address").is_static());
assert!(Atom::from("id").is_inline());
assert!(Atom::from("body").is_inline());
assert!(Atom::from("a").is_inline());
assert!(Atom::from("address").is_inline());
assert!(Atom::from("c").is_inline());
assert!(Atom::from("zz").is_inline());
assert!(Atom::from("zzz").is_inline());
Expand Down Expand Up @@ -173,11 +173,11 @@ fn repr() {
// Static atoms
check_static("defaults", test_atom!("defaults"));
check_static("font-weight", test_atom!("font-weight"));
check_static("a", test_atom!("a"));
check_static("address", test_atom!("address"));
check_static("area", test_atom!("area"));

// Inline atoms
check("a", 0x0000_0000_0000_6111);
check("address", 0x7373_6572_6464_6171);
check("area", 0x0000_0061_6572_6141);
check("e", 0x0000_0000_0000_6511);
check("xyzzy", 0x0000_797A_7A79_7851);
check("xyzzy01", 0x3130_797A_7A79_7871);
Expand All @@ -201,7 +201,10 @@ fn atom_macro() {
assert_eq!(test_atom!("a"), Atom::from("a"));
assert_eq!(test_atom!("body"), Atom::from("body"));
assert_eq!(test_atom!("address"), Atom::from("address"));
assert_eq!(test_atom!("❤"), Atom::from("❤"));
assert_eq!(test_atom!("❤💯"), Atom::from("❤💯"));
assert_eq!(test_atom!("font-weight"), Atom::from("font-weight"));
assert_eq!(test_atom!("❤💯❤💯"), Atom::from("❤💯❤💯"));
}

#[test]
Expand Down Expand Up @@ -300,7 +303,7 @@ fn test_from_string() {
#[test]
// Checks which strings `Atom::try_static` resolves to a static atom.
fn test_try_static() {
// "defaults" is 8 bytes long; the codegen keeps strings over 7 bytes in the static set.
assert!(Atom::try_static("defaults").is_some());
// NOTE(review): the next two assertions contradict each other. This diff view prints the
// removed line followed by its replacement without +/- markers. Presumably `is_some()` is
// the old expectation and `is_none()` the new one, since "head" is only 4 bytes and short
// strings are now stored inline instead of in the static set — confirm against the raw diff.
assert!(Atom::try_static("head").is_some());
assert!(Atom::try_static("head").is_none());
// A string that is absent from the static table yields `None`.
assert!(Atom::try_static("not in the static table").is_none());
}

Expand Down
59 changes: 43 additions & 16 deletions src/atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,31 @@ impl<Static> Atom<Static> {
}
}

/// Builds an inline atom in a `const` context; intended for the generated `atom!()` macros.
///
/// `n` holds the string's bytes laid out as for a little-endian target: string byte `i`
/// occupies bits `8 * (i + 1)` and up, leaving the lowest byte free for the tag and the
/// length. `len` is the string's length in bytes.
#[inline(always)]
#[doc(hidden)]
pub const fn pack_inline(n: u64, len: u8) -> Self {
    // Big-endian targets need the seven payload bytes in the opposite order, with the
    // lowest byte still reserved. `swap_bytes` mirrors all eight bytes; the shift then
    // discards whatever sat in the lowest byte of `n` and clears the lowest byte again.
    let payload = if cfg!(target_endian = "big") {
        n.swap_bytes() << 8
    } else {
        n
    };

    let bits: u64 = payload | ((len as u64) << LEN_OFFSET) | (INLINE_TAG as u64);
    Self {
        // SAFETY: `INLINE_TAG` is OR-ed into `bits`, which ensures the value is never zero.
        unsafe_data: unsafe { NonZeroU64::new_unchecked(bits) },
        phantom: PhantomData,
    }
}

fn tag(&self) -> u8 {
(self.unsafe_data.get() & TAG_MASK) as u8
}
Expand Down Expand Up @@ -186,20 +211,22 @@ impl<Static: StaticAtomSet> Hash for Atom<Static> {

impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
fn from(string_to_add: Cow<'a, str>) -> Self {
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
let len = string_to_add.len();
if len <= MAX_INLINE_LEN {
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
{
let dest = inline_atom_slice_mut(&mut data);
dest[..len].copy_from_slice(string_to_add.as_bytes())
}
Atom {
// INLINE_TAG ensures this is never zero
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
} else {
let len = string_to_add.len();
if len == 0 {
Self::pack_static(Static::empty_string_index())
} else if len <= MAX_INLINE_LEN {
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
{
let dest = inline_atom_slice_mut(&mut data);
dest[..len].copy_from_slice(string_to_add.as_bytes())
}
Atom {
// INLINE_TAG ensures this is never zero
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
} else {
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
let ptr: std::ptr::NonNull<Entry> = DYNAMIC_SET.insert(string_to_add, hash.g);
let data = ptr.as_ptr() as u64;
debug_assert!(0 == data & TAG_MASK);
Expand All @@ -208,8 +235,8 @@ impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
}
})
})
}
}
}

Expand Down
43 changes: 42 additions & 1 deletion string-cache-codegen/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,13 @@ impl AtomType {
// which would cause divisions by zero in rust-phf.
self.atoms.insert(String::new());

let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect();
// Strings over 7 bytes and empty string added to static set
let atoms: Vec<&str> = self
.atoms
.iter()
.filter(|s| s.len() > 7 || s.is_empty())
.map(|s| &**s)
.collect();
let hash_state = phf_generator::generate_hash(&atoms);
let phf_generator::HashState { key, disps, map } = hash_state;
let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip();
Expand Down Expand Up @@ -239,6 +245,35 @@ impl AtomType {
})
.collect();

// Strings 7 bytes or less (except empty string) stored inline
let short_strs: Vec<&str> = self
.atoms
.iter()
.filter(|s| s.len() < 8 && !s.is_empty())
.map(|s| &**s)
.collect();
let short_const_names: Vec<_> = short_strs
.iter()
.map(|s| {
let mut name = atom_prefix.clone();
for c in s.chars() {
name.push_str(&format!("_{:02X}", c as u32))
}
new_term(&name)
})
.collect();
let short_values: Vec<_> = short_strs
.iter()
.map(|s| {
let mut n = 0u64;
for (index, c) in s.bytes().enumerate() {
n = n | ((c as u64) << (index * 8 + 8));
}
n
})
.collect();
let short_lens: Vec<_> = short_strs.iter().map(|s| s.len() as u8).collect();

quote! {
#atom_doc
pub type #type_name = ::string_cache::Atom<#static_set_name>;
Expand All @@ -265,13 +300,19 @@ impl AtomType {
#(
pub const #const_names: #type_name = #type_name::pack_static(#indices);
)*
#(
pub const #short_const_names: #type_name = #type_name::pack_inline(#short_values, #short_lens);
)*

#macro_doc
#[macro_export]
macro_rules! #macro_name {
#(
(#atoms) => { #module::#const_names };
)*
#(
(#short_strs) => { #module::#short_const_names };
)*
}
}
}
Expand Down

0 comments on commit 0ceca1f

Please sign in to comment.