Skip to content

Commit

Permalink
Use inline for short strings
Browse files Browse the repository at this point in the history
Closes #276.
  • Loading branch information
overlookmotel committed Jul 17, 2024
1 parent 4042693 commit 538f87d
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 34 deletions.
3 changes: 3 additions & 0 deletions integration-tests/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ fn main() {
"html",
"head",
"id",
"❤",
"❤💯",
"❤💯❤💯",
])
.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("test_atom.rs"))
.unwrap()
Expand Down
19 changes: 11 additions & 8 deletions integration-tests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ fn test_types() {
assert!(Atom::from("").is_static());
assert!(Atom::from("defaults").is_static());
assert!(Atom::from("font-weight").is_static());
assert!(Atom::from("id").is_static());
assert!(Atom::from("body").is_static());
assert!(Atom::from("a").is_static());
assert!(Atom::from("address").is_static());
assert!(Atom::from("id").is_inline());
assert!(Atom::from("body").is_inline());
assert!(Atom::from("a").is_inline());
assert!(Atom::from("address").is_inline());
assert!(Atom::from("c").is_inline());
assert!(Atom::from("zz").is_inline());
assert!(Atom::from("zzz").is_inline());
Expand Down Expand Up @@ -173,11 +173,11 @@ fn repr() {
// Static atoms
check_static("defaults", test_atom!("defaults"));
check_static("font-weight", test_atom!("font-weight"));
check_static("a", test_atom!("a"));
check_static("address", test_atom!("address"));
check_static("area", test_atom!("area"));

// Inline atoms
check("a", 0x0000_0000_0000_6111);
check("address", 0x7373_6572_6464_6171);
check("area", 0x0000_0061_6572_6141);
check("e", 0x0000_0000_0000_6511);
check("xyzzy", 0x0000_797A_7A79_7851);
check("xyzzy01", 0x3130_797A_7A79_7871);
Expand All @@ -201,7 +201,10 @@ fn atom_macro() {
assert_eq!(test_atom!("a"), Atom::from("a"));
assert_eq!(test_atom!("body"), Atom::from("body"));
assert_eq!(test_atom!("address"), Atom::from("address"));
assert_eq!(test_atom!("❤"), Atom::from("❤"));
assert_eq!(test_atom!("❤💯"), Atom::from("❤💯"));
assert_eq!(test_atom!("font-weight"), Atom::from("font-weight"));
assert_eq!(test_atom!("❤💯❤💯"), Atom::from("❤💯❤💯"));
}

#[test]
Expand Down Expand Up @@ -300,7 +303,7 @@ fn test_from_string() {
#[test]
fn test_try_static() {
assert!(Atom::try_static("defaults").is_some());
assert!(Atom::try_static("head").is_some());
assert!(Atom::try_static("head").is_none());
assert!(Atom::try_static("not in the static table").is_none());
}

Expand Down
53 changes: 37 additions & 16 deletions src/atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,25 @@ impl<Static> Atom<Static> {
}
}

/// Builds an inline atom from pre-packed string bytes.
///
/// Intended for the generated `atom!()`-style macros, which need a `const`
/// constructor for short strings.
///
/// `n` holds the string data in its upper 7 bytes (first string byte at
/// bits 8..16) and must have its lowest byte zeroed; `len` is the string
/// length in bytes. The tag and length are written into that lowest byte.
#[inline(always)]
#[doc(hidden)]
pub const fn pack_inline(n: u64, len: u8) -> Self {
    // On big-endian targets the 7 data bytes must appear in reverse order.
    // `to_le()` byte-swaps the whole word, which moves the zeroed low byte to
    // the top; shifting left by 8 discards it again and leaves the low byte
    // zero, ready to receive the tag and length.
    let payload = if cfg!(target_endian = "big") {
        n.to_le() << 8
    } else {
        n
    };

    // Tag and length share the lowest byte.
    let header = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);

    Self {
        // SAFETY: INLINE_TAG ensures the combined value is never zero.
        unsafe_data: unsafe { NonZeroU64::new_unchecked(header | payload) },
        phantom: PhantomData,
    }
}

fn tag(&self) -> u8 {
(self.unsafe_data.get() & TAG_MASK) as u8
}
Expand Down Expand Up @@ -186,20 +205,22 @@ impl<Static: StaticAtomSet> Hash for Atom<Static> {

impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
fn from(string_to_add: Cow<'a, str>) -> Self {
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
let len = string_to_add.len();
if len <= MAX_INLINE_LEN {
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
{
let dest = inline_atom_slice_mut(&mut data);
dest[..len].copy_from_slice(string_to_add.as_bytes())
}
Atom {
// INLINE_TAG ensures this is never zero
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
} else {
let len = string_to_add.len();
if len == 0 {
Self::pack_static(Static::empty_string_index())
} else if len <= MAX_INLINE_LEN {
let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << LEN_OFFSET);
{
let dest = inline_atom_slice_mut(&mut data);
dest[..len].copy_from_slice(string_to_add.as_bytes());
}
Atom {
// INLINE_TAG ensures this is never zero
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
} else {
Self::try_static_internal(&*string_to_add).unwrap_or_else(|hash| {
let ptr: std::ptr::NonNull<Entry> = DYNAMIC_SET.insert(string_to_add, hash.g);
let data = ptr.as_ptr() as u64;
debug_assert!(0 == data & TAG_MASK);
Expand All @@ -208,8 +229,8 @@ impl<'a, Static: StaticAtomSet> From<Cow<'a, str>> for Atom<Static> {
unsafe_data: unsafe { NonZeroU64::new_unchecked(data) },
phantom: PhantomData,
}
}
})
})
}
}
}

Expand Down
51 changes: 41 additions & 10 deletions string-cache-codegen/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,19 @@ impl AtomType {
// which would cause divisions by zero in rust-phf.
self.atoms.insert(String::new());

let atoms: Vec<&str> = self.atoms.iter().map(|s| &**s).collect();
let hash_state = phf_generator::generate_hash(&atoms);
// Strings over 7 bytes + empty string added to static set.
// Otherwise stored inline.
let (static_strs, inline_strs): (Vec<_>, Vec<_>) = self
.atoms
.iter()
.map(String::as_str)
.partition(|s| s.len() > 7 || s.is_empty());

// Static strings
let hash_state = phf_generator::generate_hash(&static_strs);
let phf_generator::HashState { key, disps, map } = hash_state;
let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip();
let atoms: Vec<&str> = map.iter().map(|&idx| atoms[idx]).collect();
let atoms: Vec<&str> = map.iter().map(|&idx| static_strs[idx]).collect();
let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32;
let indices = 0..atoms.len() as u32;

Expand Down Expand Up @@ -228,16 +236,33 @@ impl AtomType {
let macro_name = new_term(&*self.macro_name);
let module = module.parse::<proc_macro2::TokenStream>().unwrap();
let atom_prefix = format!("ATOM_{}_", type_name.to_string().to_uppercase());
let const_names: Vec<_> = atoms
let new_const_name = |atom: &str| {
let mut name = atom_prefix.clone();
for c in atom.chars() {
name.push_str(&format!("_{:02X}", c as u32))
}
new_term(&name)
};
let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect();

// Inline strings
let (inline_const_names, inline_values_and_lengths): (Vec<_>, Vec<_>) = inline_strs
.iter()
.map(|atom| {
let mut name = atom_prefix.clone();
for c in atom.chars() {
name.push_str(&format!("_{:02X}", c as u32))
.map(|s| {
let const_name = new_const_name(s);

let mut value = 0u64;
for (index, c) in s.bytes().enumerate() {
value = value | ((c as u64) << (index * 8 + 8));
}
new_term(&name)

let len = s.len() as u8;

(const_name, (value, len))
})
.collect();
.unzip();
let (inline_values, inline_lengths): (Vec<_>, Vec<_>) =
inline_values_and_lengths.into_iter().unzip();

quote! {
#atom_doc
Expand Down Expand Up @@ -265,13 +290,19 @@ impl AtomType {
#(
pub const #const_names: #type_name = #type_name::pack_static(#indices);
)*
#(
pub const #inline_const_names: #type_name = #type_name::pack_inline(#inline_values, #inline_lengths);
)*

#macro_doc
#[macro_export]
macro_rules! #macro_name {
#(
(#atoms) => { #module::#const_names };
)*
#(
(#inline_strs) => { #module::#inline_const_names };
)*
}
}
}
Expand Down

0 comments on commit 538f87d

Please sign in to comment.