Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use FxIndexSet in the symbol interner. #117508

Merged
merged 1 commit into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 11 additions & 20 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
//! type, and vice versa.

use rustc_arena::DroplessArena;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::FxIndexSet;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
use rustc_data_structures::sync::Lock;
use rustc_macros::HashStable_Generic;
Expand Down Expand Up @@ -2076,43 +2076,33 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
}
}

#[derive(Default)]
pub(crate) struct Interner(Lock<InternerInner>);

// The `&'static str`s in this type actually point into the arena.
//
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
// found that to regress performance up to 2% in some cases. This might be
// revisited after further improvements to `indexmap`.
//
// This type is private to prevent accidentally constructing more than one
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
// between `Interner`s.
#[derive(Default)]
struct InternerInner {
arena: DroplessArena,
names: FxHashMap<&'static str, Symbol>,
strings: Vec<&'static str>,
strings: FxIndexSet<&'static str>,
}

impl Interner {
fn prefill(init: &[&'static str]) -> Self {
Interner(Lock::new(InternerInner {
strings: init.into(),
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
..Default::default()
arena: Default::default(),
strings: init.iter().copied().collect(),
}))
}

#[inline]
fn intern(&self, string: &str) -> Symbol {
let mut inner = self.0.lock();
if let Some(&name) = inner.names.get(string) {
return name;
if let Some(idx) = inner.strings.get_index_of(string) {
return Symbol::new(idx as u32);
}

let name = Symbol::new(inner.strings.len() as u32);

// SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
// and immediately convert the clone back to `&[u8]`, all because there
// is no `inner.arena.alloc_str()` method. This is clearly safe.
Expand All @@ -2122,20 +2112,21 @@ impl Interner {
// SAFETY: we can extend the arena allocation to `'static` because we
// only access these while the arena is still alive.
let string: &'static str = unsafe { &*(string as *const str) };
inner.strings.push(string);

// This second hash table lookup can be avoided by using `RawEntryMut`,
// but this code path isn't hot enough for it to be worth it. See
// #91445 for details.
inner.names.insert(string, name);
name
let (idx, is_new) = inner.strings.insert_full(string);
debug_assert!(is_new); // due to the get_index_of check above

Symbol::new(idx as u32)
}

/// Get the symbol as a string.
///
/// [`Symbol::as_str()`] should be used in preference to this function.
fn get(&self, symbol: Symbol) -> &str {
self.0.lock().strings[symbol.0.as_usize()]
self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap()
}
}

Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_span/src/symbol/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::create_default_session_globals_then;

#[test]
fn interner_tests() {
let i = Interner::default();
let i = Interner::prefill(&[]);
// first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0));
// re-use gets the same entry:
Expand Down
Loading