diff --git a/crates/ty_python_semantic/src/semantic_index/use_def.rs b/crates/ty_python_semantic/src/semantic_index/use_def.rs index c0597e87a8e51b..ea62748c52c06f 100644 --- a/crates/ty_python_semantic/src/semantic_index/use_def.rs +++ b/crates/ty_python_semantic/src/semantic_index/use_def.rs @@ -161,10 +161,11 @@ //! this in the future for some closures, but for now this is where we start.) //! //! The data structure we build to answer these questions is the `UseDefMap`. It has a -//! `bindings_by_use` vector of [`Bindings`] indexed by [`ScopedUseId`], a -//! `declarations_by_binding` vector of [`Declarations`] indexed by [`ScopedDefinitionId`], a +//! `bindings_by_use` vector of [`InternedBindingsId`] indexed by [`ScopedUseId`] +//! (plus an interned bindings table), a +//! `declarations_by_binding` map of [`InternedDeclarationsId`] keyed by binding [`Definition`], a //! `bindings_by_declaration` vector of [`Bindings`] indexed by [`ScopedDefinitionId`], and -//! `public_bindings` and `public_definitions` vectors indexed by [`ScopedPlaceId`]. The values in +//! `end_of_scope_symbols` and `end_of_scope_members` vectors indexed by [`ScopedSymbolId`]/[`ScopedMemberId`]. The values in //! each of these vectors are (in principle) a list of live bindings at that use/definition, or at //! the end of the scope for that place, with a list of the dominating constraints for each //! binding. @@ -241,7 +242,7 @@ //! visits a `StmtIf` node. use ruff_index::{IndexVec, newtype_index}; -use rustc_hash::FxHashMap; +use rustc_hash::{FxBuildHasher, FxHashMap}; use crate::node_key::NodeKey; use crate::place::BoundnessAnalysis; @@ -270,6 +271,35 @@ mod place_state; pub(super) use place_state::PreviousDefinitions; pub(crate) use place_state::{LiveBinding, ScopedDefinitionId}; +/// Uniquely identifies an interned [`Bindings`] entry in [`UseDefMap::interned_bindings`].
+#[newtype_index] +#[derive(salsa::Update, get_size2::GetSize)] +struct InternedBindingsId; + +/// Uniquely identifies an interned [`Declarations`] entry in [`UseDefMap::interned_declarations`]. +#[newtype_index] +#[derive(salsa::Update, get_size2::GetSize)] +struct InternedDeclarationsId; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, salsa::Update, get_size2::GetSize)] +struct InternedPlaceStateId(InternedBindingsId, InternedDeclarationsId); + +impl InternedPlaceStateId { + fn bindings_id(self) -> InternedBindingsId { + self.0 + } + + fn declarations_id(self) -> InternedDeclarationsId { + self.1 + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, salsa::Update, get_size2::GetSize)] +enum InternedEnclosingSnapshotId { + Constraint(ScopedNarrowingConstraint), + Bindings(InternedBindingsId), +} + /// Applicable definitions and constraints for every use of a name. #[derive(Debug, PartialEq, Eq, salsa::Update, get_size2::GetSize)] pub(crate) struct UseDefMap<'db> { @@ -283,8 +313,13 @@ pub(crate) struct UseDefMap<'db> { /// Array of reachability constraints in this scope. reachability_constraints: ReachabilityConstraints, + /// Interned [`Bindings`] values. + interned_bindings: IndexVec, + /// Interned [`Declarations`] values. + interned_declarations: IndexVec, + /// [`Bindings`] reaching a [`ScopedUseId`]. - bindings_by_use: IndexVec, + bindings_by_use: IndexVec, /// Tracks whether or not a given AST node is reachable from the start of the scope. node_reachability: FxHashMap, @@ -295,7 +330,7 @@ pub(crate) struct UseDefMap<'db> { /// If the definition is both a declaration and a binding -- `x: int = 1` for example -- then /// we don't actually need anything here, all we'll need to validate is that our own RHS is a /// valid assignment to our own annotation. 
- declarations_by_binding: FxHashMap, Declarations>, + declarations_by_binding: FxHashMap, InternedDeclarationsId>, /// If the definition is a declaration (only) -- `x: int` for example -- then we need /// [`Bindings`] to know whether this declaration is consistent with the previously @@ -307,13 +342,13 @@ pub(crate) struct UseDefMap<'db> { /// /// If we see a binding to a `Final`-qualified symbol, we also need this map to find previous /// bindings to that symbol. If there are any, the assignment is invalid. - bindings_by_definition: FxHashMap, Bindings>, + bindings_by_definition: FxHashMap, InternedBindingsId>, /// [`PlaceState`] visible at end of scope for each symbol. end_of_scope_symbols: IndexVec, /// [`PlaceState`] visible at end of scope for each member. - end_of_scope_members: IndexVec, + end_of_scope_members: IndexVec, /// All potentially reachable bindings and declarations, for each symbol. reachable_definitions_by_symbol: IndexVec, @@ -323,7 +358,7 @@ pub(crate) struct UseDefMap<'db> { /// Snapshot of bindings in this scope that can be used to resolve a reference in a nested /// scope. - enclosing_snapshots: EnclosingSnapshots, + enclosing_snapshots: IndexVec, /// Whether or not the end of the scope is reachable. 
/// @@ -355,8 +390,9 @@ impl<'db> UseDefMap<'db> { &self, use_id: ScopedUseId, ) -> BindingWithConstraintsIterator<'_, 'db> { + let bindings_id = self.bindings_by_use[use_id]; self.bindings_iterator( - &self.bindings_by_use[use_id], + &self.interned_bindings[bindings_id], BoundnessAnalysis::BasedOnUnboundVisibility, ) } @@ -460,8 +496,9 @@ impl<'db> UseDefMap<'db> { &self, member: ScopedMemberId, ) -> BindingWithConstraintsIterator<'_, 'db> { + let place_state_id = self.end_of_scope_members[member]; self.bindings_iterator( - self.end_of_scope_members[member].bindings(), + &self.interned_bindings[place_state_id.bindings_id()], BoundnessAnalysis::BasedOnUnboundVisibility, ) } @@ -486,9 +523,9 @@ impl<'db> UseDefMap<'db> { pub(crate) fn reachable_member_bindings( &self, - symbol: ScopedMemberId, + member: ScopedMemberId, ) -> BindingWithConstraintsIterator<'_, 'db> { - let bindings = &self.reachable_definitions_by_member[symbol].bindings; + let bindings = &self.reachable_definitions_by_member[member].bindings; self.bindings_iterator(bindings, BoundnessAnalysis::AssumeBound) } @@ -503,13 +540,19 @@ impl<'db> UseDefMap<'db> { // TODO: We haven't implemented proper boundness analysis for nonlocal symbols, so we assume the boundness is bound for now. 
BoundnessAnalysis::AssumeBound }; + match self.enclosing_snapshots.get(snapshot_id) { - Some(EnclosingSnapshot::Constraint(constraint)) => { + Some(InternedEnclosingSnapshotId::Constraint(constraint)) => { EnclosingSnapshotResult::FoundConstraint(*constraint) } - Some(EnclosingSnapshot::Bindings(bindings)) => EnclosingSnapshotResult::FoundBindings( - self.bindings_iterator(bindings, boundness_analysis), - ), + Some(InternedEnclosingSnapshotId::Bindings(bindings_id)) => { + EnclosingSnapshotResult::FoundBindings( + self.bindings_iterator( + &self.interned_bindings[*bindings_id], + boundness_analysis, + ), + ) + } None => EnclosingSnapshotResult::NotFound, } } @@ -518,8 +561,9 @@ impl<'db> UseDefMap<'db> { &self, definition: Definition<'db>, ) -> BindingWithConstraintsIterator<'_, 'db> { + let bindings_id = self.bindings_by_definition[&definition]; self.bindings_iterator( - &self.bindings_by_definition[&definition], + &self.interned_bindings[bindings_id], BoundnessAnalysis::BasedOnUnboundVisibility, ) } @@ -528,8 +572,9 @@ impl<'db> UseDefMap<'db> { &self, binding: Definition<'db>, ) -> DeclarationsIterator<'_, 'db> { + let declarations_id = self.declarations_by_binding[&binding]; self.declarations_iterator( - &self.declarations_by_binding[&binding], + &self.interned_declarations[declarations_id], BoundnessAnalysis::BasedOnUnboundVisibility, ) } @@ -556,7 +601,8 @@ impl<'db> UseDefMap<'db> { &'map self, member: ScopedMemberId, ) -> DeclarationsIterator<'map, 'db> { - let declarations = self.end_of_scope_members[member].declarations(); + let place_state_id = self.end_of_scope_members[member]; + let declarations = &self.interned_declarations[place_state_id.declarations_id()]; self.declarations_iterator(declarations, BoundnessAnalysis::BasedOnUnboundVisibility) } @@ -802,7 +848,7 @@ impl<'db> Iterator for DeclarationsIterator<'_, 'db> { impl std::iter::FusedIterator for DeclarationsIterator<'_, '_> {} -#[derive(Debug, PartialEq, Eq, salsa::Update, get_size2::GetSize)] 
+#[derive(Clone, Debug, PartialEq, Eq, Hash, salsa::Update, get_size2::GetSize)] struct ReachableDefinitions { bindings: Bindings, declarations: Declarations, @@ -1457,21 +1503,70 @@ impl<'db> UseDefMapBuilder<'db> { .add_or_constraint(self.reachability, snapshot.reachability); } - fn mark_reachability_constraints(&mut self) { + pub(super) fn finish(mut self) -> UseDefMap<'db> { + self.all_definitions.shrink_to_fit(); + self.symbol_states.shrink_to_fit(); + self.member_states.shrink_to_fit(); + self.reachable_symbol_definitions.shrink_to_fit(); + self.reachable_member_definitions.shrink_to_fit(); + self.bindings_by_use.shrink_to_fit(); + self.node_reachability.shrink_to_fit(); + self.declarations_by_binding.shrink_to_fit(); + self.bindings_by_definition.shrink_to_fit(); + self.enclosing_snapshots.shrink_to_fit(); + + let mut interned_bindings = IndexVec::with_capacity(self.bindings_by_definition.len()); + let mut interned_ids_by_bindings = + FxHashMap::with_capacity_and_hasher(self.bindings_by_definition.len(), FxBuildHasher); + let mut interned_declarations = IndexVec::with_capacity(self.declarations_by_binding.len()); + let mut interned_ids_by_declarations = + FxHashMap::with_capacity_and_hasher(self.declarations_by_binding.len(), FxBuildHasher); + // These fields are manually interned because they have a statistically high duplication rate (>50%). 
+ let bindings_by_definition = Self::intern_bindings_by_definition( + self.bindings_by_definition, + &mut interned_bindings, + &mut interned_ids_by_bindings, + ); + let declarations_by_binding = Self::intern_declarations_by_binding( + self.declarations_by_binding, + &mut interned_declarations, + &mut interned_ids_by_declarations, + ); + let bindings_by_use = Self::intern_bindings_by_use( + self.bindings_by_use, + &mut interned_bindings, + &mut interned_ids_by_bindings, + ); + let end_of_scope_members = Self::intern_end_of_scope_members( + self.member_states, + &mut interned_bindings, + &mut interned_ids_by_bindings, + &mut interned_declarations, + &mut interned_ids_by_declarations, + ); + let enclosing_snapshots = Self::intern_enclosing_snapshots( + self.enclosing_snapshots, + &mut interned_bindings, + &mut interned_ids_by_bindings, + ); + + interned_bindings.shrink_to_fit(); + interned_declarations.shrink_to_fit(); + // We only walk the fields that are copied through to the UseDefMap when we finish building // it. 
- for bindings in &mut self.bindings_by_use { + for bindings in &mut interned_bindings { bindings.finish(&mut self.reachability_constraints); } + for declarations in &mut interned_declarations { + declarations.finish(&mut self.reachability_constraints); + } for constraint in self.node_reachability.values() { self.reachability_constraints.mark_used(*constraint); } for symbol_state in &mut self.symbol_states { symbol_state.finish(&mut self.reachability_constraints); } - for member_state in &mut self.member_states { - member_state.finish(&mut self.reachability_constraints); - } for reachable_definition in &mut self.reachable_symbol_definitions { reachable_definition .bindings @@ -1488,46 +1583,183 @@ impl<'db> UseDefMapBuilder<'db> { .declarations .finish(&mut self.reachability_constraints); } - for declarations in self.declarations_by_binding.values_mut() { - declarations.finish(&mut self.reachability_constraints); - } - for bindings in self.bindings_by_definition.values_mut() { - bindings.finish(&mut self.reachability_constraints); - } - for eager_snapshot in &mut self.enclosing_snapshots { - eager_snapshot.finish(&mut self.reachability_constraints); + for enclosing_snapshot in &enclosing_snapshots { + // Bindings are already marked above. 
+ if let InternedEnclosingSnapshotId::Constraint(constraint) = enclosing_snapshot { + self.reachability_constraints.mark_used(*constraint); + } } self.reachability_constraints.mark_used(self.reachability); - } - - pub(super) fn finish(mut self) -> UseDefMap<'db> { - self.mark_reachability_constraints(); - - self.all_definitions.shrink_to_fit(); - self.symbol_states.shrink_to_fit(); - self.member_states.shrink_to_fit(); - self.reachable_symbol_definitions.shrink_to_fit(); - self.reachable_member_definitions.shrink_to_fit(); - self.bindings_by_use.shrink_to_fit(); - self.node_reachability.shrink_to_fit(); - self.declarations_by_binding.shrink_to_fit(); - self.bindings_by_definition.shrink_to_fit(); - self.enclosing_snapshots.shrink_to_fit(); UseDefMap { all_definitions: self.all_definitions, predicates: self.predicates.build(), reachability_constraints: self.reachability_constraints.build(), - bindings_by_use: self.bindings_by_use, + interned_bindings, + interned_declarations, + bindings_by_use, node_reachability: self.node_reachability, end_of_scope_symbols: self.symbol_states, - end_of_scope_members: self.member_states, + end_of_scope_members, reachable_definitions_by_symbol: self.reachable_symbol_definitions, reachable_definitions_by_member: self.reachable_member_definitions, - declarations_by_binding: self.declarations_by_binding, - bindings_by_definition: self.bindings_by_definition, - enclosing_snapshots: self.enclosing_snapshots, + declarations_by_binding, + bindings_by_definition, + enclosing_snapshots, end_of_scope_reachability: self.reachability, } } + + fn intern_bindings_by_definition( + bindings_by_definition: FxHashMap, Bindings>, + interned_bindings: &mut IndexVec, + interned_ids_by_bindings: &mut FxHashMap, + ) -> FxHashMap, InternedBindingsId> { + let mut interned_ids_by_definition: FxHashMap, InternedBindingsId> = + FxHashMap::with_capacity_and_hasher(bindings_by_definition.len(), FxBuildHasher); + + for (definition, bindings) in 
bindings_by_definition { + let interned_id = if let Some(interned_id) = interned_ids_by_bindings.get(&bindings) { + *interned_id + } else { + let interned_id = interned_bindings.push(bindings.clone()); + interned_ids_by_bindings.insert(bindings, interned_id); + interned_id + }; + interned_ids_by_definition.insert(definition, interned_id); + } + + interned_ids_by_definition.shrink_to_fit(); + interned_ids_by_definition + } + + fn intern_declarations_by_binding( + declarations_by_binding: FxHashMap, Declarations>, + interned_declarations: &mut IndexVec, + interned_ids_by_declarations: &mut FxHashMap, + ) -> FxHashMap, InternedDeclarationsId> { + let mut interned_ids_by_binding: FxHashMap, InternedDeclarationsId> = + FxHashMap::with_capacity_and_hasher(declarations_by_binding.len(), FxBuildHasher); + + for (binding, declarations) in declarations_by_binding { + let interned_id = + if let Some(interned_id) = interned_ids_by_declarations.get(&declarations) { + *interned_id + } else { + let interned_id = interned_declarations.push(declarations.clone()); + interned_ids_by_declarations.insert(declarations, interned_id); + interned_id + }; + interned_ids_by_binding.insert(binding, interned_id); + } + + interned_ids_by_binding.shrink_to_fit(); + interned_ids_by_binding + } + + fn intern_bindings_by_use( + bindings_by_use: IndexVec, + interned_bindings: &mut IndexVec, + interned_ids_by_bindings: &mut FxHashMap, + ) -> IndexVec { + let mut interned_ids_by_use: IndexVec = + IndexVec::with_capacity(bindings_by_use.len()); + + for bindings in bindings_by_use { + let interned_id = if let Some(interned_id) = interned_ids_by_bindings.get(&bindings) { + *interned_id + } else { + let interned_id = interned_bindings.push(bindings.clone()); + interned_ids_by_bindings.insert(bindings, interned_id); + interned_id + }; + interned_ids_by_use.push(interned_id); + } + + interned_ids_by_use.shrink_to_fit(); + interned_ids_by_use + } + + fn intern_end_of_scope_members( + end_of_scope_members: 
IndexVec, + interned_bindings: &mut IndexVec, + interned_ids_by_bindings: &mut FxHashMap, + interned_declarations: &mut IndexVec, + interned_ids_by_declarations: &mut FxHashMap, + ) -> IndexVec { + let mut interned_ids_by_member: IndexVec = + IndexVec::with_capacity(end_of_scope_members.len()); + let mut interned_ids_by_place_state: FxHashMap = + FxHashMap::with_capacity_and_hasher(end_of_scope_members.len(), FxBuildHasher); + + for place_state in end_of_scope_members { + let interned_id = if let Some(interned_id) = + interned_ids_by_place_state.get(&place_state) + { + *interned_id + } else { + let bindings_id = if let Some(bindings_id) = + interned_ids_by_bindings.get(place_state.bindings()) + { + *bindings_id + } else { + let bindings_id = interned_bindings.push(place_state.bindings().clone()); + interned_ids_by_bindings.insert(place_state.bindings().clone(), bindings_id); + bindings_id + }; + let declarations_id = if let Some(declarations_id) = + interned_ids_by_declarations.get(place_state.declarations()) + { + *declarations_id + } else { + let declarations_id = + interned_declarations.push(place_state.declarations().clone()); + interned_ids_by_declarations + .insert(place_state.declarations().clone(), declarations_id); + declarations_id + }; + let place_state_id = InternedPlaceStateId(bindings_id, declarations_id); + interned_ids_by_place_state.insert(place_state, place_state_id); + place_state_id + }; + interned_ids_by_member.push(interned_id); + } + + interned_ids_by_member.shrink_to_fit(); + interned_ids_by_member + } + + fn intern_enclosing_snapshots( + enclosing_snapshots: EnclosingSnapshots, + interned_bindings: &mut IndexVec, + interned_ids_by_bindings: &mut FxHashMap, + ) -> IndexVec { + let mut interned_ids_by_snapshot: IndexVec< + ScopedEnclosingSnapshotId, + InternedEnclosingSnapshotId, + > = IndexVec::with_capacity(enclosing_snapshots.len()); + + for snapshot in enclosing_snapshots { + let interned_id = match snapshot { + 
EnclosingSnapshot::Bindings(bindings) => { + let interned_bindings_id = + if let Some(interned_id) = interned_ids_by_bindings.get(&bindings) { + *interned_id + } else { + let interned_id = interned_bindings.push(bindings.clone()); + interned_ids_by_bindings.insert(bindings, interned_id); + interned_id + }; + InternedEnclosingSnapshotId::Bindings(interned_bindings_id) + } + EnclosingSnapshot::Constraint(constraint) => { + InternedEnclosingSnapshotId::Constraint(constraint) + } + }; + interned_ids_by_snapshot.push(interned_id); + } + + interned_ids_by_snapshot.shrink_to_fit(); + interned_ids_by_snapshot + } } diff --git a/crates/ty_python_semantic/src/semantic_index/use_def/place_state.rs b/crates/ty_python_semantic/src/semantic_index/use_def/place_state.rs index 71833f34063979..cddde912af1451 100644 --- a/crates/ty_python_semantic/src/semantic_index/use_def/place_state.rs +++ b/crates/ty_python_semantic/src/semantic_index/use_def/place_state.rs @@ -71,14 +71,14 @@ impl ScopedDefinitionId { /// Live declarations for a single place at some point in control flow, with their /// corresponding reachability constraints. -#[derive(Clone, Debug, Default, PartialEq, Eq, salsa::Update, get_size2::GetSize)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, salsa::Update, get_size2::GetSize)] pub(super) struct Declarations { /// A list of live declarations for this place, sorted by their `ScopedDefinitionId` live_declarations: SmallVec<[LiveDeclaration; 2]>, } /// One of the live declarations for a single place at some point in control flow. 
-#[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, get_size2::GetSize)] pub(super) struct LiveDeclaration { pub(super) declaration: ScopedDefinitionId, pub(super) reachability_constraint: ScopedReachabilityConstraintId, @@ -184,28 +184,15 @@ impl Declarations { /// Even if it's a class scope (class variables are not visible to nested scopes) or there are no /// bindings, the current narrowing constraint is necessary for narrowing, so it's stored in /// `Constraint`. -#[derive(Clone, Debug, PartialEq, Eq, salsa::Update, get_size2::GetSize)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, salsa::Update, get_size2::GetSize)] pub(super) enum EnclosingSnapshot { Constraint(ScopedNarrowingConstraint), Bindings(Bindings), } -impl EnclosingSnapshot { - pub(super) fn finish(&mut self, reachability_constraints: &mut ReachabilityConstraintsBuilder) { - match self { - Self::Constraint(constraint) => { - reachability_constraints.mark_used(*constraint); - } - Self::Bindings(bindings) => { - bindings.finish(reachability_constraints); - } - } - } -} - /// Live bindings for a single place at some point in control flow. Each live binding comes /// with a set of narrowing constraints and a reachability constraint. -#[derive(Clone, Debug, Default, PartialEq, Eq, salsa::Update, get_size2::GetSize)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, salsa::Update, get_size2::GetSize)] pub(super) struct Bindings { /// The narrowing constraint applicable to the "unbound" binding, if we need access to it even /// when it's not visible. This happens in class scopes, where local name bindings are not visible @@ -232,7 +219,7 @@ impl Bindings { } /// One of the live bindings for a single place at some point in control flow. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, salsa::Update, get_size2::GetSize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, salsa::Update, get_size2::GetSize)] pub(crate) struct LiveBinding { pub(crate) binding: ScopedDefinitionId, pub(crate) narrowing_constraint: ScopedNarrowingConstraint, @@ -358,7 +345,7 @@ impl Bindings { } } -#[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, get_size2::GetSize)] pub(in crate::semantic_index) struct PlaceState { declarations: Declarations, bindings: Bindings,