From 2ff92e83af6d646e05218374954c6ed2ebb67b3d Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Mon, 28 Aug 2023 18:28:43 +0000 Subject: [PATCH 1/8] Introduce fast insertion at extremities to IntervalSet. --- compiler/rustc_index/src/interval.rs | 51 ++++++++++++++++++ compiler/rustc_mir_dataflow/src/points.rs | 66 +++++++++++++---------- 2 files changed, 90 insertions(+), 27 deletions(-) diff --git a/compiler/rustc_index/src/interval.rs b/compiler/rustc_index/src/interval.rs index 0225c5c4f329a..4af5bfcaee6dc 100644 --- a/compiler/rustc_index/src/interval.rs +++ b/compiler/rustc_index/src/interval.rs @@ -140,6 +140,49 @@ impl IntervalSet { result } + /// Specialized version of `insert` when we know that the inserted point is *before* any + /// contained. + pub fn prepend(&mut self, point: I) { + let point = point.index() as u32; + + if let Some((first_start, _)) = self.map.first_mut() { + assert!(point < *first_start); + if point + 1 == *first_start { + *first_start = point; + } else { + self.map.insert(0, (point, point)); + } + } else { + // If the map is empty, push is faster than insert. + self.map.push((point, point)); + } + + debug_assert!( + self.check_invariants(), + "wrong intervals after prepend {point:?} to {self:?}" + ); + } + + /// Specialized version of `insert` when we know that the inserted point is *after* any + /// contained. + pub fn append(&mut self, point: I) { + let point = point.index() as u32; + + if let Some((_, last_end)) = self.map.last_mut() + && let _ = assert!(*last_end < point) + && point == *last_end + 1 + { + *last_end = point; + } else { + self.map.push((point, point)); + } + + debug_assert!( + self.check_invariants(), + "wrong intervals after append {point:?} to {self:?}" + ); + } + pub fn contains(&self, needle: I) -> bool { let needle = needle.index() as u32; let Some(last) = self.map.partition_point(|r| r.0 <= needle).checked_sub(1) else { @@ -325,6 +368,14 @@ impl SparseIntervalMatrix { self.ensure_row(row).insert(point) } + pub fn prepend(&mut self, row: R, point: C) { + self.ensure_row(row).prepend(point) + } + + pub fn append(&mut self, row: R, point: C) { + self.ensure_row(row).append(point) + } + pub fn contains(&self, row: R, point: C) -> bool { self.row(row).is_some_and(|r| r.contains(point)) } diff --git a/compiler/rustc_mir_dataflow/src/points.rs b/compiler/rustc_mir_dataflow/src/points.rs index 70d1a34b5fb13..55a373d4c5199 100644 --- a/compiler/rustc_mir_dataflow/src/points.rs +++ b/compiler/rustc_mir_dataflow/src/points.rs @@ -3,7 +3,7 @@ use rustc_index::interval::SparseIntervalMatrix; use rustc_index::{Idx, IndexVec}; use rustc_middle::mir::{self, BasicBlock, Body, Location}; -use crate::framework::{Analysis, Results, ResultsVisitor, visit_results}; +use crate::framework::{Analysis, Direction, Results, ResultsVisitor, visit_results}; /// Maps between a `Location` and a `PointIndex` (and vice versa). pub struct DenseLocationMap { @@ -105,27 +105,47 @@ where N: Idx, A: Analysis<'tcx, Domain = DenseBitSet>, { - let values = SparseIntervalMatrix::new(elements.num_points()); - let mut visitor = Visitor { elements, values }; - visit_results( - body, - body.basic_blocks.reverse_postorder().iter().copied(), - &mut analysis, - &results, - &mut visitor, - ); - visitor.values + let mut values = SparseIntervalMatrix::new(elements.num_points()); + let reachable_blocks = mir::traversal::reachable_as_bitset(body); + if A::Direction::IS_BACKWARD { + // Iterate blocks in decreasing order, to visit locations in decreasing order. This + // allows to use the more efficient `prepend` method to interval sets. + let callback = |state: &DenseBitSet, location| { + let point = elements.point_from_location(location); + // Use internal iterator manually as it is much more efficient. + state.iter().for_each(|node| values.prepend(node, point)); + }; + let mut visitor = Visitor { callback }; + visit_results( + body, + // Note the `.rev()`. + body.basic_blocks.indices().filter(|&bb| reachable_blocks.contains(bb)).rev(), + &mut analysis, + &results, + &mut visitor, + ); + } else { + // Iterate blocks in increasing order, to visit locations in increasing order. This + // allows to use the more efficient `append` method to interval sets. + let callback = |state: &DenseBitSet, location| { + let point = elements.point_from_location(location); + // Use internal iterator manually as it is much more efficient. + state.iter().for_each(|node| values.append(node, point)); + }; + let mut visitor = Visitor { callback }; + visit_results(body, reachable_blocks.iter(), &mut analysis, &results, &mut visitor); + } + values } -struct Visitor<'a, N: Idx> { - elements: &'a DenseLocationMap, - values: SparseIntervalMatrix, +struct Visitor { + callback: F, } -impl<'tcx, A, N> ResultsVisitor<'tcx, A> for Visitor<'_, N> +impl<'tcx, A, F> ResultsVisitor<'tcx, A> for Visitor where - A: Analysis<'tcx, Domain = DenseBitSet>, - N: Idx, + A: Analysis<'tcx>, + F: FnMut(&A::Domain, Location), { fn visit_after_primary_statement_effect<'mir>( &mut self, @@ -134,11 +154,7 @@ where _statement: &'mir mir::Statement<'tcx>, location: Location, ) { - let point = self.elements.point_from_location(location); - // Use internal iterator manually as it is much more efficient. - state.iter().for_each(|node| { - self.values.insert(node, point); - }); + (self.callback)(state, location); } fn visit_after_primary_terminator_effect<'mir>( @@ -148,10 +164,6 @@ where _terminator: &'mir mir::Terminator<'tcx>, location: Location, ) { - let point = self.elements.point_from_location(location); - // Use internal iterator manually as it is much more efficient. - state.iter().for_each(|node| { - self.values.insert(node, point); - }); + (self.callback)(state, location); } } From 9b8a719ae48db491a5f18d52fdbb802508bf75a5 Mon Sep 17 00:00:00 2001 From: Camille GILLOT Date: Wed, 2 Jul 2025 08:36:22 +0000 Subject: [PATCH 2/8] Reimplement DestinationPropagation according to live ranges. --- compiler/rustc_index/src/interval.rs | 26 + .../rustc_mir_dataflow/src/impls/liveness.rs | 37 +- compiler/rustc_mir_dataflow/src/impls/mod.rs | 3 +- compiler/rustc_mir_dataflow/src/points.rs | 80 +- compiler/rustc_mir_transform/src/dest_prop.rs | 843 ++++++++---------- ...in.DestinationPropagation.panic-abort.diff | 16 +- ...n.DestinationPropagation.panic-unwind.diff | 16 +- 7 files changed, 449 insertions(+), 572 deletions(-) diff --git a/compiler/rustc_index/src/interval.rs b/compiler/rustc_index/src/interval.rs index 4af5bfcaee6dc..69a7a69610e04 100644 --- a/compiler/rustc_index/src/interval.rs +++ b/compiler/rustc_index/src/interval.rs @@ -219,6 +219,32 @@ impl IntervalSet { }) } + pub fn disjoint(&self, other: &IntervalSet) -> bool + where + I: Step, + { + let helper = move || { + let mut self_iter = self.iter_intervals(); + let mut other_iter = other.iter_intervals(); + + let mut self_current = self_iter.next()?; + let mut other_current = other_iter.next()?; + + loop { + if self_current.end <= other_current.start { + self_current = self_iter.next()?; + continue; + } + if other_current.end <= self_current.start { + other_current = other_iter.next()?; + continue; + } + return Some(false); + } + }; + helper().unwrap_or(true) + } + pub fn is_empty(&self) -> bool { self.map.is_empty() } diff --git a/compiler/rustc_mir_dataflow/src/impls/liveness.rs b/compiler/rustc_mir_dataflow/src/impls/liveness.rs index 6ec1b03a34e68..5eba474a60c7a 100644 --- a/compiler/rustc_mir_dataflow/src/impls/liveness.rs +++ b/compiler/rustc_mir_dataflow/src/impls/liveness.rs @@ -92,7 +92,7 @@ impl<'tcx> Visitor<'tcx> for TransferFunction<'_> { } match DefUse::for_place(*place, context) { - Some(DefUse::Def) => { + DefUse::Def => { if let PlaceContext::MutatingUse( MutatingUseContext::Call | MutatingUseContext::AsmOutput, ) = context @@ -105,8 +105,8 @@ impl<'tcx> Visitor<'tcx> for TransferFunction<'_> { self.0.kill(place.local); } } - Some(DefUse::Use) => self.0.gen_(place.local), - None => {} + DefUse::Use => self.0.gen_(place.local), + DefUse::PartialWrite | DefUse::NonUse => {} } self.visit_projection(place.as_ref(), context, location); @@ -131,23 +131,29 @@ impl<'tcx> Visitor<'tcx> for YieldResumeEffect<'_> { } #[derive(Eq, PartialEq, Clone)] -enum DefUse { +pub enum DefUse { + /// Full write to the local. Def, + /// Read of any part of the local. Use, + /// Partial write to the local. + PartialWrite, + /// Non-use, like debuginfo. + NonUse, } impl DefUse { fn apply(state: &mut DenseBitSet, place: Place<'_>, context: PlaceContext) { match DefUse::for_place(place, context) { - Some(DefUse::Def) => state.kill(place.local), - Some(DefUse::Use) => state.gen_(place.local), - None => {} + DefUse::Def => state.kill(place.local), + DefUse::Use => state.gen_(place.local), + DefUse::PartialWrite | DefUse::NonUse => {} } } - fn for_place(place: Place<'_>, context: PlaceContext) -> Option { + pub fn for_place(place: Place<'_>, context: PlaceContext) -> DefUse { match context { - PlaceContext::NonUse(_) => None, + PlaceContext::NonUse(_) => DefUse::NonUse, PlaceContext::MutatingUse( MutatingUseContext::Call @@ -156,21 +162,20 @@ impl DefUse { | MutatingUseContext::Store | MutatingUseContext::Deinit, ) => { + // Treat derefs as a use of the base local. `*p = 4` is not a def of `p` but a use. if place.is_indirect() { - // Treat derefs as a use of the base local. `*p = 4` is not a def of `p` but a - // use. - Some(DefUse::Use) + DefUse::Use } else if place.projection.is_empty() { - Some(DefUse::Def) + DefUse::Def } else { - None + DefUse::PartialWrite } } // Setting the discriminant is not a use because it does no reading, but it is also not // a def because it does not overwrite the whole place PlaceContext::MutatingUse(MutatingUseContext::SetDiscriminant) => { - place.is_indirect().then_some(DefUse::Use) + if place.is_indirect() { DefUse::Use } else { DefUse::PartialWrite } } // All other contexts are uses... @@ -188,7 +193,7 @@ impl DefUse { | NonMutatingUseContext::PlaceMention | NonMutatingUseContext::FakeBorrow | NonMutatingUseContext::SharedBorrow, - ) => Some(DefUse::Use), + ) => DefUse::Use, PlaceContext::MutatingUse(MutatingUseContext::Projection) | PlaceContext::NonMutatingUse(NonMutatingUseContext::Projection) => { diff --git a/compiler/rustc_mir_dataflow/src/impls/mod.rs b/compiler/rustc_mir_dataflow/src/impls/mod.rs index 3f29b819a6d18..6d573e1c00e1c 100644 --- a/compiler/rustc_mir_dataflow/src/impls/mod.rs +++ b/compiler/rustc_mir_dataflow/src/impls/mod.rs @@ -9,7 +9,8 @@ pub use self::initialized::{ MaybeUninitializedPlaces, MaybeUninitializedPlacesDomain, }; pub use self::liveness::{ - MaybeLiveLocals, MaybeTransitiveLiveLocals, TransferFunction as LivenessTransferFunction, + DefUse, MaybeLiveLocals, MaybeTransitiveLiveLocals, + TransferFunction as LivenessTransferFunction, }; pub use self::storage_liveness::{ MaybeRequiresStorage, MaybeStorageDead, MaybeStorageLive, always_storage_live_locals, diff --git a/compiler/rustc_mir_dataflow/src/points.rs b/compiler/rustc_mir_dataflow/src/points.rs index 55a373d4c5199..e3d1e04a319ba 100644 --- a/compiler/rustc_mir_dataflow/src/points.rs +++ b/compiler/rustc_mir_dataflow/src/points.rs @@ -1,9 +1,5 @@ -use rustc_index::bit_set::DenseBitSet; -use rustc_index::interval::SparseIntervalMatrix; use rustc_index::{Idx, IndexVec}; -use rustc_middle::mir::{self, BasicBlock, Body, Location}; - -use crate::framework::{Analysis, Direction, Results, ResultsVisitor, visit_results}; +use rustc_middle::mir::{BasicBlock, Body, Location}; /// Maps between a `Location` and a `PointIndex` (and vice versa). pub struct DenseLocationMap { @@ -93,77 +89,3 @@ rustc_index::newtype_index! { #[debug_format = "PointIndex({})"] pub struct PointIndex {} } - -/// Add points depending on the result of the given dataflow analysis. -pub fn save_as_intervals<'tcx, N, A>( - elements: &DenseLocationMap, - body: &mir::Body<'tcx>, - mut analysis: A, - results: Results, -) -> SparseIntervalMatrix -where - N: Idx, - A: Analysis<'tcx, Domain = DenseBitSet>, -{ - let mut values = SparseIntervalMatrix::new(elements.num_points()); - let reachable_blocks = mir::traversal::reachable_as_bitset(body); - if A::Direction::IS_BACKWARD { - // Iterate blocks in decreasing order, to visit locations in decreasing order. This - // allows to use the more efficient `prepend` method to interval sets. - let callback = |state: &DenseBitSet, location| { - let point = elements.point_from_location(location); - // Use internal iterator manually as it is much more efficient. - state.iter().for_each(|node| values.prepend(node, point)); - }; - let mut visitor = Visitor { callback }; - visit_results( - body, - // Note the `.rev()`. - body.basic_blocks.indices().filter(|&bb| reachable_blocks.contains(bb)).rev(), - &mut analysis, - &results, - &mut visitor, - ); - } else { - // Iterate blocks in increasing order, to visit locations in increasing order. This - // allows to use the more efficient `append` method to interval sets. - let callback = |state: &DenseBitSet, location| { - let point = elements.point_from_location(location); - // Use internal iterator manually as it is much more efficient. - state.iter().for_each(|node| values.append(node, point)); - }; - let mut visitor = Visitor { callback }; - visit_results(body, reachable_blocks.iter(), &mut analysis, &results, &mut visitor); - } - values -} - -struct Visitor { - callback: F, -} - -impl<'tcx, A, F> ResultsVisitor<'tcx, A> for Visitor -where - A: Analysis<'tcx>, - F: FnMut(&A::Domain, Location), -{ - fn visit_after_primary_statement_effect<'mir>( - &mut self, - _analysis: &mut A, - state: &A::Domain, - _statement: &'mir mir::Statement<'tcx>, - location: Location, - ) { - (self.callback)(state, location); - } - - fn visit_after_primary_terminator_effect<'mir>( - &mut self, - _analysis: &mut A, - state: &A::Domain, - _terminator: &'mir mir::Terminator<'tcx>, - location: Location, - ) { - (self.callback)(state, location); - } -} diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs index cf7425251e8e0..da1d339b52658 100644 --- a/compiler/rustc_mir_transform/src/dest_prop.rs +++ b/compiler/rustc_mir_transform/src/dest_prop.rs @@ -59,6 +59,12 @@ //! The first two conditions are simple structural requirements on the `Assign` statements that can //! be trivially checked. The third requirement however is more difficult and costly to check. //! +//! ## Current implementation +//! +//! The current implementation relies on live range computation to check for conflicts. We only +//! allow to merge locals that have disjoint live ranges. The live range are defined with +//! half-statement granularity, so as to make all writes be live for at least a half statement. +//! //! ## Future Improvements //! //! There are a number of ways in which this pass could be improved in the future: @@ -117,9 +123,8 @@ //! - Layout optimizations for coroutines have been added to improve code generation for //! async/await, which are very similar in spirit to what this optimization does. //! -//! Also, rustc now has a simple NRVO pass (see `nrvo.rs`), which handles a subset of the cases that -//! this destination propagation pass handles, proving that similar optimizations can be performed -//! on MIR. +//! [The next approach][attempt 4] computes a conflict matrix between locals by forbidding merging +//! locals with competing writes or with one write while the other is live. //! //! ## Pre/Post Optimization //! @@ -130,20 +135,18 @@ //! [attempt 1]: https://github.com/rust-lang/rust/pull/47954 //! [attempt 2]: https://github.com/rust-lang/rust/pull/71003 //! [attempt 3]: https://github.com/rust-lang/rust/pull/72632 +//! [attempt 4]: https://github.com/rust-lang/rust/pull/96451 -use rustc_data_structures::fx::{FxIndexMap, IndexEntry, IndexOccupiedEntry}; +use rustc_data_structures::fx::FxIndexMap; use rustc_index::bit_set::DenseBitSet; use rustc_index::interval::SparseIntervalMatrix; -use rustc_middle::bug; -use rustc_middle::mir::visit::{MutVisitor, PlaceContext, Visitor}; -use rustc_middle::mir::{ - Body, HasLocalDecls, InlineAsmOperand, Local, LocalKind, Location, MirDumper, Operand, - PassWhere, Place, Rvalue, Statement, StatementKind, TerminatorKind, traversal, -}; +use rustc_index::{IndexSlice, IndexVec, newtype_index}; +use rustc_middle::mir::visit::{MutVisitor, NonMutatingUseContext, PlaceContext, Visitor}; +use rustc_middle::mir::*; use rustc_middle::ty::TyCtxt; -use rustc_mir_dataflow::Analysis; -use rustc_mir_dataflow::impls::MaybeLiveLocals; -use rustc_mir_dataflow::points::{DenseLocationMap, PointIndex, save_as_intervals}; +use rustc_mir_dataflow::impls::DefUse; +use rustc_mir_dataflow::points::{DenseLocationMap, PointIndex}; +use rustc_mir_dataflow::{Analysis, Backward, Results}; use tracing::{debug, trace}; pub(super) struct DestinationPropagation; @@ -161,84 +164,62 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation { sess.mir_opt_level() >= 3 } + #[tracing::instrument(level = "trace", skip(self, tcx, body))] fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) { let def_id = body.source.def_id(); - let mut candidates = Candidates::default(); - let mut write_info = WriteInfo::default(); - trace!(func = ?tcx.def_path_str(def_id)); + trace!(?def_id); let borrowed = rustc_mir_dataflow::impls::borrowed_locals(body); - let live = MaybeLiveLocals.iterate_to_fixpoint(tcx, body, Some("MaybeLiveLocals-DestProp")); + let candidates = Candidates::find(body, &borrowed); + trace!(?candidates); + if candidates.c.is_empty() { + return; + } + + let live = + MaybeTwoStepLiveLocals.iterate_to_fixpoint(tcx, body, Some("MaybeLiveLocals-DestProp")); + let points = DenseLocationMap::new(body); - let mut live = save_as_intervals(&points, body, live.analysis, live.results); - - // In order to avoid having to collect data for every single pair of locals in the body, we - // do not allow doing more than one merge for places that are derived from the same local at - // once. To avoid missed opportunities, we instead iterate to a fixed point - we'll refer to - // each of these iterations as a "round." - // - // Reaching a fixed point could in theory take up to `min(l, s)` rounds - however, we do not - // expect to see MIR like that. To verify this, a test was run against `[rust-lang/regex]` - - // the average MIR body saw 1.32 full iterations of this loop. The most that was hit were 30 - // for a single function. Only 80/2801 (2.9%) of functions saw at least 5. - // - // [rust-lang/regex]: - // https://github.com/rust-lang/regex/tree/b5372864e2df6a2f5e543a556a62197f50ca3650 - let mut round_count = 0; - loop { - // PERF: Can we do something smarter than recalculating the candidates and liveness - // results? - candidates.reset_and_find(body, &borrowed); - trace!(?candidates); - dest_prop_mir_dump(tcx, body, &points, &live, round_count); - - FilterInformation::filter_liveness( - &mut candidates, - &points, - &live, - &mut write_info, - body, - ); - - // Because we only filter once per round, it is unsound to use a local for more than - // one merge operation within a single round of optimizations. We store here which ones - // we have already used. - let mut merged_locals: DenseBitSet = - DenseBitSet::new_empty(body.local_decls.len()); - - // This is the set of merges we will apply this round. It is a subset of the candidates. - let mut merges = FxIndexMap::default(); - - for (src, candidates) in candidates.c.iter() { - if merged_locals.contains(*src) { - continue; - } - let Some(dest) = candidates.iter().find(|dest| !merged_locals.contains(**dest)) - else { - continue; - }; - - // Replace `src` by `dest` everywhere. - merges.insert(*src, *dest); - merged_locals.insert(*src); - merged_locals.insert(*dest); - - // Update liveness information based on the merge we just performed. - // Every location where `src` was live, `dest` will be live. - live.union_rows(*src, *dest); - } - trace!(merging = ?merges); + let mut relevant = RelevantLocals::compute(&candidates, body.local_decls.len()); + let mut live = save_as_intervals(&points, body, &relevant.original, live.results); - if merges.is_empty() { - break; - } - round_count += 1; + dest_prop_mir_dump(tcx, body, &points, &live, &relevant); + + let mut merged_locals = DenseBitSet::new_empty(body.local_decls.len()); + + for (src, candidates) in candidates.c.into_iter() { + trace!(?src, ?candidates); + + let Some(src) = relevant.find(src) else { continue }; + let Some(src_live_ranges) = &live.row(src) else { continue }; + trace!(?src, ?src_live_ranges); + + let dst = candidates.into_iter().find_map(|dst| { + let dst = relevant.find(dst)?; + let dst_live_ranges = &live.row(dst)?; + trace!(?dst, ?dst_live_ranges); + + let disjoint = src_live_ranges.disjoint(dst_live_ranges); + disjoint.then_some(dst) + }); + let Some(dst) = dst else { continue }; + + merged_locals.insert(relevant.original[src]); + merged_locals.insert(relevant.original[dst]); - apply_merges(body, tcx, merges, merged_locals); + relevant.union(src, dst); + live.union_rows(src, dst); } + trace!(?merged_locals); - trace!(round_count); + relevant.make_idempotent(); + + if merged_locals.is_empty() { + return; + } + + apply_merges(body, tcx, relevant, merged_locals); } fn is_required(&self) -> bool { @@ -246,30 +227,6 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation { } } -#[derive(Debug, Default)] -struct Candidates { - /// The set of candidates we are considering in this optimization. - /// - /// We will always merge the key into at most one of its values. - /// - /// Whether a place ends up in the key or the value does not correspond to whether it appears as - /// the lhs or rhs of any assignment. As a matter of fact, the places in here might never appear - /// in an assignment at all. This happens because if we see an assignment like this: - /// - /// ```ignore (syntax-highlighting-only) - /// _1.0 = _2.0 - /// ``` - /// - /// We will still report that we would like to merge `_1` and `_2` in an attempt to allow us to - /// remove that assignment. - c: FxIndexMap>, - - /// A reverse index of the `c` set; if the `c` set contains `a => Place { local: b, proj }`, - /// then this contains `b => a`. - // PERF: Possibly these should be `SmallVec`s? - reverse: FxIndexMap>, -} - ////////////////////////////////////////////////////////// // Merging // @@ -278,16 +235,16 @@ struct Candidates { fn apply_merges<'tcx>( body: &mut Body<'tcx>, tcx: TyCtxt<'tcx>, - merges: FxIndexMap, + relevant: RelevantLocals, merged_locals: DenseBitSet, ) { - let mut merger = Merger { tcx, merges, merged_locals }; + let mut merger = Merger { tcx, relevant, merged_locals }; merger.visit_body_preserves_cfg(body); } struct Merger<'tcx> { tcx: TyCtxt<'tcx>, - merges: FxIndexMap, + relevant: RelevantLocals, merged_locals: DenseBitSet, } @@ -297,8 +254,8 @@ impl<'tcx> MutVisitor<'tcx> for Merger<'tcx> { } fn visit_local(&mut self, local: &mut Local, _: PlaceContext, _location: Location) { - if let Some(dest) = self.merges.get(local) { - *local = *dest; + if let Some(relevant) = self.relevant.find(*local) { + *local = self.relevant.original[relevant]; } } @@ -336,376 +293,116 @@ impl<'tcx> MutVisitor<'tcx> for Merger<'tcx> { } ////////////////////////////////////////////////////////// -// Liveness filtering +// Relevant locals // -// This section enforces bullet point 2 +// Small utility to reduce size of the conflict matrix by only considering locals that appear in +// the candidates -struct FilterInformation<'a, 'tcx> { - body: &'a Body<'tcx>, - points: &'a DenseLocationMap, - live: &'a SparseIntervalMatrix, - candidates: &'a mut Candidates, - write_info: &'a mut WriteInfo, - at: Location, +newtype_index! { + /// Represent a subset of locals which appear in candidates. + struct RelevantLocal {} } -// We first implement some utility functions which we will expose removing candidates according to -// different needs. Throughout the liveness filtering, the `candidates` are only ever accessed -// through these methods, and not directly. -impl Candidates { - /// Collects the candidates for merging. - /// - /// This is responsible for enforcing the first and third bullet point. - fn reset_and_find<'tcx>(&mut self, body: &Body<'tcx>, borrowed: &DenseBitSet) { - self.c.clear(); - self.reverse.clear(); - let mut visitor = FindAssignments { body, candidates: &mut self.c, borrowed }; - visitor.visit_body(body); - // Deduplicate candidates. - for (_, cands) in self.c.iter_mut() { - cands.sort(); - cands.dedup(); - } - // Generate the reverse map. - for (src, cands) in self.c.iter() { - for dest in cands.iter().copied() { - self.reverse.entry(dest).or_default().push(*src); - } - } - } - - /// Just `Vec::retain`, but the condition is inverted and we add debugging output - fn vec_filter_candidates( - src: Local, - v: &mut Vec, - mut f: impl FnMut(Local) -> CandidateFilter, - at: Location, - ) { - v.retain(|dest| { - let remove = f(*dest); - if remove == CandidateFilter::Remove { - trace!("eliminating {:?} => {:?} due to conflict at {:?}", src, dest, at); - } - remove == CandidateFilter::Keep - }); - } - - /// `vec_filter_candidates` but for an `Entry` - fn entry_filter_candidates( - mut entry: IndexOccupiedEntry<'_, Local, Vec>, - p: Local, - f: impl FnMut(Local) -> CandidateFilter, - at: Location, - ) { - let candidates = entry.get_mut(); - Self::vec_filter_candidates(p, candidates, f, at); - if candidates.len() == 0 { - // FIXME(#120456) - is `swap_remove` correct? - entry.swap_remove(); - } - } - - /// For all candidates `(p, q)` or `(q, p)` removes the candidate if `f(q)` says to do so - fn filter_candidates_by( - &mut self, - p: Local, - mut f: impl FnMut(Local) -> CandidateFilter, - at: Location, - ) { - // Cover the cases where `p` appears as a `src` - if let IndexEntry::Occupied(entry) = self.c.entry(p) { - Self::entry_filter_candidates(entry, p, &mut f, at); - } - // And the cases where `p` appears as a `dest` - let Some(srcs) = self.reverse.get_mut(&p) else { - return; - }; - // We use `retain` here to remove the elements from the reverse set if we've removed the - // matching candidate in the forward set. - srcs.retain(|src| { - if f(*src) == CandidateFilter::Keep { - return true; - } - let IndexEntry::Occupied(entry) = self.c.entry(*src) else { - return false; - }; - Self::entry_filter_candidates( - entry, - *src, - |dest| { - if dest == p { CandidateFilter::Remove } else { CandidateFilter::Keep } - }, - at, - ); - false - }); - } +#[derive(Debug)] +struct RelevantLocals { + original: IndexVec, + shrink: IndexVec>, + renames: IndexVec, } -#[derive(Copy, Clone, PartialEq, Eq)] -enum CandidateFilter { - Keep, - Remove, -} +impl RelevantLocals { + #[tracing::instrument(level = "trace", skip(candidates, num_locals), ret)] + fn compute(candidates: &Candidates, num_locals: usize) -> RelevantLocals { + let mut original = IndexVec::with_capacity(candidates.c.len()); + let mut shrink = IndexVec::from_elem_n(None, num_locals); -impl<'a, 'tcx> FilterInformation<'a, 'tcx> { - /// Filters the set of candidates to remove those that conflict. - /// - /// The steps we take are exactly those that are outlined at the top of the file. For each - /// statement/terminator, we collect the set of locals that are written to in that - /// statement/terminator, and then we remove all pairs of candidates that contain one such local - /// and another one that is live. - /// - /// We need to be careful about the ordering of operations within each statement/terminator - /// here. Many statements might write and read from more than one place, and we need to consider - /// them all. The strategy for doing this is as follows: We first gather all the places that are - /// written to within the statement/terminator via `WriteInfo`. Then, we use the liveness - /// analysis from *before* the statement/terminator (in the control flow sense) to eliminate - /// candidates - this is because we want to conservatively treat a pair of locals that is both - /// read and written in the statement/terminator to be conflicting, and the liveness analysis - /// before the statement/terminator will correctly report locals that are read in the - /// statement/terminator to be live. We are additionally conservative by treating all written to - /// locals as also being read from. - fn filter_liveness( - candidates: &mut Candidates, - points: &DenseLocationMap, - live: &SparseIntervalMatrix, - write_info: &mut WriteInfo, - body: &Body<'tcx>, - ) { - let mut this = FilterInformation { - body, - points, - live, - candidates, - // We don't actually store anything at this scope, we just keep things here to be able - // to reuse the allocation. - write_info, - // Doesn't matter what we put here, will be overwritten before being used - at: Location::START, + // Mark a local as relevant and record it into the maps. + let mut declare = |local| { + shrink.get_or_insert_with(local, || original.push(local)); }; - this.internal_filter_liveness(); - } - - fn internal_filter_liveness(&mut self) { - for (block, data) in traversal::preorder(self.body) { - self.at = Location { block, statement_index: data.statements.len() }; - self.write_info.for_terminator(&data.terminator().kind); - self.apply_conflicts(); - for (i, statement) in data.statements.iter().enumerate().rev() { - self.at = Location { block, statement_index: i }; - self.write_info.for_statement(&statement.kind, self.body); - self.apply_conflicts(); + for (&src, destinations) in candidates.c.iter() { + declare(src); + for &dest in destinations { + declare(dest) } } - } - fn apply_conflicts(&mut self) { - let writes = &self.write_info.writes; - for p in writes { - let other_skip = self.write_info.skip_pair.and_then(|(a, b)| { - if a == *p { - Some(b) - } else if b == *p { - Some(a) - } else { - None - } - }); - let at = self.points.point_from_location(self.at); - self.candidates.filter_candidates_by( - *p, - |q| { - if Some(q) == other_skip { - return CandidateFilter::Keep; - } - // It is possible that a local may be live for less than the - // duration of a statement This happens in the case of function - // calls or inline asm. Because of this, we also mark locals as - // conflicting when both of them are written to in the same - // statement. - if self.live.contains(q, at) || writes.contains(&q) { - CandidateFilter::Remove - } else { - CandidateFilter::Keep - } - }, - self.at, - ); - } + let renames = IndexVec::from_fn_n(|l| l, original.len()); + RelevantLocals { original, shrink, renames } } -} -/// Describes where a statement/terminator writes to -#[derive(Default, Debug)] -struct WriteInfo { - writes: Vec, - /// If this pair of locals is a candidate pair, completely skip processing it during this - /// statement. All other candidates are unaffected. - skip_pair: Option<(Local, Local)>, -} - -impl WriteInfo { - fn for_statement<'tcx>(&mut self, statement: &StatementKind<'tcx>, body: &Body<'tcx>) { - self.reset(); - match statement { - StatementKind::Assign(box (lhs, rhs)) => { - self.add_place(*lhs); - match rhs { - Rvalue::Use(op) => { - self.add_operand(op); - self.consider_skipping_for_assign_use(*lhs, op, body); - } - Rvalue::Repeat(op, _) => { - self.add_operand(op); - } - Rvalue::Cast(_, op, _) - | Rvalue::UnaryOp(_, op) - | Rvalue::ShallowInitBox(op, _) => { - self.add_operand(op); - } - Rvalue::BinaryOp(_, ops) => { - for op in [&ops.0, &ops.1] { - self.add_operand(op); - } - } - Rvalue::Aggregate(_, ops) => { - for op in ops { - self.add_operand(op); - } - } - Rvalue::WrapUnsafeBinder(op, _) => { - self.add_operand(op); - } - Rvalue::ThreadLocalRef(_) - | Rvalue::NullaryOp(_, _) - | Rvalue::Ref(_, _, _) - | Rvalue::RawPtr(_, _) - | Rvalue::Len(_) - | Rvalue::Discriminant(_) - | Rvalue::CopyForDeref(_) => {} - } - } - // Retags are technically also reads, but reporting them as a write suffices - StatementKind::SetDiscriminant { place, .. } - | StatementKind::Deinit(place) - | StatementKind::Retag(_, place) => { - self.add_place(**place); - } - StatementKind::Intrinsic(_) - | StatementKind::ConstEvalCounter - | StatementKind::Nop - | StatementKind::Coverage(_) - | StatementKind::StorageLive(_) - | StatementKind::StorageDead(_) - | StatementKind::BackwardIncompatibleDropHint { .. } - | StatementKind::PlaceMention(_) => {} - StatementKind::FakeRead(_) | StatementKind::AscribeUserType(_, _) => { - bug!("{:?} not found in this MIR phase", statement) - } + fn find(&self, src: Local) -> Option { + let mut src = self.shrink[src]?; + while let s2 = self.renames[src] + && src != s2 + { + src = s2 } + Some(src) } - fn consider_skipping_for_assign_use<'tcx>( - &mut self, - lhs: Place<'tcx>, - rhs: &Operand<'tcx>, - body: &Body<'tcx>, - ) { - let Some(rhs) = rhs.place() else { return }; - if let Some(pair) = places_to_candidate_pair(lhs, rhs, body) { - self.skip_pair = Some(pair); - } + fn union(&mut self, lhs: RelevantLocal, rhs: RelevantLocal) { + self.renames[lhs] = rhs; } - fn for_terminator<'tcx>(&mut self, terminator: &TerminatorKind<'tcx>) { - self.reset(); - match terminator { - TerminatorKind::SwitchInt { discr: op, .. } - | TerminatorKind::Assert { cond: op, .. } => { - self.add_operand(op); - } - TerminatorKind::Call { destination, func, args, .. } => { - self.add_place(*destination); - self.add_operand(func); - for arg in args { - self.add_operand(&arg.node); - } - } - TerminatorKind::TailCall { func, args, .. } => { - self.add_operand(func); - for arg in args { - self.add_operand(&arg.node); - } - } - TerminatorKind::InlineAsm { operands, .. } => { - for asm_operand in operands { - match asm_operand { - InlineAsmOperand::In { value, .. } => { - self.add_operand(value); - } - InlineAsmOperand::Out { place, .. } => { - if let Some(place) = place { - self.add_place(*place); - } - } - // Note that the `late` field in `InOut` is about whether the registers used - // for these things overlap, and is of absolutely no interest to us. - InlineAsmOperand::InOut { in_value, out_place, .. } => { - if let Some(place) = out_place { - self.add_place(*place); - } - self.add_operand(in_value); - } - InlineAsmOperand::Const { .. } - | InlineAsmOperand::SymFn { .. } - | InlineAsmOperand::SymStatic { .. } - | InlineAsmOperand::Label { .. } => {} - } - } - } - TerminatorKind::Goto { .. } - | TerminatorKind::UnwindResume - | TerminatorKind::UnwindTerminate(_) - | TerminatorKind::Return - | TerminatorKind::Unreachable { .. } => (), - TerminatorKind::Drop { .. } => { - // `Drop`s create a `&mut` and so are not considered - } - TerminatorKind::Yield { .. } - | TerminatorKind::CoroutineDrop - | TerminatorKind::FalseEdge { .. } - | TerminatorKind::FalseUnwind { .. } => { - bug!("{:?} not found in this MIR phase", terminator) + fn make_idempotent(&mut self) { + for l in self.renames.indices() { + let mut h = self.renames[l]; + while let h2 = self.renames[h] + && h != h2 + { + h = h2 } + self.renames[l] = h; + debug_assert_eq!(h, self.renames[h], "non-idempotent for {l:?}"); } } +} - fn add_place(&mut self, place: Place<'_>) { - self.writes.push(place.local); - } +///////////////////////////////////////////////////// +// Candidate accumulation + +#[derive(Debug, Default)] +struct Candidates { + /// The set of candidates we are considering in this optimization. + /// + /// We will always merge the key into at most one of its values. + /// + /// Whether a place ends up in the key or the value does not correspond to whether it appears as + /// the lhs or rhs of any assignment. As a matter of fact, the places in here might never appear + /// in an assignment at all. This happens because if we see an assignment like this: + /// + /// ```ignore (syntax-highlighting-only) + /// _1.0 = _2.0 + /// ``` + /// + /// We will still report that we would like to merge `_1` and `_2` in an attempt to allow us to + /// remove that assignment. + c: FxIndexMap>, +} + +// We first implement some utility functions which we will expose removing candidates according to +// different needs. Throughout the liveness filtering, the `candidates` are only ever accessed +// through these methods, and not directly. +impl Candidates { + /// Collects the candidates for merging. + /// + /// This is responsible for enforcing the first and third bullet point. + fn find(body: &Body<'_>, borrowed: &DenseBitSet) -> Candidates { + let mut visitor = FindAssignments { body, candidates: Default::default(), borrowed }; + visitor.visit_body(body); - fn add_operand<'tcx>(&mut self, op: &Operand<'tcx>) { - match op { - // FIXME(JakobDegen): In a previous version, the `Move` case was incorrectly treated as - // being a read only. This was unsound, however we cannot add a regression test because - // it is not possible to set this off with current MIR. Once we have that ability, a - // regression test should be added. - Operand::Move(p) => self.add_place(*p), - Operand::Copy(_) | Operand::Constant(_) => (), + // Deduplicate candidates. + for (_, cands) in visitor.candidates.iter_mut() { + cands.sort(); + cands.dedup(); } - } - fn reset(&mut self) { - self.writes.clear(); - self.skip_pair = None; + Candidates { c: visitor.candidates } } } -///////////////////////////////////////////////////// -// Candidate accumulation - /// If the pair of places is being considered for merging, returns the candidate which would be /// merged in order to accomplish this. /// @@ -743,7 +440,7 @@ fn places_to_candidate_pair<'tcx>( struct FindAssignments<'a, 'tcx> { body: &'a Body<'tcx>, - candidates: &'a mut FxIndexMap>, + candidates: FxIndexMap>, borrowed: &'a DenseBitSet, } @@ -803,22 +500,252 @@ fn dest_prop_mir_dump<'tcx>( tcx: TyCtxt<'tcx>, body: &Body<'tcx>, points: &DenseLocationMap, - live: &SparseIntervalMatrix, - round: usize, + live: &SparseIntervalMatrix, + relevant: &RelevantLocals, ) { - let locals_live_at = |location| { + let locals_live_at = |location, effect| { let location = points.point_from_location(location); - live.rows().filter(|&r| live.contains(r, location)).collect::>() + let location = TwoStepIndex::new(location, effect); + live.rows() + .filter(|&r| live.contains(r, location)) + .map(|rl| relevant.original[rl]) + .collect::>() }; if let Some(dumper) = MirDumper::new(tcx, "DestinationPropagation-dataflow", body) { let extra_data = &|pass_where, w: &mut dyn std::io::Write| { if let PassWhere::BeforeLocation(loc) = pass_where { - writeln!(w, " // live: {:?}", locals_live_at(loc))?; + writeln!(w, " // before: {:?}", locals_live_at(loc, Effect::Before))?; + } + if let PassWhere::AfterLocation(loc) = pass_where { + writeln!(w, " // after: {:?}", locals_live_at(loc, Effect::After))?; } Ok(()) }; - dumper.set_disambiguator(&round).set_extra_data(extra_data).dump_mir(body) + dumper.set_extra_data(extra_data).dump_mir(body) + } +} + +struct MaybeTwoStepLiveLocals; + +#[derive(Copy, Clone, Debug)] +enum Effect { + Before, + After, +} + +rustc_index::newtype_index! { + /// A `PointIndex` but with the lower bit encoding early/late inside the statement. + #[orderable] + #[debug_format = "TwoStepIndex({})"] + struct TwoStepIndex {} +} + +impl TwoStepIndex { + fn new(point: PointIndex, effect: Effect) -> TwoStepIndex { + let effect = match effect { + Effect::Before => 0, + Effect::After => 1, + }; + TwoStepIndex::from_u32(2 * point.as_u32() + (effect as u32)) + } +} + +struct VisitPlacesWith(F); + +impl<'tcx, F> Visitor<'tcx> for VisitPlacesWith +where + F: FnMut(Place<'tcx>, PlaceContext), +{ + fn visit_place(&mut self, place: &Place<'tcx>, ctxt: PlaceContext, _: Location) { + (self.0)(*place, ctxt); + for proj in place.projection.iter() { + match proj { + ProjectionElem::Index(index) => (self.0)( + index.into(), + PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy), + ), + ProjectionElem::Deref + | ProjectionElem::Field(..) + | ProjectionElem::Downcast(..) + | ProjectionElem::ConstantIndex { .. } + | ProjectionElem::Subslice { .. } + | ProjectionElem::OpaqueCast(..) + | ProjectionElem::UnwrapUnsafeBinder(..) + | ProjectionElem::Subtype(..) => {} + }; + } + } +} + +impl<'tcx> Analysis<'tcx> for MaybeTwoStepLiveLocals { + type Domain = DenseBitSet; + type Direction = Backward; + + const NAME: &'static str = "transitive liveness"; + + fn bottom_value(&self, body: &Body<'tcx>) -> DenseBitSet { + // bottom = not live + DenseBitSet::new_empty(body.local_decls.len()) + } + + fn initialize_start_block(&self, _: &Body<'tcx>, _: &mut DenseBitSet) { + // No variables are live until we observe a use + } + + // This happens between the previous statement and this one. + #[tracing::instrument(level = "trace", skip(self, statement))] + fn apply_primary_statement_effect( + &mut self, + state: &mut DenseBitSet, + statement: &Statement<'tcx>, + location: Location, + ) { + VisitPlacesWith(|place, ctxt| match DefUse::for_place(place, ctxt) { + DefUse::Def => { + state.remove(place.local); + } + DefUse::Use => { + state.insert(place.local); + } + DefUse::PartialWrite | DefUse::NonUse => {} + }) + .visit_statement(statement, location); + } + + // This happens between this statement and the next one. + #[tracing::instrument(level = "trace", skip(self, statement))] + fn apply_early_statement_effect( + &mut self, + state: &mut DenseBitSet, + statement: &Statement<'tcx>, + location: Location, + ) { + // We need to ensure we have a non-zero live range even for dead stores. This is done by + // marking all the writes locals as live in the second half of the statement. + VisitPlacesWith(|place: Place<'tcx>, ctxt| match DefUse::for_place(place, ctxt) { + DefUse::Def | DefUse::PartialWrite => { + state.insert(place.local); + } + // We already perform the reads in the first part of the statement. As statements are + // not splittable, we do not need to re-read the same values. + DefUse::Use | DefUse::NonUse => {} + }) + .visit_statement(statement, location); + } + + // We model terminator as a special case in this two-step analysis. Consider the terminator + // `destination = func(arg0...)`. + // + // -- state at (location, Effect::Before) + // read(arg0)... + // write(destination) + // -- state at (location, Effect::After) + // read(arg0)... + + // This happens between the last statement and the terminator. + #[tracing::instrument(level = "trace", skip(self, terminator))] + fn apply_primary_terminator_effect<'mir>( + &mut self, + state: &mut DenseBitSet, + terminator: &'mir Terminator<'tcx>, + location: Location, + ) -> TerminatorEdges<'mir, 'tcx> { + // Consider that all writes in this terminator happen at the start of the execution of the + // terminator. For instance if we pass a return-pointer to a `Call` terminator. + VisitPlacesWith(|place: Place<'tcx>, ctxt| match DefUse::for_place(place, ctxt) { + DefUse::Def => { + state.remove(place.local); + } + DefUse::Use => { + state.insert(place.local); + } + DefUse::PartialWrite | DefUse::NonUse => {} + }) + .visit_terminator(terminator, location); + terminator.edges() + } + + // This happens between the terminator and the end of the block. + #[tracing::instrument(level = "trace", skip(self, terminator))] + fn apply_early_terminator_effect<'mir>( + &mut self, + state: &mut DenseBitSet, + terminator: &'mir Terminator<'tcx>, + location: Location, + ) { + // Consider that all reads in this terminator happen at the end of the execution of the + // terminator, even after it may have written to the destination local. For instance if we + // pass arguments as pointers to a `Call` terminator. + VisitPlacesWith(|place: Place<'tcx>, ctxt| match DefUse::for_place(place, ctxt) { + DefUse::Def | DefUse::Use | DefUse::PartialWrite => { + state.insert(place.local); + } + DefUse::NonUse => {} + }) + .visit_terminator(terminator, location); } } + +/// Add points depending on the result of the given dataflow analysis. +fn save_as_intervals<'tcx>( + elements: &DenseLocationMap, + body: &Body<'tcx>, + relevant: &IndexSlice, + results: Results>, +) -> SparseIntervalMatrix { + let mut values = SparseIntervalMatrix::new(2 * elements.num_points()); + let mut state = MaybeTwoStepLiveLocals.bottom_value(body); + let reachable_blocks = traversal::reachable_as_bitset(body); + + let two_step_loc = |location, effect| { + let point = elements.point_from_location(location); + TwoStepIndex::new(point, effect) + }; + let mut prepend_at = |state: &mut DenseBitSet, twostep| { + for (relevant, &original) in relevant.iter_enumerated() { + if state.contains(original) { + values.prepend(relevant, twostep); + } + } + }; + + // Iterate blocks in decreasing order, to visit locations in decreasing order. This + // allows to use the more efficient `prepend` method to interval sets. + for block in body.basic_blocks.indices().rev() { + if !reachable_blocks.contains(block) { + continue; + } + + state.clone_from(&results[block]); + + let block_data = &body.basic_blocks[block]; + let loc = Location { block, statement_index: block_data.statements.len() }; + + let term = block_data.terminator(); + let mut twostep = two_step_loc(loc, Effect::After); + MaybeTwoStepLiveLocals.apply_early_terminator_effect(&mut state, term, loc); + prepend_at(&mut state, twostep); + + twostep = TwoStepIndex::from_u32(twostep.as_u32() - 1); + debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before)); + MaybeTwoStepLiveLocals.apply_primary_terminator_effect(&mut state, term, loc); + prepend_at(&mut state, twostep); + + for (statement_index, stmt) in block_data.statements.iter().enumerate().rev() { + let loc = Location { block, statement_index }; + twostep = TwoStepIndex::from_u32(twostep.as_u32() - 1); + debug_assert_eq!(twostep, two_step_loc(loc, Effect::After)); + MaybeTwoStepLiveLocals.apply_early_statement_effect(&mut state, stmt, loc); + prepend_at(&mut state, twostep); + + twostep = TwoStepIndex::from_u32(twostep.as_u32() - 1); + debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before)); + MaybeTwoStepLiveLocals.apply_primary_statement_effect(&mut state, stmt, loc); + prepend_at(&mut state, twostep); + } + } + + values +} diff --git a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff index 5d8aaedae3742..f9b8881ae3b15 100644 --- a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff +++ b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff @@ -9,15 +9,15 @@ let mut _6: i32; scope 1 { - debug x => _1; -+ debug x => _6; ++ debug x => _4; let _2: i32; scope 2 { - debug y => _2; -+ debug y => _6; ++ debug y => _4; let _3: i32; scope 3 { - debug z => _3; -+ debug z => _6; ++ debug z => _4; } } } @@ -26,7 +26,7 @@ - StorageLive(_1); - _1 = val() -> [return: bb1, unwind unreachable]; + nop; -+ _6 = val() -> [return: bb1, unwind unreachable]; ++ _4 = val() -> [return: bb1, unwind unreachable]; } bb1: { @@ -47,16 +47,14 @@ + nop; + nop; StorageLive(_5); -- StorageLive(_6); + StorageLive(_6); - _6 = copy _1; -+ nop; -+ nop; ++ _6 = copy _4; _5 = std::mem::drop::(move _6) -> [return: bb2, unwind unreachable]; } bb2: { -- StorageDead(_6); -+ nop; + StorageDead(_6); StorageDead(_5); _0 = const (); - StorageDead(_3); diff --git a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff index 05c9bcc1d7371..013e40ff462de 100644 --- a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff +++ b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff @@ -9,15 +9,15 @@ let mut _6: i32; scope 1 { - debug x => _1; -+ debug x => _6; ++ debug x => _4; let _2: i32; scope 2 { - debug y => _2; -+ debug y => _6; ++ debug y => _4; let _3: i32; scope 3 { - debug z => _3; -+ debug z => _6; ++ debug z => _4; } } } @@ -26,7 +26,7 @@ - StorageLive(_1); - _1 = val() -> [return: bb1, unwind continue]; + nop; -+ _6 = val() -> [return: bb1, unwind continue]; ++ _4 = val() -> [return: bb1, unwind continue]; } bb1: { @@ -47,16 +47,14 @@ + nop; + nop; StorageLive(_5); -- StorageLive(_6); + StorageLive(_6); - _6 = copy _1; -+ nop; -+ nop; ++ _6 = copy _4; _5 = std::mem::drop::(move _6) -> [return: bb2, unwind continue]; } bb2: { -- StorageDead(_6); -+ nop; + StorageDead(_6); StorageDead(_5); _0 = const (); - StorageDead(_3); From b9262bce67f6d924d17632bf5539e4745e7b3a29 Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Mon, 18 Aug 2025 22:53:14 +0000 Subject: [PATCH 3/8] Use regular MaybeLiveLocals. --- compiler/rustc_index/src/interval.rs | 7 +- compiler/rustc_mir_transform/src/dest_prop.rs | 181 +++++------------- tests/mir-opt/nrvo_miscompile_111005.rs | 6 +- 3 files changed, 58 insertions(+), 136 deletions(-) diff --git a/compiler/rustc_index/src/interval.rs b/compiler/rustc_index/src/interval.rs index 69a7a69610e04..e81e7e2bc442a 100644 --- a/compiler/rustc_index/src/interval.rs +++ b/compiler/rustc_index/src/interval.rs @@ -146,8 +146,11 @@ impl IntervalSet { let point = point.index() as u32; if let Some((first_start, _)) = self.map.first_mut() { - assert!(point < *first_start); - if point + 1 == *first_start { + assert!(point <= *first_start); + if point == *first_start { + // The point is already present in the set. + } else if point + 1 == *first_start { + // Just extend the first range. *first_start = point; } else { self.map.insert(0, (point, point)); diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs index da1d339b52658..aa9b0a961eada 100644 --- a/compiler/rustc_mir_transform/src/dest_prop.rs +++ b/compiler/rustc_mir_transform/src/dest_prop.rs @@ -140,13 +140,13 @@ use rustc_data_structures::fx::FxIndexMap; use rustc_index::bit_set::DenseBitSet; use rustc_index::interval::SparseIntervalMatrix; -use rustc_index::{IndexSlice, IndexVec, newtype_index}; +use rustc_index::{IndexVec, newtype_index}; use rustc_middle::mir::visit::{MutVisitor, NonMutatingUseContext, PlaceContext, Visitor}; use rustc_middle::mir::*; use rustc_middle::ty::TyCtxt; -use rustc_mir_dataflow::impls::DefUse; +use rustc_mir_dataflow::impls::{DefUse, MaybeLiveLocals}; use rustc_mir_dataflow::points::{DenseLocationMap, PointIndex}; -use rustc_mir_dataflow::{Analysis, Backward, Results}; +use rustc_mir_dataflow::{Analysis, Results}; use tracing::{debug, trace}; pub(super) struct DestinationPropagation; @@ -177,12 +177,11 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation { return; } - let live = - MaybeTwoStepLiveLocals.iterate_to_fixpoint(tcx, body, Some("MaybeLiveLocals-DestProp")); + let live = MaybeLiveLocals.iterate_to_fixpoint(tcx, body, Some("MaybeLiveLocals-DestProp")); let points = DenseLocationMap::new(body); let mut relevant = RelevantLocals::compute(&candidates, body.local_decls.len()); - let mut live = save_as_intervals(&points, body, &relevant.original, live.results); + let mut live = save_as_intervals(&points, body, &relevant, live.results); dest_prop_mir_dump(tcx, body, &points, &live, &relevant); @@ -527,8 +526,6 @@ fn dest_prop_mir_dump<'tcx>( } } -struct MaybeTwoStepLiveLocals; - #[derive(Copy, Clone, Debug)] enum Effect { Before, @@ -579,137 +576,29 @@ where } } -impl<'tcx> Analysis<'tcx> for MaybeTwoStepLiveLocals { - type Domain = DenseBitSet; - type Direction = Backward; - - const NAME: &'static str = "transitive liveness"; - - fn bottom_value(&self, body: &Body<'tcx>) -> DenseBitSet { - // bottom = not live - DenseBitSet::new_empty(body.local_decls.len()) - } - - fn initialize_start_block(&self, _: &Body<'tcx>, _: &mut DenseBitSet) { - // No variables are live until we observe a use - } - - // This happens between the previous statement and this one. - #[tracing::instrument(level = "trace", skip(self, statement))] - fn apply_primary_statement_effect( - &mut self, - state: &mut DenseBitSet, - statement: &Statement<'tcx>, - location: Location, - ) { - VisitPlacesWith(|place, ctxt| match DefUse::for_place(place, ctxt) { - DefUse::Def => { - state.remove(place.local); - } - DefUse::Use => { - state.insert(place.local); - } - DefUse::PartialWrite | DefUse::NonUse => {} - }) - .visit_statement(statement, location); - } - - // This happens between this statement and the next one. - #[tracing::instrument(level = "trace", skip(self, statement))] - fn apply_early_statement_effect( - &mut self, - state: &mut DenseBitSet, - statement: &Statement<'tcx>, - location: Location, - ) { - // We need to ensure we have a non-zero live range even for dead stores. This is done by - // marking all the writes locals as live in the second half of the statement. - VisitPlacesWith(|place: Place<'tcx>, ctxt| match DefUse::for_place(place, ctxt) { - DefUse::Def | DefUse::PartialWrite => { - state.insert(place.local); - } - // We already perform the reads in the first part of the statement. As statements are - // not splittable, we do not need to re-read the same values. - DefUse::Use | DefUse::NonUse => {} - }) - .visit_statement(statement, location); - } - - // We model terminator as a special case in this two-step analysis. Consider the terminator - // `destination = func(arg0...)`. - // - // -- state at (location, Effect::Before) - // read(arg0)... - // write(destination) - // -- state at (location, Effect::After) - // read(arg0)... - - // This happens between the last statement and the terminator. - #[tracing::instrument(level = "trace", skip(self, terminator))] - fn apply_primary_terminator_effect<'mir>( - &mut self, - state: &mut DenseBitSet, - terminator: &'mir Terminator<'tcx>, - location: Location, - ) -> TerminatorEdges<'mir, 'tcx> { - // Consider that all writes in this terminator happen at the start of the execution of the - // terminator. For instance if we pass a return-pointer to a `Call` terminator. - VisitPlacesWith(|place: Place<'tcx>, ctxt| match DefUse::for_place(place, ctxt) { - DefUse::Def => { - state.remove(place.local); - } - DefUse::Use => { - state.insert(place.local); - } - DefUse::PartialWrite | DefUse::NonUse => {} - }) - .visit_terminator(terminator, location); - terminator.edges() - } - - // This happens between the terminator and the end of the block. - #[tracing::instrument(level = "trace", skip(self, terminator))] - fn apply_early_terminator_effect<'mir>( - &mut self, - state: &mut DenseBitSet, - terminator: &'mir Terminator<'tcx>, - location: Location, - ) { - // Consider that all reads in this terminator happen at the end of the execution of the - // terminator, even after it may have written to the destination local. For instance if we - // pass arguments as pointers to a `Call` terminator. - VisitPlacesWith(|place: Place<'tcx>, ctxt| match DefUse::for_place(place, ctxt) { - DefUse::Def | DefUse::Use | DefUse::PartialWrite => { - state.insert(place.local); - } - DefUse::NonUse => {} - }) - .visit_terminator(terminator, location); - } -} - /// Add points depending on the result of the given dataflow analysis. fn save_as_intervals<'tcx>( elements: &DenseLocationMap, body: &Body<'tcx>, - relevant: &IndexSlice, + relevant: &RelevantLocals, results: Results>, ) -> SparseIntervalMatrix { let mut values = SparseIntervalMatrix::new(2 * elements.num_points()); - let mut state = MaybeTwoStepLiveLocals.bottom_value(body); + let mut state = MaybeLiveLocals.bottom_value(body); let reachable_blocks = traversal::reachable_as_bitset(body); let two_step_loc = |location, effect| { let point = elements.point_from_location(location); TwoStepIndex::new(point, effect) }; - let mut prepend_at = |state: &mut DenseBitSet, twostep| { - for (relevant, &original) in relevant.iter_enumerated() { - if state.contains(original) { - values.prepend(relevant, twostep); + let prepend_at = + |values: &mut SparseIntervalMatrix<_, _>, state: &DenseBitSet, twostep| { + for (relevant, &original) in relevant.original.iter_enumerated() { + if state.contains(original) { + values.prepend(relevant, twostep); + } } - } - }; + }; // Iterate blocks in decreasing order, to visit locations in decreasing order. This // allows to use the more efficient `prepend` method to interval sets. @@ -725,25 +614,51 @@ fn save_as_intervals<'tcx>( let term = block_data.terminator(); let mut twostep = two_step_loc(loc, Effect::After); - MaybeTwoStepLiveLocals.apply_early_terminator_effect(&mut state, term, loc); - prepend_at(&mut state, twostep); + prepend_at(&mut values, &state, twostep); + // Ensure we have a non-zero live range even for dead stores. This is done by marking all + // the written-to locals as live in the second half of the statement. + // We also ensure that operands read by terminators conflict with writes by that terminator. + // For instance a function call may read args after having written to the destination. + VisitPlacesWith(|place, ctxt| match DefUse::for_place(place, ctxt) { + DefUse::Def | DefUse::Use | DefUse::PartialWrite => { + if let Some(relevant) = relevant.shrink[place.local] { + values.insert(relevant, twostep); + } + } + DefUse::NonUse => {} + }) + .visit_terminator(term, loc); twostep = TwoStepIndex::from_u32(twostep.as_u32() - 1); debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before)); - MaybeTwoStepLiveLocals.apply_primary_terminator_effect(&mut state, term, loc); - prepend_at(&mut state, twostep); + MaybeLiveLocals.apply_early_terminator_effect(&mut state, term, loc); + MaybeLiveLocals.apply_primary_terminator_effect(&mut state, term, loc); + prepend_at(&mut values, &state, twostep); for (statement_index, stmt) in block_data.statements.iter().enumerate().rev() { let loc = Location { block, statement_index }; twostep = TwoStepIndex::from_u32(twostep.as_u32() - 1); debug_assert_eq!(twostep, two_step_loc(loc, Effect::After)); - MaybeTwoStepLiveLocals.apply_early_statement_effect(&mut state, stmt, loc); - prepend_at(&mut state, twostep); + prepend_at(&mut values, &state, twostep); + // Ensure we have a non-zero live range even for dead stores. This is done by marking + // all the written-to locals as live in the second half of the statement. + VisitPlacesWith(|place, ctxt| match DefUse::for_place(place, ctxt) { + DefUse::Def | DefUse::PartialWrite => { + if let Some(relevant) = relevant.shrink[place.local] { + values.insert(relevant, twostep); + } + } + DefUse::Use | DefUse::NonUse => {} + }) + .visit_statement(stmt, loc); twostep = TwoStepIndex::from_u32(twostep.as_u32() - 1); debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before)); - MaybeTwoStepLiveLocals.apply_primary_statement_effect(&mut state, stmt, loc); - prepend_at(&mut state, twostep); + MaybeLiveLocals.apply_early_statement_effect(&mut state, stmt, loc); + MaybeLiveLocals.apply_primary_statement_effect(&mut state, stmt, loc); + // ... but reads from operands are marked as live here so they do not conflict with + // the all the writes we manually marked as live in the second half of the statement. + prepend_at(&mut values, &state, twostep); } } diff --git a/tests/mir-opt/nrvo_miscompile_111005.rs b/tests/mir-opt/nrvo_miscompile_111005.rs index 03008fa819156..131f7b8f6f991 100644 --- a/tests/mir-opt/nrvo_miscompile_111005.rs +++ b/tests/mir-opt/nrvo_miscompile_111005.rs @@ -1,4 +1,3 @@ -// skip-filecheck // This is a miscompilation, #111005 to track //@ test-mir-pass: RenameReturnPlace @@ -10,6 +9,11 @@ use core::intrinsics::mir::*; // EMIT_MIR nrvo_miscompile_111005.wrong.RenameReturnPlace.diff #[custom_mir(dialect = "runtime", phase = "initial")] pub fn wrong(arg: char) -> char { + // CHECK-LABEL: fn wrong( + // CHECK: _0 = copy _1; + // FIXME: This is wrong: + // CHECK-NEXT: _0 = const 'b'; + // CHECK-NEXT: return; mir! { { let temp = arg; From de7c633ee5a20f938819033b1111777bd6496a3b Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Tue, 19 Aug 2025 23:27:06 +0000 Subject: [PATCH 4/8] Simplify VisitPlacesWith. --- compiler/rustc_mir_transform/src/dest_prop.rs | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs index aa9b0a961eada..fde45229d520d 100644 --- a/compiler/rustc_mir_transform/src/dest_prop.rs +++ b/compiler/rustc_mir_transform/src/dest_prop.rs @@ -141,7 +141,7 @@ use rustc_data_structures::fx::FxIndexMap; use rustc_index::bit_set::DenseBitSet; use rustc_index::interval::SparseIntervalMatrix; use rustc_index::{IndexVec, newtype_index}; -use rustc_middle::mir::visit::{MutVisitor, NonMutatingUseContext, PlaceContext, Visitor}; +use rustc_middle::mir::visit::{MutVisitor, PlaceContext, Visitor}; use rustc_middle::mir::*; use rustc_middle::ty::TyCtxt; use rustc_mir_dataflow::impls::{DefUse, MaybeLiveLocals}; @@ -555,24 +555,13 @@ impl<'tcx, F> Visitor<'tcx> for VisitPlacesWith where F: FnMut(Place<'tcx>, PlaceContext), { - fn visit_place(&mut self, place: &Place<'tcx>, ctxt: PlaceContext, _: Location) { + fn visit_local(&mut self, local: Local, ctxt: PlaceContext, _: Location) { + (self.0)(local.into(), ctxt); + } + + fn visit_place(&mut self, place: &Place<'tcx>, ctxt: PlaceContext, location: Location) { (self.0)(*place, ctxt); - for proj in place.projection.iter() { - match proj { - ProjectionElem::Index(index) => (self.0)( - index.into(), - PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy), - ), - ProjectionElem::Deref - | ProjectionElem::Field(..) - | ProjectionElem::Downcast(..) - | ProjectionElem::ConstantIndex { .. } - | ProjectionElem::Subslice { .. } - | ProjectionElem::OpaqueCast(..) - | ProjectionElem::UnwrapUnsafeBinder(..) - | ProjectionElem::Subtype(..) => {} - }; - } + self.visit_projection(place.as_ref(), ctxt, location); } } From 4e9dd1b67bf215865b95558ee4528ad43a5fd38c Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Fri, 22 Aug 2025 20:51:46 +0000 Subject: [PATCH 5/8] Do not use prepend to avoid quadratic behaviour. --- compiler/rustc_index/src/interval.rs | 44 ++++------------ compiler/rustc_mir_transform/src/dest_prop.rs | 51 ++++++++++--------- 2 files changed, 37 insertions(+), 58 deletions(-) diff --git a/compiler/rustc_index/src/interval.rs b/compiler/rustc_index/src/interval.rs index e81e7e2bc442a..dda5253e7c547 100644 --- a/compiler/rustc_index/src/interval.rs +++ b/compiler/rustc_index/src/interval.rs @@ -140,42 +140,20 @@ impl IntervalSet { result } - /// Specialized version of `insert` when we know that the inserted point is *before* any - /// contained. - pub fn prepend(&mut self, point: I) { - let point = point.index() as u32; - - if let Some((first_start, _)) = self.map.first_mut() { - assert!(point <= *first_start); - if point == *first_start { - // The point is already present in the set. - } else if point + 1 == *first_start { - // Just extend the first range. - *first_start = point; - } else { - self.map.insert(0, (point, point)); - } - } else { - // If the map is empty, push is faster than insert. - self.map.push((point, point)); - } - - debug_assert!( - self.check_invariants(), - "wrong intervals after prepend {point:?} to {self:?}" - ); - } - /// Specialized version of `insert` when we know that the inserted point is *after* any /// contained. pub fn append(&mut self, point: I) { let point = point.index() as u32; - if let Some((_, last_end)) = self.map.last_mut() - && let _ = assert!(*last_end < point) - && point == *last_end + 1 - { - *last_end = point; + if let Some((_, last_end)) = self.map.last_mut() { + assert!(*last_end <= point); + if point == *last_end { + // The point is already in the set. + } else if point == *last_end + 1 { + *last_end = point; + } else { + self.map.push((point, point)); + } } else { self.map.push((point, point)); } @@ -397,10 +375,6 @@ impl SparseIntervalMatrix { self.ensure_row(row).insert(point) } - pub fn prepend(&mut self, row: R, point: C) { - self.ensure_row(row).prepend(point) - } - pub fn append(&mut self, row: R, point: C) { self.ensure_row(row).append(point) } diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs index fde45229d520d..7f2b786e4da5e 100644 --- a/compiler/rustc_mir_transform/src/dest_prop.rs +++ b/compiler/rustc_mir_transform/src/dest_prop.rs @@ -145,7 +145,7 @@ use rustc_middle::mir::visit::{MutVisitor, PlaceContext, Visitor}; use rustc_middle::mir::*; use rustc_middle::ty::TyCtxt; use rustc_mir_dataflow::impls::{DefUse, MaybeLiveLocals}; -use rustc_mir_dataflow::points::{DenseLocationMap, PointIndex}; +use rustc_mir_dataflow::points::DenseLocationMap; use rustc_mir_dataflow::{Analysis, Results}; use tracing::{debug, trace}; @@ -502,9 +502,7 @@ fn dest_prop_mir_dump<'tcx>( live: &SparseIntervalMatrix, relevant: &RelevantLocals, ) { - let locals_live_at = |location, effect| { - let location = points.point_from_location(location); - let location = TwoStepIndex::new(location, effect); + let locals_live_at = |location| { live.rows() .filter(|&r| live.contains(r, location)) .map(|rl| relevant.original[rl]) @@ -514,10 +512,14 @@ fn dest_prop_mir_dump<'tcx>( if let Some(dumper) = MirDumper::new(tcx, "DestinationPropagation-dataflow", body) { let extra_data = &|pass_where, w: &mut dyn std::io::Write| { if let PassWhere::BeforeLocation(loc) = pass_where { - writeln!(w, " // before: {:?}", locals_live_at(loc, Effect::Before))?; + let location = TwoStepIndex::new(points, loc, Effect::Before); + let live = locals_live_at(location); + writeln!(w, " // before: {:?} => {:?}", location, live)?; } if let PassWhere::AfterLocation(loc) = pass_where { - writeln!(w, " // after: {:?}", locals_live_at(loc, Effect::After))?; + let location = TwoStepIndex::new(points, loc, Effect::After); + let live = locals_live_at(location); + writeln!(w, " // after: {:?} => {:?}", location, live)?; } Ok(()) }; @@ -533,19 +535,25 @@ enum Effect { } rustc_index::newtype_index! { - /// A `PointIndex` but with the lower bit encoding early/late inside the statement. + /// A reversed `PointIndex` but with the lower bit encoding early/late inside the statement. + /// The reversed order allows to use the more efficient `IntervalSet::append` method while we + /// iterate on the statements in reverse order. #[orderable] #[debug_format = "TwoStepIndex({})"] struct TwoStepIndex {} } impl TwoStepIndex { - fn new(point: PointIndex, effect: Effect) -> TwoStepIndex { + fn new(elements: &DenseLocationMap, location: Location, effect: Effect) -> TwoStepIndex { + let point = elements.point_from_location(location); let effect = match effect { Effect::Before => 0, Effect::After => 1, }; - TwoStepIndex::from_u32(2 * point.as_u32() + (effect as u32)) + let max_index = 2 * elements.num_points() as u32 - 1; + let index = 2 * point.as_u32() + (effect as u32); + // Reverse the indexing to use more efficient `IntervalSet::append`. + TwoStepIndex::from_u32(max_index - index) } } @@ -576,21 +584,18 @@ fn save_as_intervals<'tcx>( let mut state = MaybeLiveLocals.bottom_value(body); let reachable_blocks = traversal::reachable_as_bitset(body); - let two_step_loc = |location, effect| { - let point = elements.point_from_location(location); - TwoStepIndex::new(point, effect) - }; - let prepend_at = + let two_step_loc = |location, effect| TwoStepIndex::new(elements, location, effect); + let append_at = |values: &mut SparseIntervalMatrix<_, _>, state: &DenseBitSet, twostep| { for (relevant, &original) in relevant.original.iter_enumerated() { if state.contains(original) { - values.prepend(relevant, twostep); + values.append(relevant, twostep); } } }; // Iterate blocks in decreasing order, to visit locations in decreasing order. This - // allows to use the more efficient `prepend` method to interval sets. + // allows to use the more efficient `append` method to interval sets. for block in body.basic_blocks.indices().rev() { if !reachable_blocks.contains(block) { continue; @@ -603,7 +608,7 @@ fn save_as_intervals<'tcx>( let term = block_data.terminator(); let mut twostep = two_step_loc(loc, Effect::After); - prepend_at(&mut values, &state, twostep); + append_at(&mut values, &state, twostep); // Ensure we have a non-zero live range even for dead stores. This is done by marking all // the written-to locals as live in the second half of the statement. // We also ensure that operands read by terminators conflict with writes by that terminator. @@ -618,17 +623,17 @@ fn save_as_intervals<'tcx>( }) .visit_terminator(term, loc); - twostep = TwoStepIndex::from_u32(twostep.as_u32() - 1); + twostep = TwoStepIndex::from_u32(twostep.as_u32() + 1); debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before)); MaybeLiveLocals.apply_early_terminator_effect(&mut state, term, loc); MaybeLiveLocals.apply_primary_terminator_effect(&mut state, term, loc); - prepend_at(&mut values, &state, twostep); + append_at(&mut values, &state, twostep); for (statement_index, stmt) in block_data.statements.iter().enumerate().rev() { let loc = Location { block, statement_index }; - twostep = TwoStepIndex::from_u32(twostep.as_u32() - 1); + twostep = TwoStepIndex::from_u32(twostep.as_u32() + 1); debug_assert_eq!(twostep, two_step_loc(loc, Effect::After)); - prepend_at(&mut values, &state, twostep); + append_at(&mut values, &state, twostep); // Ensure we have a non-zero live range even for dead stores. This is done by marking // all the written-to locals as live in the second half of the statement. VisitPlacesWith(|place, ctxt| match DefUse::for_place(place, ctxt) { @@ -641,13 +646,13 @@ fn save_as_intervals<'tcx>( }) .visit_statement(stmt, loc); - twostep = TwoStepIndex::from_u32(twostep.as_u32() - 1); + twostep = TwoStepIndex::from_u32(twostep.as_u32() + 1); debug_assert_eq!(twostep, two_step_loc(loc, Effect::Before)); MaybeLiveLocals.apply_early_statement_effect(&mut state, stmt, loc); MaybeLiveLocals.apply_primary_statement_effect(&mut state, stmt, loc); // ... but reads from operands are marked as live here so they do not conflict with // the all the writes we manually marked as live in the second half of the statement. - prepend_at(&mut values, &state, twostep); + append_at(&mut values, &state, twostep); } } From 99f6bcf38065c78897a80c9fc7f8c49962df0ab7 Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Tue, 26 Aug 2025 23:22:59 +0000 Subject: [PATCH 6/8] Unify a source with all possible destinations. --- compiler/rustc_mir_transform/src/dest_prop.rs | 98 ++++++++----------- ...in.DestinationPropagation.panic-abort.diff | 21 ++-- ...n.DestinationPropagation.panic-unwind.diff | 21 ++-- ...stinationPropagation.after.panic-abort.mir | 6 +- ...tinationPropagation.after.panic-unwind.mir | 6 +- ...stinationPropagation.after.panic-abort.mir | 6 +- ...tinationPropagation.after.panic-unwind.mir | 6 +- ...py.enum_clone_as_copy.PreCodegen.after.mir | 4 +- 8 files changed, 75 insertions(+), 93 deletions(-) diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs index 7f2b786e4da5e..359cd622608fa 100644 --- a/compiler/rustc_mir_transform/src/dest_prop.rs +++ b/compiler/rustc_mir_transform/src/dest_prop.rs @@ -190,29 +190,51 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation { for (src, candidates) in candidates.c.into_iter() { trace!(?src, ?candidates); - let Some(src) = relevant.find(src) else { continue }; - let Some(src_live_ranges) = &live.row(src) else { continue }; - trace!(?src, ?src_live_ranges); - - let dst = candidates.into_iter().find_map(|dst| { - let dst = relevant.find(dst)?; - let dst_live_ranges = &live.row(dst)?; + for dst in candidates { + // We call `relevant.find(src)` inside the loop, as a previous iteration may have + // renamed `src` to one of the locals in `dst`. + let Some(mut src) = relevant.find(src) else { continue }; + let Some(src_live_ranges) = live.row(src) else { continue }; + trace!(?src, ?src_live_ranges); + + let Some(mut dst) = relevant.find(dst) else { continue }; + let Some(dst_live_ranges) = live.row(dst) else { continue }; trace!(?dst, ?dst_live_ranges); - let disjoint = src_live_ranges.disjoint(dst_live_ranges); - disjoint.then_some(dst) - }); - let Some(dst) = dst else { continue }; + if src_live_ranges.disjoint(dst_live_ranges) { + // We want to replace `src` by `dst`. + let mut orig_src = relevant.original[src]; + let mut orig_dst = relevant.original[dst]; + + // The return place and function arguments are required and cannot be renamed. + // This check cannot be made during candidate collection, as we may want to + // unify the same non-required local with several required locals. + match (is_local_required(orig_src, body), is_local_required(orig_dst, body)) { + // Renaming `src` is ok. + (false, _) => {} + // Renaming `src` is wrong, but renaming `dst` is ok. + (true, false) => { + std::mem::swap(&mut src, &mut dst); + std::mem::swap(&mut orig_src, &mut orig_dst); + } + // Neither local can be renamed, so skip this case. + (true, true) => continue, + } - merged_locals.insert(relevant.original[src]); - merged_locals.insert(relevant.original[dst]); + trace!(?src, ?dst, "merge"); + merged_locals.insert(orig_src); + merged_locals.insert(orig_dst); - relevant.union(src, dst); - live.union_rows(src, dst); + // Replace `src` by `dst`. + relevant.union(src, dst); + live.union_rows(/* read */ src, /* write */ dst); + } + } } trace!(?merged_locals); relevant.make_idempotent(); + trace!(?relevant.renames); if merged_locals.is_empty() { return; @@ -402,41 +424,6 @@ impl Candidates { } } -/// If the pair of places is being considered for merging, returns the candidate which would be -/// merged in order to accomplish this. -/// -/// The contract here is in one direction - there is a guarantee that merging the locals that are -/// outputted by this function would result in an assignment between the inputs becoming a -/// self-assignment. However, there is no guarantee that the returned pair is actually suitable for -/// merging - candidate collection must still check this independently. -/// -/// This output is unique for each unordered pair of input places. -fn places_to_candidate_pair<'tcx>( - a: Place<'tcx>, - b: Place<'tcx>, - body: &Body<'tcx>, -) -> Option<(Local, Local)> { - let (mut a, mut b) = if a.projection.len() == 0 && b.projection.len() == 0 { - (a.local, b.local) - } else { - return None; - }; - - // By sorting, we make sure we're input order independent - if a > b { - std::mem::swap(&mut a, &mut b); - } - - // We could now return `(a, b)`, but then we miss some candidates in the case where `a` can't be - // used as a `src`. - if is_local_required(a, body) { - std::mem::swap(&mut a, &mut b); - } - // We could check `is_local_required` again here, but there's no need - after all, we make no - // promise that the candidate pair is actually valid - Some((a, b)) -} - struct FindAssignments<'a, 'tcx> { body: &'a Body<'tcx>, candidates: FxIndexMap>, @@ -449,11 +436,9 @@ impl<'tcx> Visitor<'tcx> for FindAssignments<'_, 'tcx> { lhs, Rvalue::CopyForDeref(rhs) | Rvalue::Use(Operand::Copy(rhs) | Operand::Move(rhs)), )) = &statement.kind + && let Some(src) = lhs.as_local() + && let Some(dest) = rhs.as_local() { - let Some((src, dest)) = places_to_candidate_pair(*lhs, *rhs, self.body) else { - return; - }; - // As described at the top of the file, we do not go near things that have // their address taken. if self.borrowed.contains(src) || self.borrowed.contains(dest) { @@ -470,11 +455,6 @@ impl<'tcx> Visitor<'tcx> for FindAssignments<'_, 'tcx> { return; } - // Also, we need to make sure that MIR actually allows the `src` to be removed - if is_local_required(src, self.body) { - return; - } - // We may insert duplicates here, but that's fine self.candidates.entry(src).or_default().push(dest); } diff --git a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff index f9b8881ae3b15..a2f10be31a9e8 100644 --- a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff +++ b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-abort.diff @@ -8,25 +8,23 @@ let _5: (); let mut _6: i32; scope 1 { -- debug x => _1; -+ debug x => _4; + debug x => _1; let _2: i32; scope 2 { - debug y => _2; -+ debug y => _4; ++ debug y => _1; let _3: i32; scope 3 { - debug z => _3; -+ debug z => _4; ++ debug z => _1; } } } bb0: { - StorageLive(_1); -- _1 = val() -> [return: bb1, unwind unreachable]; + nop; -+ _4 = val() -> [return: bb1, unwind unreachable]; + _1 = val() -> [return: bb1, unwind unreachable]; } bb1: { @@ -47,14 +45,17 @@ + nop; + nop; StorageLive(_5); - StorageLive(_6); +- StorageLive(_6); - _6 = copy _1; -+ _6 = copy _4; - _5 = std::mem::drop::(move _6) -> [return: bb2, unwind unreachable]; +- _5 = std::mem::drop::(move _6) -> [return: bb2, unwind unreachable]; ++ nop; ++ nop; ++ _5 = std::mem::drop::(move _1) -> [return: bb2, unwind unreachable]; } bb2: { - StorageDead(_6); +- StorageDead(_6); ++ nop; StorageDead(_5); _0 = const (); - StorageDead(_3); diff --git a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff index 013e40ff462de..a08488615b15a 100644 --- a/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff +++ b/tests/mir-opt/dest-prop/cycle.main.DestinationPropagation.panic-unwind.diff @@ -8,25 +8,23 @@ let _5: (); let mut _6: i32; scope 1 { -- debug x => _1; -+ debug x => _4; + debug x => _1; let _2: i32; scope 2 { - debug y => _2; -+ debug y => _4; ++ debug y => _1; let _3: i32; scope 3 { - debug z => _3; -+ debug z => _4; ++ debug z => _1; } } } bb0: { - StorageLive(_1); -- _1 = val() -> [return: bb1, unwind continue]; + nop; -+ _4 = val() -> [return: bb1, unwind continue]; + _1 = val() -> [return: bb1, unwind continue]; } bb1: { @@ -47,14 +45,17 @@ + nop; + nop; StorageLive(_5); - StorageLive(_6); +- StorageLive(_6); - _6 = copy _1; -+ _6 = copy _4; - _5 = std::mem::drop::(move _6) -> [return: bb2, unwind continue]; +- _5 = std::mem::drop::(move _6) -> [return: bb2, unwind continue]; ++ nop; ++ nop; ++ _5 = std::mem::drop::(move _1) -> [return: bb2, unwind continue]; } bb2: { - StorageDead(_6); +- StorageDead(_6); ++ nop; StorageDead(_5); _0 = const (); - StorageDead(_3); diff --git a/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-abort.mir b/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-abort.mir index eb4209731c62f..15061da812063 100644 --- a/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-abort.mir +++ b/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-abort.mir @@ -7,16 +7,16 @@ fn f(_1: usize) -> usize { let mut _3: usize; let mut _4: usize; scope 1 { - debug b => _3; + debug b => _2; } bb0: { nop; - _3 = copy _1; + _2 = copy _1; _1 = const 5_usize; nop; nop; - _1 = move _3; + _1 = move _2; nop; nop; nop; diff --git a/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-unwind.mir b/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-unwind.mir index fe9a7376a5856..ddfe4dc5b3ebf 100644 --- a/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-unwind.mir +++ b/tests/mir-opt/dest-prop/dead_stores_79191.f.DestinationPropagation.after.panic-unwind.mir @@ -7,16 +7,16 @@ fn f(_1: usize) -> usize { let mut _3: usize; let mut _4: usize; scope 1 { - debug b => _3; + debug b => _2; } bb0: { nop; - _3 = copy _1; + _2 = copy _1; _1 = const 5_usize; nop; nop; - _1 = move _3; + _1 = move _2; nop; nop; nop; diff --git a/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-abort.mir b/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-abort.mir index eb4209731c62f..15061da812063 100644 --- a/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-abort.mir +++ b/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-abort.mir @@ -7,16 +7,16 @@ fn f(_1: usize) -> usize { let mut _3: usize; let mut _4: usize; scope 1 { - debug b => _3; + debug b => _2; } bb0: { nop; - _3 = copy _1; + _2 = copy _1; _1 = const 5_usize; nop; nop; - _1 = move _3; + _1 = move _2; nop; nop; nop; diff --git a/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-unwind.mir b/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-unwind.mir index fe9a7376a5856..ddfe4dc5b3ebf 100644 --- a/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-unwind.mir +++ b/tests/mir-opt/dest-prop/dead_stores_better.f.DestinationPropagation.after.panic-unwind.mir @@ -7,16 +7,16 @@ fn f(_1: usize) -> usize { let mut _3: usize; let mut _4: usize; scope 1 { - debug b => _3; + debug b => _2; } bb0: { nop; - _3 = copy _1; + _2 = copy _1; _1 = const 5_usize; nop; nop; - _1 = move _3; + _1 = move _2; nop; nop; nop; diff --git a/tests/mir-opt/pre-codegen/clone_as_copy.enum_clone_as_copy.PreCodegen.after.mir b/tests/mir-opt/pre-codegen/clone_as_copy.enum_clone_as_copy.PreCodegen.after.mir index 9f88e1961ec88..e67f362ee04ae 100644 --- a/tests/mir-opt/pre-codegen/clone_as_copy.enum_clone_as_copy.PreCodegen.after.mir +++ b/tests/mir-opt/pre-codegen/clone_as_copy.enum_clone_as_copy.PreCodegen.after.mir @@ -6,8 +6,8 @@ fn enum_clone_as_copy(_1: &Enum1) -> Enum1 { scope 1 (inlined ::clone) { debug self => _1; let mut _2: isize; - let mut _3: &AllCopy; - let mut _4: &NestCopy; + let _3: &AllCopy; + let _4: &NestCopy; scope 2 { debug __self_0 => _3; scope 6 (inlined ::clone) { From 28ff5cf502a0132a4eed8aa46fa7371c7e7ed3f1 Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Tue, 26 Aug 2025 23:38:45 +0000 Subject: [PATCH 7/8] Simplify candidate collection. --- compiler/rustc_mir_transform/src/dest_prop.rs | 98 ++++++++----------- 1 file changed, 43 insertions(+), 55 deletions(-) diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs index 359cd622608fa..b6306c4869bd9 100644 --- a/compiler/rustc_mir_transform/src/dest_prop.rs +++ b/compiler/rustc_mir_transform/src/dest_prop.rs @@ -137,7 +137,6 @@ //! [attempt 3]: https://github.com/rust-lang/rust/pull/72632 //! [attempt 4]: https://github.com/rust-lang/rust/pull/96451 -use rustc_data_structures::fx::FxIndexMap; use rustc_index::bit_set::DenseBitSet; use rustc_index::interval::SparseIntervalMatrix; use rustc_index::{IndexVec, newtype_index}; @@ -187,48 +186,47 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation { let mut merged_locals = DenseBitSet::new_empty(body.local_decls.len()); - for (src, candidates) in candidates.c.into_iter() { - trace!(?src, ?candidates); - - for dst in candidates { - // We call `relevant.find(src)` inside the loop, as a previous iteration may have - // renamed `src` to one of the locals in `dst`. - let Some(mut src) = relevant.find(src) else { continue }; - let Some(src_live_ranges) = live.row(src) else { continue }; - trace!(?src, ?src_live_ranges); - - let Some(mut dst) = relevant.find(dst) else { continue }; - let Some(dst_live_ranges) = live.row(dst) else { continue }; - trace!(?dst, ?dst_live_ranges); - - if src_live_ranges.disjoint(dst_live_ranges) { - // We want to replace `src` by `dst`. - let mut orig_src = relevant.original[src]; - let mut orig_dst = relevant.original[dst]; - - // The return place and function arguments are required and cannot be renamed. - // This check cannot be made during candidate collection, as we may want to - // unify the same non-required local with several required locals. - match (is_local_required(orig_src, body), is_local_required(orig_dst, body)) { - // Renaming `src` is ok. - (false, _) => {} - // Renaming `src` is wrong, but renaming `dst` is ok. - (true, false) => { - std::mem::swap(&mut src, &mut dst); - std::mem::swap(&mut orig_src, &mut orig_dst); - } - // Neither local can be renamed, so skip this case. - (true, true) => continue, - } + for (src, dst) in candidates.c.into_iter() { + trace!(?src, ?dst); - trace!(?src, ?dst, "merge"); - merged_locals.insert(orig_src); - merged_locals.insert(orig_dst); + let Some(mut src) = relevant.find(src) else { continue }; + let Some(mut dst) = relevant.find(dst) else { continue }; + if src == dst { + continue; + } - // Replace `src` by `dst`. - relevant.union(src, dst); - live.union_rows(/* read */ src, /* write */ dst); + let Some(src_live_ranges) = live.row(src) else { continue }; + let Some(dst_live_ranges) = live.row(dst) else { continue }; + trace!(?src, ?src_live_ranges); + trace!(?dst, ?dst_live_ranges); + + if src_live_ranges.disjoint(dst_live_ranges) { + // We want to replace `src` by `dst`. + let mut orig_src = relevant.original[src]; + let mut orig_dst = relevant.original[dst]; + + // The return place and function arguments are required and cannot be renamed. + // This check cannot be made during candidate collection, as we may want to + // unify the same non-required local with several required locals. + match (is_local_required(orig_src, body), is_local_required(orig_dst, body)) { + // Renaming `src` is ok. + (false, _) => {} + // Renaming `src` is wrong, but renaming `dst` is ok. + (true, false) => { + std::mem::swap(&mut src, &mut dst); + std::mem::swap(&mut orig_src, &mut orig_dst); + } + // Neither local can be renamed, so skip this case. + (true, true) => continue, } + + trace!(?src, ?dst, "merge"); + merged_locals.insert(orig_src); + merged_locals.insert(orig_dst); + + // Replace `src` by `dst`. + relevant.union(src, dst); + live.union_rows(/* read */ src, /* write */ dst); } } trace!(?merged_locals); @@ -342,11 +340,9 @@ impl RelevantLocals { shrink.get_or_insert_with(local, || original.push(local)); }; - for (&src, destinations) in candidates.c.iter() { + for &(src, dest) in candidates.c.iter() { declare(src); - for &dest in destinations { - declare(dest) - } + declare(dest) } let renames = IndexVec::from_fn_n(|l| l, original.len()); @@ -388,8 +384,6 @@ impl RelevantLocals { struct Candidates { /// The set of candidates we are considering in this optimization. /// - /// We will always merge the key into at most one of its values. - /// /// Whether a place ends up in the key or the value does not correspond to whether it appears as /// the lhs or rhs of any assignment. As a matter of fact, the places in here might never appear /// in an assignment at all. This happens because if we see an assignment like this: @@ -400,7 +394,7 @@ struct Candidates { /// /// We will still report that we would like to merge `_1` and `_2` in an attempt to allow us to /// remove that assignment. - c: FxIndexMap>, + c: Vec<(Local, Local)>, } // We first implement some utility functions which we will expose removing candidates according to @@ -414,19 +408,13 @@ impl Candidates { let mut visitor = FindAssignments { body, candidates: Default::default(), borrowed }; visitor.visit_body(body); - // Deduplicate candidates. - for (_, cands) in visitor.candidates.iter_mut() { - cands.sort(); - cands.dedup(); - } - Candidates { c: visitor.candidates } } } struct FindAssignments<'a, 'tcx> { body: &'a Body<'tcx>, - candidates: FxIndexMap>, + candidates: Vec<(Local, Local)>, borrowed: &'a DenseBitSet, } @@ -456,7 +444,7 @@ impl<'tcx> Visitor<'tcx> for FindAssignments<'_, 'tcx> { } // We may insert duplicates here, but that's fine - self.candidates.entry(src).or_default().push(dest); + self.candidates.push((src, dest)); } } } From 5d0e66d451b0da5e7a712d5f9e041e5bbbbb0255 Mon Sep 17 00:00:00 2001 From: Camille Gillot Date: Wed, 27 Aug 2025 01:40:16 +0000 Subject: [PATCH 8/8] Use rustc_data_structures::union_find. --- compiler/rustc_mir_transform/src/dest_prop.rs | 42 +++++++------------ 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs index b6306c4869bd9..9ba2d274691de 100644 --- a/compiler/rustc_mir_transform/src/dest_prop.rs +++ b/compiler/rustc_mir_transform/src/dest_prop.rs @@ -137,6 +137,7 @@ //! [attempt 3]: https://github.com/rust-lang/rust/pull/72632 //! [attempt 4]: https://github.com/rust-lang/rust/pull/96451 +use rustc_data_structures::union_find::UnionFind; use rustc_index::bit_set::DenseBitSet; use rustc_index::interval::SparseIntervalMatrix; use rustc_index::{IndexVec, newtype_index}; @@ -225,13 +226,12 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation { merged_locals.insert(orig_dst); // Replace `src` by `dst`. - relevant.union(src, dst); - live.union_rows(/* read */ src, /* write */ dst); + let head = relevant.union(src, dst); + live.union_rows(/* read */ src, /* write */ head); + live.union_rows(/* read */ dst, /* write */ head); } } trace!(?merged_locals); - - relevant.make_idempotent(); trace!(?relevant.renames); if merged_locals.is_empty() { @@ -326,7 +326,7 @@ newtype_index! { struct RelevantLocals { original: IndexVec, shrink: IndexVec>, - renames: IndexVec, + renames: UnionFind, } impl RelevantLocals { @@ -345,35 +345,21 @@ impl RelevantLocals { declare(dest) } - let renames = IndexVec::from_fn_n(|l| l, original.len()); + let renames = UnionFind::new(original.len()); RelevantLocals { original, shrink, renames } } - fn find(&self, src: Local) -> Option { - let mut src = self.shrink[src]?; - while let s2 = self.renames[src] - && src != s2 - { - src = s2 - } + fn find(&mut self, src: Local) -> Option { + let src = self.shrink[src]?; + let src = self.renames.find(src); Some(src) } - fn union(&mut self, lhs: RelevantLocal, rhs: RelevantLocal) { - self.renames[lhs] = rhs; - } - - fn make_idempotent(&mut self) { - for l in self.renames.indices() { - let mut h = self.renames[l]; - while let h2 = self.renames[h] - && h != h2 - { - h = h2 - } - self.renames[l] = h; - debug_assert_eq!(h, self.renames[h], "non-idempotent for {l:?}"); - } + fn union(&mut self, lhs: RelevantLocal, rhs: RelevantLocal) -> RelevantLocal { + let head = self.renames.unify(lhs, rhs); + // We need to ensure we keep the original local of the RHS, as it may be a required local. + self.original[head] = self.original[rhs]; + head } }