@@ -1615,6 +1615,64 @@ void GCNSchedStage::revertScheduling() {
   DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
 }
 
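+// Returns true if every register read by InstToRemat that is live at
+// OriginalIdx is also available with the same value at RematIdx, i.e. the
+// instruction can be rematerialized at RematIdx without any of its uses
+// having been redefined. Analogous to LiveRangeEdit::allUsesAvailableAt().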
+bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
+                                         SlotIndex OriginalIdx,
+                                         SlotIndex RematIdx) const {
+
+  LiveIntervals *LIS = DAG.LIS;
+  MachineRegisterInfo &MRI = DAG.MRI;
+  OriginalIdx = OriginalIdx.getRegSlot(true);
+  RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
+  for (const MachineOperand &MO : InstToRemat->operands()) {
+    if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+      continue;
+
+    if (!MO.getReg().isVirtual()) {
+      // Do not attempt to reason about PhysRegs.
+      // TODO: better analysis of PhysReg liveness.
+      if (!DAG.MRI.isConstantPhysReg(MO.getReg()) &&
+          !DAG.TII->isIgnorableUse(MO))
+        return false;
+
+      // Constant PhysRegs and IgnorableUses are okay.
+      continue;
+    }
+
+    LiveInterval &LI = LIS->getInterval(MO.getReg());
+    const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
+    assert(OVNI);
+
+    // Don't allow rematerialization immediately after the original def.
+    // It would be incorrect if InstToRemat redefines the register.
+    // See PR14098.
+    if (SlotIndex::isSameInstr(OriginalIdx, RematIdx))
+      return false;
+
+    if (OVNI != LI.getVNInfoAt(RematIdx))
+      return false;
+
+    // Check that the subrange is live at RematIdx.
+    if (LI.hasSubRanges()) {
+      const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+      unsigned SubReg = MO.getSubReg();
+      LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+                              : MRI.getMaxLaneMaskForVReg(MO.getReg());
+      for (LiveInterval::SubRange &SR : LI.subranges()) {
+        if ((SR.LaneMask & LM).none())
+          continue;
+        if (!SR.liveAt(RematIdx))
+          return false;
+
+        // Early exit if all used lanes are checked. No need to continue.
+        LM &= ~SR.LaneMask;
+        if (LM.none())
+          break;
+      }
+    }
+  }
+  return true;
+}
+
 void PreRARematStage::collectRematerializableInstructions() {
   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
   for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
@@ -1636,6 +1694,47 @@ void PreRARematStage::collectRematerializableInstructions() {
     if (Def->getParent() == UseI->getParent())
       continue;
 
+    bool HasRematDependency = false;
+    // Check if this instruction uses any registers that are planned to be
+    // rematerialized.
+    for (auto &RematEntry : RematerializableInsts) {
+      if (find_if(RematEntry.second,
+                  [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
+                    for (MachineOperand &MO : Def->operands()) {
+                      if (!MO.isReg())
+                        continue;
+                      if (MO.getReg() == Remat.first->getOperand(0).getReg())
+                        return true;
+                    }
+                    return false;
+                  }) != RematEntry.second.end()) {
+        HasRematDependency = true;
+        break;
+      }
+    }
+    // Do not rematerialize an instruction if it uses an instruction that we
+    // have designated for rematerialization.
+    // FIXME: Allow for rematerialization chains: this requires 1. updating
+    // remat points to account for uses that are rematerialized, and 2. either
+    // rematerializing the candidates in careful ordering, or deferring the MBB
+    // RP walk until the entire chain has been rematerialized.
+    if (HasRematDependency)
+      continue;
+
+    // Similarly, check if the UseI is planned to be remat.
+    for (auto &RematEntry : RematerializableInsts) {
+      if (find_if(RematEntry.second,
+                  [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
+                    return Remat.first == UseI;
+                  }) != RematEntry.second.end()) {
+        HasRematDependency = true;
+        break;
+      }
+    }
+
+    if (HasRematDependency)
+      break;
+
     // We are only collecting defs that are defined in another block and are
     // live-through or used inside regions at MinOccupancy. This means that the
     // register must be in the live-in set for the region.
@@ -1644,8 +1743,13 @@ void PreRARematStage::collectRematerializableInstructions() {
       auto It = DAG.LiveIns[I].find(Reg);
       if (It != DAG.LiveIns[I].end() && !It->second.none()) {
         if (DAG.RegionsWithMinOcc[I]) {
-          RematerializableInsts[I][Def] = UseI;
-          AddedToRematList = true;
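+          // Only rematerialize Def if all the registers it reads are still
+          // available (hold the same values) right before UseI, where the
+          // rematerialized copy will be inserted.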
+          SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
+          SlotIndex UseIdx =
+              DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
+          if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
+            RematerializableInsts[I][Def] = UseI;
+            AddedToRematList = true;
+          }
         }
 
         // Collect regions with rematerializable reg as live-in to avoid
@@ -1719,6 +1823,35 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
       Register DefReg = Def->getOperand(0).getReg();
       TotalSinkableRegs +=
           SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
+#ifdef EXPENSIVE_CHECKS
+      // All uses are known to be available / live at the remat point. Thus,
+      // the uses should already be live-in to the region.
+      for (MachineOperand &MO : Def->operands()) {
+        if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+          continue;
+
+        Register UseReg = MO.getReg();
+        if (!UseReg.isVirtual())
+          continue;
+
+        LiveInterval &LI = LIS->getInterval(UseReg);
+        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
+        if (LI.hasSubRanges() && MO.getSubReg())
+          LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
+
+        assert(NewLiveIns[I].contains(UseReg));
+        LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
+        LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
+        // If this register has lanes not covered by the LiveIns, be sure they
+        // do not map to any subrange. ref:
+        // machine-scheduler-sink-trivial-remats.mir::omitted_subrange
+        if (UncoveredLanes.any()) {
+          assert(LI.hasSubRanges());
+          for (LiveInterval::SubRange &SR : LI.subranges())
+            assert((SR.LaneMask & UncoveredLanes).none());
+        }
+      }
+#endif
     }
     int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
     unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
@@ -1734,10 +1867,7 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
       MachineBasicBlock::iterator InsertPos =
           MachineBasicBlock::iterator(It.second);
       Register Reg = Def->getOperand(0).getReg();
-      // Rematerialize MI to its use block. Since we are only rematerializing
-      // instructions that do not have any virtual reg uses, we do not need to
-      // call LiveRangeEdit::allUsesAvailableAt() and
-      // LiveRangeEdit::canRematerializeAt().
+      // Rematerialize MI to its use block.
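+      // Candidates were filtered through allUsesAvailableAt() in
+      // collectRematerializableInstructions(), so all register uses of Def
+      // are known to be live at this insertion point.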
       TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                          Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
       MachineInstr *NewMI = &*std::prev(InsertPos);
@@ -1847,9 +1977,6 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
     return false;
 
   for (const MachineOperand &MO : MI.all_uses()) {
-    if (MO.getReg().isVirtual())
-      return false;
-
     // We can't remat physreg uses, unless it is a constant or an ignorable
     // use (e.g. implicit exec use on VALU instructions)
     if (MO.getReg().isPhysical()) {