Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/include/llvm/CodeGen/MachineInstr.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,9 @@ class MachineInstr
NoUSWrap = 1 << 20, // Instruction supports geps
// no unsigned signed wrap.
SameSign = 1 << 21, // Both operands have the same sign.
InBounds = 1 << 22 // Pointer arithmetic remains inbounds.
InBounds = 1 << 22, // Pointer arithmetic remains inbounds.
// Implies NoUSWrap.
LRSplit = 1 << 23 // Instruction for live range split.
};

private:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/SplitKit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(
| getInternalReadRegState(!FirstCopy), SubIdx)
.addReg(FromReg, 0, SubIdx);

CopyMI->setFlag(MachineInstr::LRSplit);
SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (FirstCopy) {
Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
Expand All @@ -550,6 +551,7 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
// The full vreg is copied.
MachineInstr *CopyMI =
BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
CopyMI->setFlag(MachineInstr::LRSplit);
return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
}

Expand Down
34 changes: 27 additions & 7 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9878,6 +9878,30 @@ unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg,
return AMDGPU::COPY;
}

/// Decide whether \p MI is an instruction that may be treated as part of the
/// basic-block prolog.
///
/// Returns true when:
///  - the opcode is an SGPR spill or a WWM-register spill, or
///  - \p MI carries the MachineInstr::LRSplit flag and the register of
///    operand 0 belongs to an SGPR class, or
///  - \p MI carries the LRSplit flag or is an IMPLICIT_DEF, the register of
///    operand 0 is not in an SGPR class, and SIMachineFunctionInfo reports
///    that register as a WWM register.
/// Returns false in every other case.
bool SIInstrInfo::canAddToBBProlog(const MachineInstr &MI) const {
uint16_t Opcode = MI.getOpcode();
// SGPR spill and WWM-register spill opcodes always qualify.
if (isSGPRSpill(Opcode) || isWWMRegSpillOpcode(Opcode))
return true;

const MachineFunction *MF = MI.getMF();
const MachineRegisterInfo &MRI = MF->getRegInfo();
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

// Check whether this is a live-range split instruction (identified by the
// LRSplit MI flag) inserted for an SGPR or a WWM register. The implicit def
// inserted for WWM registers should also be included, as it can appear at
// the beginning of the basic block.
bool IsLRSplitInst = MI.getFlag(MachineInstr::LRSplit);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of adding a flag, would it be enough to check that this is an SGPR copy?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That won't help for the WWM case (though below it is checking the flag in the SGPR case too)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of adding a flag, would it be enough to check that this is an SGPR copy?

That won't help. It is important to note that this target hook is also invoked by other passes to identify the right insertion points, such as PHIElimination and MI Sink (called via SkipPHIsAndLabels & SkipPHIsLabelsAndDebug), which can lead to incorrect insertion points if all COPY instructions are included as part of BB Prolog (we have encountered some errors due to that. See this comment from the other PR #117543 (comment)). By adding an MI flag to LR_Split COPY instructions, we ensure that only the intended candidates are considered during RA while inserting spill and LR_split instructions.

// Anything that is neither flagged as a live-range split nor an IMPLICIT_DEF
// is rejected before operand 0 is inspected.
if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
return false;

// Operand 0 is read as a register here (presumably the destination of the
// split copy / implicit def -- confirm against the producers of these MIs).
Register Reg = MI.getOperand(0).getReg();
// For an SGPR-class register only a flagged live-range split instruction
// qualifies; an unflagged IMPLICIT_DEF of an SGPR does not.
if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
return IsLRSplitInst;

// For a non-SGPR register the instruction qualifies only if the register is
// recorded as a WWM register.
return MFI->isWWMReg(Reg);
}

bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
Register Reg) const {
// We need to handle instructions which may be inserted during register
Expand All @@ -9886,20 +9910,16 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI,
// needed by the prolog. However, the insertions for scalar registers can
// always be placed at the BB top as they are independent of the exec mask
// value.
const MachineFunction *MF = MI.getMF();
bool IsNullOrVectorRegister = true;
if (Reg) {
const MachineFunction *MF = MI.getMF();
const MachineRegisterInfo &MRI = MF->getRegInfo();
IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
}

uint16_t Opcode = MI.getOpcode();
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
return IsNullOrVectorRegister &&
(isSGPRSpill(Opcode) || isWWMRegSpillOpcode(Opcode) ||
(Opcode == AMDGPU::IMPLICIT_DEF &&
MFI->isWWMReg(MI.getOperand(0).getReg())) ||
(!MI.isTerminator() && Opcode != AMDGPU::COPY &&
(canAddToBBProlog(MI) ||
(!MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
}

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1579,6 +1579,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool isBasicBlockPrologue(const MachineInstr &MI,
Register Reg = Register()) const override;

bool canAddToBBProlog(const MachineInstr &MI) const;

MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsPt,
const DebugLoc &DL, Register Src,
Expand Down
Loading
Loading