Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Frame lowering optimizations #1

Open
wants to merge 4 commits into
base: nanomips-llvm13-submission
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions llvm/lib/Target/Mips/MipsMachineFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,19 @@ int MipsFunctionInfo::getMoveF64ViaSpillFI(MachineFunction &MF,
}

void MipsFunctionInfo::anchor() {}

unsigned MipsFunctionInfo::getCalleeSavedStackSize() {
return CalleeSavedStackSize;
}

void MipsFunctionInfo::setCalleeSavedStackSize(unsigned Size) {
CalleeSavedStackSize = Size;
}

// Returns true when the stack of MF is set up in two steps, i.e. when the
// frame's stack size is greater than 4096 and the subtarget has nanoMIPS.
bool MipsFunctionInfo::isTwoStepStackSetup(MachineFunction &MF) {
  const auto &Subtarget = MF.getSubtarget<MipsSubtarget>();
  const bool ExceedsLimit = MF.getFrameInfo().getStackSize() > 4096;
  return ExceedsLimit && Subtarget.hasNanoMips();
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/Mips/MipsMachineFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ class MipsFunctionInfo : public MachineFunctionInfo {
JumpTableEntryInfo[Idx]->Signed = Sign;
}
}
/// Returns the value stored in CalleeSavedStackSize.
unsigned getCalleeSavedStackSize();
/// Stores Size in CalleeSavedStackSize.
void setCalleeSavedStackSize(unsigned Size);
/// Returns true when the stack size of MF is greater than 4096 and the
/// subtarget has nanoMIPS, i.e. when the stack is set up in two steps.
bool isTwoStepStackSetup(MachineFunction &MF);

private:
virtual void anchor();
Expand Down Expand Up @@ -168,6 +171,7 @@ class MipsFunctionInfo : public MachineFunctionInfo {
};

SmallVector<NanoMipsJumpTableInfo *, 2> JumpTableEntryInfo;
// Stack size covering the callee-saved register spills; stays zero until
// setCalleeSavedStackSize() is called.
unsigned CalleeSavedStackSize = 0;
};

} // end namespace llvm
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/Mips/MipsRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,11 +303,21 @@ Register MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
const MipsSubtarget &Subtarget = MF.getSubtarget<MipsSubtarget>();
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
bool IsN64 =
static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsN64();
bool IsP32 =
static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI().IsP32();

// If function doesn't have var-sized objects and function doesn't need stack
// realignment but frame pointer elimination is disabled we want offsets to be
// relative to sp instead of fp
if (Subtarget.hasNanoMips())
if (!MFI.hasVarSizedObjects() && !TRI->hasStackRealignment(MF) &&
MF.getTarget().Options.DisableFramePointerElim(MF))
return Mips::SP_NM;

if (Subtarget.inMips16Mode())
return TFI->hasFP(MF) ? Mips::S0 : Mips::SP;
else
Expand Down
193 changes: 161 additions & 32 deletions llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,19 +432,61 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();

// Adjust stack.
TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

int64_t CalleeSavedStackSize;
int64_t LoaclStackSize;
nikolaperic marked this conversation as resolved.
Show resolved Hide resolved
// If we have a two-step stack setup, MBBI_2 will point to the
// first instruction after the callee-saves store sequence.
MachineBasicBlock::iterator MBBI_2 = MBBI;

if (MipsFI->isTwoStepStackSetup(MF)) {

CalleeSavedStackSize = MipsFI->getCalleeSavedStackSize();
unsigned NumOfCSI = MFI.getCalleeSavedInfo().size();

// Move MBBI_2 to point to the first instruction after the
// callee-saves store sequence. That's the place for the second
// stack pointer adjustment.
nikolaperic marked this conversation as resolved.
Show resolved Hide resolved
std::advance(MBBI_2, NumOfCSI);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assumes that the prologue definitely has one instruction per callee save, and that they're right here? That might be the case now, but future changes to where save/restore instructions are generated might invalidate it. It would be a good idea to at least assert that the instructions are what we assume them to be, and to document that assumption.

Copy link
Collaborator Author

@nikolaperic nikolaperic Jul 26, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think that's the safer way. However, in order to do that, I think we need to introduce a new function which checks the opcodes and operands of the skipped instructions. This function will be nanoMIPS-specific, so I think it's not a good idea to write it inside MipsSEFrameLowering. Maybe a good place for such a function is inside MipsFunctionInfo, since we have already put the CalleeSavedStackSize variable there? Is this a good approach for this problem?

Note that on line 452 we already skip over the CSR store instructions in the same way.


// The first stack pointer adjustment to cover space needed
// to spill callee-saved registers on stack.
TII.adjustStackPtr(SP, -CalleeSavedStackSize, MBB, MBBI);

LoaclStackSize = StackSize - CalleeSavedStackSize;

// emit ".cfi_def_cfa_offset StackSize"
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
// The second stack pointer adjustment to cover space needed
// to spill local objects on stack.
TII.adjustStackPtr(SP, -LoaclStackSize, MBB, MBBI_2);

} else
// Adjust stack.
TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);

if (MipsFI->isTwoStepStackSetup(MF)) {
nikolaperic marked this conversation as resolved.
Show resolved Hide resolved
// emit ".cfi_def_cfa_offset CalleeSavedStackSize"
// emit ".cfi_def_cfa_offset StackSize = CalleeSavedStackSize +
// LoaclStackSize"
unsigned CFIIndex_1 = MF.addFrameInst(
MCCFIInstruction::cfiDefCfaOffset(nullptr, CalleeSavedStackSize));
unsigned CFIIndex_2 =
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex_1);
BuildMI(MBB, MBBI_2, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex_2);
} else {
// emit ".cfi_def_cfa_offset StackSize"
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}

if (MF.getFunction().hasFnAttribute("interrupt"))
emitInterruptPrologueStub(MF, MBB);

const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

if (!CSI.empty()) {
// Find the instruction past the last instruction that saves a callee-saved
Expand Down Expand Up @@ -527,15 +569,40 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,

// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
// Insert instruction "move $fp, $sp" at this location.
BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP).addReg(SP).addReg(ZERO)
.setMIFlag(MachineInstr::FrameSetup);

// emit ".cfi_def_cfa_register $fp"
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FP, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
if (STI.hasNanoMips()) {

BuildMI(MBB, MBBI_2, dl, TII.get(ADDiu), FP)
.addReg(SP)
.addImm(-4096 + StackSize);

// emit ".cfi_def_cfa_register $fp"
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FP, true)));
BuildMI(MBB, MBBI_2, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);

// emit ".cfi_def_cfa_offset 4096"
unsigned CFIIndex_1 =
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 4096));
BuildMI(MBB, MBBI_2, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex_1);

} else {
// Insert instruction "move $fp, $sp" at this location.
BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP)
.addReg(SP)
.addReg(ZERO)
.setMIFlag(MachineInstr::FrameSetup);

// emit ".cfi_def_cfa_register $fp"
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FP, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}

if (RegInfo.hasStackRealignment(MF)) {
// addiu $Reg, $zero, -MaxAlignment
Expand All @@ -545,13 +612,19 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
"Function's alignment size requirement is not supported.");
int64_t MaxAlign = -(int64_t)MFI.getMaxAlign().value();

if (ABI.IsP32())
BuildMI(MBB, MBBI, dl, TII.get(Mips::Li_NM), VR).addImm(MaxAlign);
else
if (ABI.IsP32()) {
uint64_t MaxAlignment = MFI.getMaxAlign().value();
BuildMI(MBB, MBBI, dl, TII.get(Mips::INS_NM), SP)
.addReg(ZERO)
.addImm(0)
.addImm(Log2_64(MaxAlignment))
.addReg(SP);
} else {
BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR)
.addReg(ZERO)
.addImm(MaxAlign);
BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);
BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);
}

if (hasBP(MF)) {
// move $s7, $sp
Expand Down Expand Up @@ -709,7 +782,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
unsigned MOVE = ABI.GetGPRMoveOp();

// if framepointer enabled, restore the stack pointer.
if (hasFP(MF)) {
if (hasFP(MF) && !STI.hasNanoMips()) {
// Find the first instruction that restores a callee-saved register.
MachineBasicBlock::iterator I = MBBI;

Expand Down Expand Up @@ -747,8 +820,26 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
if (!StackSize)
return;

// Adjust stack.
TII.adjustStackPtr(SP, StackSize, MBB, MBBI);
if (MipsFI->isTwoStepStackSetup(MF)) {

int64_t CalleeSavedStackSize = MipsFI->getCalleeSavedStackSize();
int64_t LoaclStackSize = StackSize - CalleeSavedStackSize;

int64_t NumOfCSI = MFI.getCalleeSavedInfo().size();

MachineBasicBlock::iterator MBBI_2 = MBBI;
// Move MBBI_2 to point to the first instruction in the
// callee-saved load sequence. That's the place where we
// need to undo the second stack adjustment.
std::advance(MBBI_2, (-1) * NumOfCSI);

// Undo the second stack pointer adjustment
TII.adjustStackPtr(SP, LoaclStackSize, MBB, MBBI_2);
// Undo the first stack pointer adjustment
TII.adjustStackPtr(SP, CalleeSavedStackSize, MBB, MBBI);
} else
// Adjust stack.
TII.adjustStackPtr(SP, StackSize, MBB, MBBI);
}

void MipsSEFrameLowering::emitInterruptEpilogueStub(
Expand Down Expand Up @@ -935,22 +1026,60 @@ bool MipsSEFrameLowering::assignCalleeSavedSpillSlots(
if (!STI.hasNanoMips())
return false;

static const std::unordered_map<unsigned, unsigned> Regs = {
{Mips::GP_NM, 0}, {Mips::FP_NM, 1}, {Mips::RA_NM, 2}, {Mips::S0_NM, 3},
{Mips::S1_NM, 4}, {Mips::S2_NM, 5}, {Mips::S3_NM, 6}, {Mips::S4_NM, 7},
{Mips::S5_NM, 8}, {Mips::S6_NM, 9}, {Mips::S7_NM, 10},
};

static const std::unordered_map<unsigned, unsigned> CSNumToReg = {
{0, Mips::GP_NM}, {1, Mips::FP_NM}, {2, Mips::RA_NM}, {3, Mips::S0_NM},
{4, Mips::S1_NM}, {5, Mips::S2_NM}, {6, Mips::S3_NM}, {7, Mips::S4_NM},
{8, Mips::S5_NM}, {9, Mips::S6_NM}, {10, Mips::S7_NM},
};

// nanoMIPS save and restore instructions require callee-saved registers to be
// saved in particular order on the stack.
auto SortCalleeSaves = [](CalleeSavedInfo First, CalleeSavedInfo Second) {
std::unordered_map<unsigned, unsigned> Regs{
{Mips::GP_NM, 0}, {Mips::FP_NM, 1}, {Mips::RA_NM, 2}, {Mips::S0_NM, 3},
{Mips::S1_NM, 4}, {Mips::S2_NM, 5}, {Mips::S3_NM, 6}, {Mips::S4_NM, 7},
{Mips::S5_NM, 8}, {Mips::S6_NM, 9}, {Mips::S7_NM, 10},
};

auto CompareCalleeSaves = [](CalleeSavedInfo First, CalleeSavedInfo Second) {
// There should be no callee-saved registers that are not part of the list.
assert(Regs.find(First.getReg()) != Regs.end() &&
Regs.find(Second.getReg()) != Regs.end());

return Regs[First.getReg()] < Regs[Second.getReg()];
return Regs.at(First.getReg()) < Regs.at(Second.getReg());
};
std::sort(CSI.begin(), CSI.end(), SortCalleeSaves);

// If the CSI list has fewer than two callee-saved registers,
// neither insertion nor sorting is needed.
if (CSI.size() >= 2) {

SmallBitVector CSNumBitVector(11);
for (CalleeSavedInfo &CS : CSI)
CSNumBitVector.set(Regs.at(CS.getReg()));

int MinCSNum = CSNumBitVector.find_first();
int MaxCSNum = CSNumBitVector.find_last();

// Inserting all of the missing callee-saved registers between min and max
// in order to allow further load-store optimizations
for (int i = MinCSNum + 1; i < MaxCSNum; ++i)
if (!CSNumBitVector.test(i))
CSI.push_back(CalleeSavedInfo(CSNumToReg.at(i)));

std::sort(CSI.begin(), CSI.end(), CompareCalleeSaves);
}

MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MachineRegisterInfo &MRI = MF.getRegInfo();

unsigned CalleeSavedOffsetSize = 0;
for (CalleeSavedInfo &CS : CSI) {
Register Reg = CS.getReg();
auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
CalleeSavedOffsetSize += RegSize;
}
uint64_t AlignedCSStackSize = alignTo(CalleeSavedOffsetSize, 16);
MipsFI->setCalleeSavedStackSize(AlignedCSStackSize);

return false;
}

Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,8 +461,8 @@ bool MipsSEDAGToDAGISel::selectIntAddrLSL2MM(SDValue Addr, SDValue &Base,

bool MipsSEDAGToDAGISel::selectIntAddrSImm9(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
return selectAddrFrameIndex(Addr, Base, Offset) ||
selectAddrFrameIndexOffset(Addr, Base, Offset, 9);
return selectAddrFrameIndexOffset(Addr, Base, Offset, 9) &&
!isa<FrameIndexSDNode>(Base);
}

bool MipsSEDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base,
Expand Down Expand Up @@ -541,7 +541,8 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndexUOffset(

bool MipsSEDAGToDAGISel::selectIntAddrUImm12(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
return selectAddrFrameIndexUOffset(Addr, Base, Offset, 12, 0);
return selectAddrFrameIndex(Addr, Base, Offset) ||
selectAddrFrameIndexUOffset(Addr, Base, Offset, 12, 0);
}

// A load/store 'x' indexed (reg + reg)
Expand Down
30 changes: 28 additions & 2 deletions llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
static_cast<const MipsTargetMachine &>(MF.getTarget()).getABI();
const MipsRegisterInfo *RegInfo =
static_cast<const MipsRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
const MipsSubtarget &STI =
*static_cast<const MipsSubtarget *>(&MF.getSubtarget());

const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
int MinCSFI = 0;
Expand Down Expand Up @@ -220,7 +222,29 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
bool IsKill = false;
int64_t Offset;

Offset = SPOffset + (int64_t)StackSize;
if (STI.hasNanoMips()) {

if (MipsFI->isTwoStepStackSetup(MF)) {

int64_t CalleeSavedStackSize = MipsFI->getCalleeSavedStackSize();
if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI)
Offset = SPOffset + (int64_t)CalleeSavedStackSize;
else if (FrameReg == Mips::FP_NM)
Offset = SPOffset + 4096;
else
Offset = SPOffset + StackSize;

} else {

if (FrameReg == Mips::FP_NM)
Offset = SPOffset + 4096;
else
Offset = SPOffset + StackSize;
}

} else
Offset = SPOffset + (int64_t)StackSize;

Offset += MI.getOperand(OpNo + 1).getImm();

LLVM_DEBUG(errs() << "Offset : " << Offset << "\n"
Expand All @@ -247,7 +271,9 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
// TODO: This doesn't work well for nanoMIPS, because it has unsigned
// offsets and this check assumes signed.
if (OffsetBitSize < 16 && isInt<16>(Offset) &&
(!isIntN(OffsetBitSize, Offset) || !isAligned(OffsetAlign, Offset))) {
(STI.hasNanoMips() ? !isUIntN(OffsetBitSize, Offset)
: !isIntN(OffsetBitSize, Offset) ||
!isAligned(OffsetAlign, Offset))) {
// If we have an offset that needs to fit into a signed n-bit immediate
// (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu
MachineBasicBlock &MBB = *MI.getParent();
Expand Down
Loading