-
Notifications
You must be signed in to change notification settings - Fork 15.7k
[LLVM][AArch64]Use load/store with consecutive registers in SME2 or S… #77665
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
5e9b05b
fff3e34
f61f7bc
94f21b1
19a8ab6
6312650
b18f3a6
898a5fc
2c67e80
ecb0f57
c8bdbb9
633fa85
0b2c9f7
a6d036b
e314a8a
c636d7d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1508,6 +1508,12 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { | |
| switch (I->getOpcode()) { | ||
| default: | ||
| return false; | ||
| case AArch64::PTRUE_C_B: | ||
| case AArch64::LD1B_2Z_IMM: | ||
| case AArch64::ST1B_2Z_IMM: | ||
| assert((I->getMF()->getSubtarget<AArch64Subtarget>().hasSVE2p1() || | ||
| I->getMF()->getSubtarget<AArch64Subtarget>().hasSME2()) && | ||
| "Expected SME2 or SVE2.1 Target Architecture."); | ||
CarolineConcatto marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| case AArch64::STR_ZXI: | ||
| case AArch64::STR_PXI: | ||
| case AArch64::LDR_ZXI: | ||
|
|
@@ -2791,6 +2797,7 @@ static void computeCalleeSaveRegisterPairs( | |
|
|
||
| bool IsWindows = isTargetWindows(MF); | ||
| bool NeedsWinCFI = needsWinCFI(MF); | ||
| const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); | ||
| AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); | ||
| MachineFrameInfo &MFI = MF.getFrameInfo(); | ||
| CallingConv::ID CC = MF.getFunction().getCallingConv(); | ||
|
|
@@ -2859,7 +2866,11 @@ static void computeCalleeSaveRegisterPairs( | |
| RPI.Reg2 = NextReg; | ||
| break; | ||
| case RegPairInfo::PPR: | ||
| break; | ||
| case RegPairInfo::ZPR: | ||
| if (Subtarget.hasSVE2p1() || Subtarget.hasSME2()) | ||
CarolineConcatto marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (((RPI.Reg1 - AArch64::Z0) & 1) == 0 && (NextReg == RPI.Reg1 + 1)) | ||
| RPI.Reg2 = NextReg; | ||
| break; | ||
| } | ||
| } | ||
|
|
@@ -2904,7 +2915,7 @@ static void computeCalleeSaveRegisterPairs( | |
| assert(OffsetPre % Scale == 0); | ||
|
|
||
| if (RPI.isScalable()) | ||
| ScalableByteOffset += StackFillDir * Scale; | ||
| ScalableByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale); | ||
| else | ||
| ByteOffset += StackFillDir * (RPI.isPaired() ? 2 * Scale : Scale); | ||
|
|
||
|
|
@@ -2915,9 +2926,6 @@ static void computeCalleeSaveRegisterPairs( | |
| (IsWindows && RPI.Reg2 == AArch64::LR))) | ||
| ByteOffset += StackFillDir * 8; | ||
|
|
||
| assert(!(RPI.isScalable() && RPI.isPaired()) && | ||
| "Paired spill/fill instructions don't exist for SVE vectors"); | ||
|
|
||
| // Round up size of non-pair to pair size if we need to pad the | ||
| // callee-save area to ensure 16-byte alignment. | ||
| if (NeedGapToAlignStack && !NeedsWinCFI && | ||
|
|
@@ -3004,6 +3012,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( | |
| } | ||
| return true; | ||
| } | ||
| bool PtrueCreated = false; | ||
CarolineConcatto marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) { | ||
| unsigned Reg1 = RPI.Reg1; | ||
| unsigned Reg2 = RPI.Reg2; | ||
|
|
@@ -3038,10 +3047,10 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( | |
| Alignment = Align(16); | ||
| break; | ||
| case RegPairInfo::ZPR: | ||
| StrOpc = AArch64::STR_ZXI; | ||
| Size = 16; | ||
| Alignment = Align(16); | ||
| break; | ||
| StrOpc = RPI.isPaired() ? AArch64::ST1B_2Z_IMM : AArch64::STR_ZXI; | ||
| Size = 16; | ||
| Alignment = Align(16); | ||
| break; | ||
| case RegPairInfo::PPR: | ||
| StrOpc = AArch64::STR_PXI; | ||
| Size = 2; | ||
|
|
@@ -3065,19 +3074,40 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( | |
| std::swap(Reg1, Reg2); | ||
| std::swap(FrameIdxReg1, FrameIdxReg2); | ||
| } | ||
|
|
||
| unsigned PairRegs; | ||
| unsigned PnReg; | ||
| if (RPI.isPaired() && RPI.isScalable()) { | ||
| PairRegs = AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0); | ||
| if (!PtrueCreated) { | ||
| PtrueCreated = true; | ||
| // Any one of predicate-as-count will be free to use | ||
| // This can be replaced in the future if needed | ||
| PnReg = AArch64::PN8; | ||
|
||
| BuildMI(MBB, MI, DL, TII.get(AArch64::PTRUE_C_B), PnReg) | ||
| .setMIFlags(MachineInstr::FrameSetup); | ||
| } | ||
| } | ||
|
|
||
| MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); | ||
| if (!MRI.isReserved(Reg1)) | ||
| MBB.addLiveIn(Reg1); | ||
| if (RPI.isPaired()) { | ||
| if (!MRI.isReserved(Reg2)) | ||
| MBB.addLiveIn(Reg2); | ||
| MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); | ||
| if (RPI.isScalable()) | ||
| MIB.addReg(PairRegs); | ||
|
||
| else | ||
| MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); | ||
| MIB.addMemOperand(MF.getMachineMemOperand( | ||
| MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), | ||
| MachineMemOperand::MOStore, Size, Alignment)); | ||
| } | ||
| MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) | ||
| .addReg(AArch64::SP) | ||
| if (RPI.isPaired() && RPI.isScalable()) | ||
| MIB.addReg(PnReg); | ||
| else | ||
| MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)); | ||
| MIB.addReg(AArch64::SP) | ||
| .addImm(RPI.Offset) // [sp, #offset*scale], | ||
| // where factor*scale is implicit | ||
| .setMIFlag(MachineInstr::FrameSetup); | ||
|
|
@@ -3089,8 +3119,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( | |
|
|
||
| // Update the StackIDs of the SVE stack slots. | ||
| MachineFrameInfo &MFI = MF.getFrameInfo(); | ||
| if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) | ||
| MFI.setStackID(RPI.FrameIdx, TargetStackID::ScalableVector); | ||
| if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR) { | ||
| MFI.setStackID(FrameIdxReg1, TargetStackID::ScalableVector); | ||
| if (RPI.isPaired()) | ||
| MFI.setStackID(FrameIdxReg2, TargetStackID::ScalableVector); | ||
| } | ||
|
|
||
| } | ||
| return true; | ||
|
|
@@ -3109,7 +3142,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( | |
| DL = MBBI->getDebugLoc(); | ||
|
|
||
| computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); | ||
|
|
||
CarolineConcatto marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (homogeneousPrologEpilog(MF, &MBB)) { | ||
| auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog)) | ||
| .setMIFlag(MachineInstr::FrameDestroy); | ||
|
|
@@ -3130,6 +3163,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( | |
| auto ZPREnd = std::find_if_not(ZPRBegin, RegPairs.end(), IsZPR); | ||
| std::reverse(ZPRBegin, ZPREnd); | ||
|
|
||
| bool PtrueCreated = false; | ||
CarolineConcatto marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| for (const RegPairInfo &RPI : RegPairs) { | ||
| unsigned Reg1 = RPI.Reg1; | ||
| unsigned Reg2 = RPI.Reg2; | ||
|
|
@@ -3162,7 +3196,7 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( | |
| Alignment = Align(16); | ||
| break; | ||
| case RegPairInfo::ZPR: | ||
| LdrOpc = AArch64::LDR_ZXI; | ||
| LdrOpc = RPI.isPaired() ? AArch64::LD1B_2Z_IMM : AArch64::LDR_ZXI; | ||
| Size = 16; | ||
| Alignment = Align(16); | ||
| break; | ||
|
|
@@ -3187,15 +3221,33 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( | |
| std::swap(Reg1, Reg2); | ||
| std::swap(FrameIdxReg1, FrameIdxReg2); | ||
| } | ||
|
|
||
| unsigned PnReg; | ||
| unsigned PairRegs; | ||
| if (RPI.isPaired() && RPI.isScalable()) { | ||
| PairRegs = AArch64::Z0_Z1 + (RPI.Reg1 - AArch64::Z0); | ||
| if (!PtrueCreated) { | ||
| PtrueCreated = true; | ||
| // Any one of predicate-as-count will be free to use | ||
| // This can be replaced in the future if needed | ||
| PnReg = AArch64::PN8; | ||
| BuildMI(MBB, MBBI, DL, TII.get(AArch64::PTRUE_C_B), PnReg) | ||
| .setMIFlags(MachineInstr::FrameDestroy); | ||
| } | ||
| } | ||
|
|
||
| MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(LdrOpc)); | ||
| if (RPI.isPaired()) { | ||
| MIB.addReg(Reg2, getDefRegState(true)); | ||
| MIB.addReg(RPI.isScalable() ? PairRegs : Reg2, getDefRegState(true)); | ||
| MIB.addMemOperand(MF.getMachineMemOperand( | ||
| MachinePointerInfo::getFixedStack(MF, FrameIdxReg2), | ||
| MachineMemOperand::MOLoad, Size, Alignment)); | ||
| } | ||
| MIB.addReg(Reg1, getDefRegState(true)) | ||
| .addReg(AArch64::SP) | ||
| if (RPI.isPaired() && RPI.isScalable()) | ||
| MIB.addReg(PnReg); | ||
| else | ||
| MIB.addReg(Reg1, getDefRegState(true)); | ||
sdesmalen-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| MIB.addReg(AArch64::SP) | ||
| .addImm(RPI.Offset) // [sp, #offset*scale] | ||
| // where factor*scale is implicit | ||
| .setMIFlag(MachineInstr::FrameDestroy); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As future work, I wonder if we can extend this further to use the quad variants of these instructions as well.