diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index ee03eaa8ae527c..c03ea587805213 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -1553,7 +1553,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, // If this instruction has a FrameIndex operand, we need to // use that target machine register info object to eliminate // it. - TRI.eliminateFrameIndex(MI, SPAdj, i); + TRI.eliminateFrameIndex(MI, SPAdj, i, RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 8404f2231680d6..d850f4fd768311 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/EHPersonalities.h" @@ -2616,7 +2617,7 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, // object. // We need to factor in additional offsets applied during the prologue to the // frame, base, and stack pointer depending on which is used. - int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); + int64_t Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); const X86MachineFunctionInfo *X86FI = MF.getInfo(); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t StackSize = MFI.getStackSize(); @@ -4140,6 +4141,14 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized( // emitPrologue if it gets called and emits CFI. MF.setHasWinCFI(false); + MachineFrameInfo &MFI = MF.getFrameInfo(); + // If the frame is big enough that we might need to scavenge a register to + // handle huge offsets, reserve a stack slot for that now. + if (!isInt<32>(MFI.estimateStackSize(MF))) { + int FI = MFI.CreateStackObject(SlotSize, Align(SlotSize), false); + RS->addScavengingFrameIndex(FI); + } + // If we are using Windows x64 CFI, ensure that the stack is always 8 byte // aligned. The format doesn't support misaligned stack adjustments. if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 638eb1c4f11e41..79ee9ecfdf3ce7 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86RegisterInfo.h" +#include "MCTargetDesc/X86BaseInfo.h" #include "X86FrameLowering.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" @@ -24,6 +25,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TileShapeInfo.h" @@ -905,7 +907,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); // Determine base register and offset. - int FIOffset; + int64_t FIOffset; Register BasePtr; if (MI.isReturn()) { assert((!hasStackRealignment(MF) || @@ -956,10 +958,34 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } if (MI.getOperand(FIOperandNum+3).isImm()) { - // Offset is a 32-bit integer. - int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm()); - int Offset = FIOffset + Imm; - assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && + int64_t Imm = MI.getOperand(FIOperandNum + 3).getImm(); + int64_t Offset = FIOffset + Imm; + bool FitsIn32Bits = isInt<32>(Offset); + // If the offset will not fit in a 32-bit displacement, + // then for 64-bit targets, scavenge a register to hold it. + // Otherwise, for 32-bit targets, this is a bug! + if (Is64Bit && !FitsIn32Bits) { + assert(RS && "RegisterScavenger was NULL"); + const X86InstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const DebugLoc &DL = MI.getDebugLoc(); + + RS->enterBasicBlockEnd(MBB); + RS->backward(std::next(II)); + + Register ScratchReg = RS->scavengeRegisterBackwards( + X86::GR64RegClass, II, /*RestoreAfter=*/false, /*SPAdj=*/0, + /*AllowSpill=*/true); + assert(ScratchReg != 0 && "scratch reg was 0"); + RS->setRegUsed(ScratchReg); + + BuildMI(MBB, II, DL, TII->get(X86::MOV64ri), ScratchReg).addImm(Offset); + + MI.getOperand(FIOperandNum + 3).setImm(0); + MI.getOperand(FIOperandNum + 2).setReg(ScratchReg); + + return false; + } + assert((Is64Bit || FitsIn32Bits) && "Requesting 64-bit offset in 32-bit immediate!"); if (Offset != 0 || !tryOptimizeLEAtoMOV(II)) MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index 7296a5f021e4ad..dd03a108fb8e69 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -13,6 +13,8 @@ #ifndef LLVM_LIB_TARGET_X86_X86REGISTERINFO_H #define LLVM_LIB_TARGET_X86_X86REGISTERINFO_H +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_REGINFO_HEADER @@ -176,6 +178,13 @@ class X86RegisterInfo final : public X86GenRegisterInfo { SmallVectorImpl &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + + // We need to register scavenge if the frame is very large. + return !isInt<32>(MFI.estimateStackSize(MF)); + } }; } // End llvm namespace diff --git a/llvm/test/CodeGen/X86/avx512f-large-stack.ll b/llvm/test/CodeGen/X86/avx512f-large-stack.ll new file mode 100644 index 00000000000000..3cb5391c56abf5 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512f-large-stack.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 4 +; RUN: llc -O0 -mtriple=x86_64 -mattr=+avx512f -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK +define void @f(i16 %LGV2, i1 %LGV3) { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %BB +; CHECK-NEXT: subq $2147483528, %rsp # imm = 0x7FFFFF88 +; CHECK-NEXT: .cfi_def_cfa_offset 2147483536 +; CHECK-NEXT: movb %sil, %cl +; CHECK-NEXT: movw %di, %ax +; CHECK-NEXT: movswq %ax, %rax +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movabsq $-2147483768, %rdx # imm = 0xFFFFFFFF7FFFFF88 +; CHECK-NEXT: movb %cl, (%rsp,%rdx) +; CHECK-NEXT: addq $2147483528, %rsp # imm = 0x7FFFFF88 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +BB: + %A = alloca i1, i33 2147483648, align 1 + %G = getelementptr i1, ptr %A, i16 %LGV2 + %G4 = getelementptr i1, ptr %G, i32 -2147483648 + store i1 %LGV3, ptr %G4, align 1 + ret void +} diff --git a/llvm/test/CodeGen/X86/huge-stack.ll b/llvm/test/CodeGen/X86/huge-stack.ll index 920033ba1182c3..41b8a0141b63d8 100644 --- a/llvm/test/CodeGen/X86/huge-stack.ll +++ b/llvm/test/CodeGen/X86/huge-stack.ll @@ -5,20 +5,70 @@ define void @foo() unnamed_addr #0 { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E +; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88 ; CHECK-NEXT: subq %rax, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 8589934470 -; CHECK-NEXT: movb $42, -129(%rsp) -; CHECK-NEXT: movb $43, -128(%rsp) -; CHECK-NEXT: movabsq $8589934462, %rax # imm = 0x1FFFFFF7E +; CHECK-NEXT: .cfi_def_cfa_offset 8589934480 +; CHECK-NEXT: movabsq $4294967177, %rax # imm = 0xFFFFFF89 +; CHECK-NEXT: movb $42, (%rsp,%rax) +; CHECK-NEXT: movb $43, -118(%rsp) +; CHECK-NEXT: movabsq $8589934472, %rax # imm = 0x1FFFFFF88 ; CHECK-NEXT: addq %rax, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq - %1 = alloca %large, align 1 - %2 = alloca %large, align 1 - %3 = getelementptr inbounds %large, ptr %1, i64 0, i64 0 - store i8 42, ptr %3, align 1 - %4 = getelementptr inbounds %large, ptr %2, i64 0, i64 0 - store i8 43, ptr %4, align 1 + %large1 = alloca %large, align 1 + %large2 = alloca %large, align 1 + %ptrLarge1 = getelementptr inbounds %large, ptr %large1, i64 0, i64 0 + store i8 42, ptr %ptrLarge1, align 1 + %ptrLarge2 = getelementptr inbounds %large, ptr %large2, i64 0, i64 0 + store i8 43, ptr %ptrLarge2, align 1 ret void } + +declare ptr @baz(ptr, ptr, ptr, ptr) + +define ptr @scavenge_spill() unnamed_addr #0 { +; CHECK-LABEL: scavenge_spill: +; CHECK: # %bb.0: +; CHECK-NEXT: movabsq $25769803816, %rax # imm = 0x600000028 +; CHECK-NEXT: subq %rax, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 25769803824 +; CHECK-NEXT: movabsq $21474836521, %rax # imm = 0x500000029 +; CHECK-NEXT: leaq (%rsp,%rax), %rdi +; CHECK-NEXT: movabsq $17179869226, %rax # imm = 0x40000002A +; CHECK-NEXT: leaq (%rsp,%rax), %rsi +; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movabsq $12884901931, %rax # imm = 0x30000002B +; CHECK-NEXT: leaq (%rsp,%rax), %rdx +; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movabsq $8589934636, %rax # imm = 0x20000002C +; CHECK-NEXT: leaq (%rsp,%rax), %rcx +; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: callq baz@PLT +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: leaq 46(%rsp), %rdi +; CHECK-NEXT: callq baz@PLT +; CHECK-NEXT: # kill: def $rcx killed $rax +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; CHECK-NEXT: movabsq $25769803816, %rcx # imm = 0x600000028 +; CHECK-NEXT: addq %rcx, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %large1 = alloca %large, align 1 + %ptrLarge1 = getelementptr inbounds %large, ptr %large1, i64 0, i64 0 + %large2 = alloca %large, align 1 + %ptrLarge2 = getelementptr inbounds %large, ptr %large2, i64 0, i64 0 + %large3 = alloca %large, align 1 + %ptrLarge3 = getelementptr inbounds %large, ptr %large3, i64 0, i64 0 + %large4 = alloca %large, align 1 + %ptrLarge4 = getelementptr inbounds %large, ptr %large4, i64 0, i64 0 + %large5 = alloca %large, align 1 + %ptrLarge5 = getelementptr inbounds %large, ptr %large5, i64 0, i64 0 + %ret1 = call ptr @baz(ptr %ptrLarge1, ptr %ptrLarge2, ptr %ptrLarge3, ptr %ptrLarge4) + %large6 = alloca %large, align 1 + %ptrLarge6 = getelementptr inbounds %large, ptr %large6, i64 0, i64 0 + %ret2 = call ptr @baz(ptr %ptrLarge6, ptr %ptrLarge2, ptr %ptrLarge3, ptr %ptrLarge4) + ret ptr %ret1 +} diff --git a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll index 9555ce032db90c..732fc6543e3141 100644 --- a/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll +++ b/llvm/test/CodeGen/X86/win64-stackprobe-overflow.ll @@ -10,5 +10,5 @@ start: attributes #0 = { nonlazybind uwtable "probe-stack"="probe_stack" "target-cpu"="x86-64" } ; CHECK-LABEL: foo: -; CHECK: movabsq $4294967304, %rax +; CHECK: movabsq $4294967312, %rax ; CHECK-NEXT: callq probe_stack