diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 42a09106b3a3a..c96d3c15a8823 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -41,7 +41,7 @@
 STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
 STATISTIC(NumFrameExtraProbe,
           "Number of extra stack probes generated in prologue");
-STATISTIC(NumFunctionUsingPush2Pop2, "Number of funtions using push2/pop2");
+STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
 
 using namespace llvm;
 
@@ -3021,8 +3021,12 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
   // Push GPRs. It increases frame size.
   const MachineFunction &MF = *MBB.getParent();
   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  if (X86FI->padForPush2Pop2())
-    emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);
+  if (X86FI->padForPush2Pop2()) {
+    assert(SlotSize == 8 && "Unexpected slot size for padding!");
+    BuildMI(MBB, MI, DL, TII.get(X86::PUSH64r))
+        .addReg(X86::RAX, RegState::Undef)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // Update LiveIn of the basic block and decide whether we can add a kill flag
   // to the use.
diff --git a/llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll b/llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll
new file mode 100644
index 0000000000000..a7795f0b6bc5d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/pp2-with-stack-clash-protection.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 -mattr=+push2pop2 | FileCheck %s
+
+
+; This test checks that "pushq %rax" is emitted correctly for the push2/pop2
+; padding when the function is built with the -stack-clash-protection option.
+
+define i32 @foo(ptr %src1, i32 %len, ptr %src2, ptr %dst, i1 %cmp, i32 %sub) #0 {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    push2 %r15, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    push2 %r13, %r14
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    push2 %rbx, %r12
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    subq $16, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset %rbx, -64
+; CHECK-NEXT:    .cfi_offset %r12, -56
+; CHECK-NEXT:    .cfi_offset %r13, -48
+; CHECK-NEXT:    .cfi_offset %r14, -40
+; CHECK-NEXT:    .cfi_offset %r15, -32
+; CHECK-NEXT:    .cfi_offset %rbp, -24
+; CHECK-NEXT:    movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %r8d, %ebp
+; CHECK-NEXT:    movq %rcx, %r14
+; CHECK-NEXT:    movq %rdx, %r15
+; CHECK-NEXT:    movl %esi, %r12d
+; CHECK-NEXT:    movq %rdi, %r13
+; CHECK-NEXT:    xorl %ebx, %ebx
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_1: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movq %r15, %rdi
+; CHECK-NEXT:    movq %r13, %rsi
+; CHECK-NEXT:    movq %r14, %rdx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    movl %r12d, %r8d
+; CHECK-NEXT:    callq *%rbx
+; CHECK-NEXT:    testb $1, %bpl
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %while.end
+; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    addq $16, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    pop2 %r12, %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    pop2 %r14, %r13
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    pop2 %rbp, %r15
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %call5 = tail call i32 null(ptr %src2, ptr %src1, ptr %dst, i32 0, i32 %len)
+  br i1 %cmp, label %while.body, label %while.end
+
+while.end:                                        ; preds = %while.body
+  ret i32 %sub
+}
+
+attributes #0 = { "probe-stack"="inline-asm"}