x86: implement preservenone_cc for 32 bit x86 #177714
Conversation
@llvm/pr-subscribers-backend-x86

Author: Simonas Kazlauskas (nagisa)

Changes

This is a partial fix for #113401 and also potentially a template for anybody who wants to pursue a fix for preserve_mostcc/preserve_allcc for 32 bit x86 targets, as it was quite difficult to find where the issue lies (the assertion fires in a somewhat non-local manner when the spilled register is being converted to a DWARF one...).

Patch is 21.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/177714.diff

3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index f020e0b55141c..8acdb2784e500 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -130,14 +130,14 @@ def CC_#NAME : CallingConv<[
// __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
CCIfType<[v64i1], CCPromoteToType<i64>>,
- CCIfSubtarget<"is64Bit()", CCIfType<[i64],
+ CCIfSubtarget<"is64Bit()", CCIfType<[i64],
CCAssignToReg<RC.GPR_64>>>,
- CCIfSubtarget<"is32Bit()", CCIfType<[i64],
+ CCIfSubtarget<"is32Bit()", CCIfType<[i64],
CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
// float, double, float128 --> XMM
// In the case of SSE disabled --> save to stack
- CCIfType<[f32, f64, f128],
+ CCIfType<[f32, f64, f128],
CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
// long double --> FP
@@ -145,39 +145,39 @@ def CC_#NAME : CallingConv<[
// __m128, __m128i, __m128d --> XMM
// In the case of SSE disabled --> save to stack
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
// __m256, __m256i, __m256d --> YMM
// In the case of SSE disabled --> save to stack
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
// __m512, __m512i, __m512d --> ZMM
// In the case of SSE disabled --> save to stack
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
CCIfSubtarget<"hasAVX512()",CCAssignToReg<RC.ZMM>>>,
// If no register was found -> assign to stack
// In 64 bit, assign 64/32 bit values to 8 byte stack
- CCIfSubtarget<"is64Bit()", CCIfType<[i32, i64, f32, f64],
+ CCIfSubtarget<"is64Bit()", CCIfType<[i32, i64, f32, f64],
CCAssignToStack<8, 8>>>,
// In 32 bit, assign 64/32 bit values to 8/4 byte stack
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64], CCAssignToStack<8, 4>>,
- // float 128 get stack slots whose size and alignment depends
+ // float 128 get stack slots whose size and alignment depends
// on the subtarget.
CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToStack<16, 16>>,
// 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
CCAssignToStack<32, 32>>,
// 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
@@ -205,28 +205,28 @@ def RetCC_#NAME : CallingConv<[
// __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
CCIfType<[v64i1], CCPromoteToType<i64>>,
- CCIfSubtarget<"is64Bit()", CCIfType<[i64],
+ CCIfSubtarget<"is64Bit()", CCIfType<[i64],
CCAssignToReg<RC.GPR_64>>>,
- CCIfSubtarget<"is32Bit()", CCIfType<[i64],
+ CCIfSubtarget<"is32Bit()", CCIfType<[i64],
CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
// long double --> FP
CCIfType<[f80], CCAssignToReg<RC.FP_RET>>,
// float, double, float128 --> XMM
- CCIfType<[f32, f64, f128],
+ CCIfType<[f32, f64, f128],
CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
// __m128, __m128i, __m128d --> XMM
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
// __m256, __m256i, __m256d --> YMM
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
// __m512, __m512i, __m512d --> ZMM
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
CCIfSubtarget<"hasAVX512()", CCAssignToReg<RC.ZMM>>>
]>;
}
@@ -391,7 +391,7 @@ def RetCC_X86_Win64_C : CallingConv<[
// X86-64 vectorcall return-value convention.
def RetCC_X86_64_Vectorcall : CallingConv<[
// Vectorcall calling convention always returns FP values in XMMs.
- CCIfType<[f32, f64, f128],
+ CCIfType<[f32, f64, f128],
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
// Otherwise, everything is the same as Windows X86-64 C CC.
@@ -442,7 +442,7 @@ def RetCC_X86_64_AnyReg : CallingConv<[
defm X86_32_RegCall :
X86_RegCall_base<RC_X86_32_RegCall>;
defm X86_32_RegCallv4_Win :
- X86_RegCall_base<RC_X86_32_RegCallv4_Win>;
+ X86_RegCall_base<RC_X86_32_RegCallv4_Win>;
defm X86_Win64_RegCall :
X86_RegCall_base<RC_X86_64_RegCall_Win>;
defm X86_Win64_RegCallv4 :
@@ -493,7 +493,7 @@ def RetCC_X86_64 : CallingConv<[
CCIfSubtarget<"isTargetWin64()",
CCDelegateTo<RetCC_X86_Win64_RegCall>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_SysV64_RegCall>>,
-
+
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
@@ -1187,6 +1187,7 @@ def CSR_64_AllRegs_AVX512 : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX,
(sequence "K%u", 0, 7)),
(sequence "XMM%u", 0, 15))>;
def CSR_64_NoneRegs : CalleeSavedRegs<(add RBP)>;
+def CSR_32_NoneRegs : CalleeSavedRegs<(add EBP)>;
// Standard C + YMM6-15
def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
@@ -1217,9 +1218,9 @@ def CSR_Win32_CFGuard_Check_NoSSE : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE, E
def CSR_Win32_CFGuard_Check : CalleeSavedRegs<(add CSR_32_RegCall, ECX)>;
def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
(sequence "R%u", 10, 15))>;
-def CSR_Win64_RegCall : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,
+def CSR_Win64_RegCall : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,
(sequence "XMM%u", 8, 15))>;
def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
(sequence "R%u", 12, 15))>;
-def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
+def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
(sequence "XMM%u", 8, 15))>;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 72f38133e21ff..31cfd854c86ff 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -244,6 +244,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool HasSSE = Subtarget.hasSSE1();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
+ bool Is32Bit = Subtarget.is32Bit();
bool CallsEHReturn = MF->callsEHReturn();
CallingConv::ID CC = F.getCallingConv();
@@ -274,7 +275,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_RT_AllRegs_AVX_SaveList;
return CSR_64_RT_AllRegs_SaveList;
case CallingConv::PreserveNone:
- return CSR_64_NoneRegs_SaveList;
+ return Is32Bit ? CSR_32_NoneRegs_SaveList
+ : CSR_64_NoneRegs_SaveList;
case CallingConv::CXX_FAST_TLS:
if (Is64Bit)
return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
diff --git a/llvm/test/CodeGen/X86/preserve_nonecc_call.ll b/llvm/test/CodeGen/X86/preserve_nonecc_call.ll
index 500ebb139811a..2c0aa43a6bb02 100644
--- a/llvm/test/CodeGen/X86/preserve_nonecc_call.ll
+++ b/llvm/test/CodeGen/X86/preserve_nonecc_call.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i386-unknown-unknown -mcpu=pentium4 < %s | FileCheck %s --check-prefix=X32
; This test checks various function call behaviors between preserve_none and
; normal calling conventions.
@@ -10,36 +11,60 @@ declare preserve_nonecc void @callee(ptr)
; of incompatible calling convention. Callee saved registers are saved/restored
; around the call.
define void @caller1(ptr %a) {
-; CHECK-LABEL: caller1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %r15
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: pushq %r13
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: pushq %r12
-; CHECK-NEXT: .cfi_def_cfa_offset 40
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: .cfi_offset %rbx, -48
-; CHECK-NEXT: .cfi_offset %r12, -40
-; CHECK-NEXT: .cfi_offset %r13, -32
-; CHECK-NEXT: .cfi_offset %r14, -24
-; CHECK-NEXT: .cfi_offset %r15, -16
-; CHECK-NEXT: movq %rdi, %r12
-; CHECK-NEXT: callq callee@PLT
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 40
-; CHECK-NEXT: popq %r12
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: popq %r13
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: popq %r14
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: popq %r15
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
+; X64-LABEL: caller1:
+; X64: # %bb.0:
+; X64-NEXT: pushq %r15
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: pushq %r14
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: pushq %r13
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: pushq %r12
+; X64-NEXT: .cfi_def_cfa_offset 40
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 48
+; X64-NEXT: .cfi_offset %rbx, -48
+; X64-NEXT: .cfi_offset %r12, -40
+; X64-NEXT: .cfi_offset %r13, -32
+; X64-NEXT: .cfi_offset %r14, -24
+; X64-NEXT: .cfi_offset %r15, -16
+; X64-NEXT: movq %rdi, %r12
+; X64-NEXT: callq callee@PLT
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 40
+; X64-NEXT: popq %r12
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: popq %r13
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: popq %r14
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %r15
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X32-LABEL: caller1:
+; X32: # %bb.0:
+; X32-NEXT: pushl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %edi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %edi, -12
+; X32-NEXT: .cfi_offset %ebx, -8
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: calll callee@PLT
+; X32-NEXT: addl $4, %esp
+; X32-NEXT: .cfi_adjust_cfa_offset -4
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: popl %edi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
tail call preserve_nonecc void @callee(ptr %a)
ret void
}
@@ -51,6 +76,13 @@ define preserve_nonecc void @caller2(ptr %a) {
; CHECK-LABEL: caller2:
; CHECK: # %bb.0:
; CHECK-NEXT: jmp callee@PLT # TAILCALL
+; X64-LABEL: caller2:
+; X64: # %bb.0:
+; X64-NEXT: jmp callee@PLT # TAILCALL
+;
+; X32-LABEL: caller2:
+; X32: # %bb.0:
+; X32-NEXT: jmp callee@PLT # TAILCALL
tail call preserve_nonecc void @callee(ptr %a)
ret void
}
@@ -58,71 +90,192 @@ define preserve_nonecc void @caller2(ptr %a) {
; Preserve_none function can use more registers to pass parameters.
declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11)
define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12) {
-; CHECK-LABEL: callee_with_many_param:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movq %r13, %r12
-; CHECK-NEXT: movq %r14, %r13
-; CHECK-NEXT: movq %r15, %r14
-; CHECK-NEXT: movq %rdi, %r15
-; CHECK-NEXT: movq %rsi, %rdi
-; CHECK-NEXT: movq %rdx, %rsi
-; CHECK-NEXT: movq %rcx, %rdx
-; CHECK-NEXT: movq %r8, %rcx
-; CHECK-NEXT: movq %r9, %r8
-; CHECK-NEXT: movq %r11, %r9
-; CHECK-NEXT: movq %rax, %r11
-; CHECK-NEXT: callq callee_with_many_param2@PLT
-; CHECK-NEXT: popq %rcx
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
+; X64-LABEL: callee_with_many_param:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rax
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: movq %r13, %r12
+; X64-NEXT: movq %r14, %r13
+; X64-NEXT: movq %r15, %r14
+; X64-NEXT: movq %rdi, %r15
+; X64-NEXT: movq %rsi, %rdi
+; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: movq %r8, %rcx
+; X64-NEXT: movq %r9, %r8
+; X64-NEXT: movq %r11, %r9
+; X64-NEXT: movq %rax, %r11
+; X64-NEXT: callq callee_with_many_param2@PLT
+; X64-NEXT: popq %rcx
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X32-LABEL: callee_with_many_param:
+; X32: # %bb.0:
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl {{[0-9]+}}(%esp)
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: calll callee_with_many_param2@PLT
+; X32-NEXT: addl $88, %esp
+; X32-NEXT: .cfi_adjust_cfa_offset -88
+; X32-NEXT: retl
%ret = call preserve_nonecc i64 @callee_with_many_param2(i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12)
ret i64 %ret
}
define i64 @caller3() {
-; CHECK-LABEL: caller3:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushq %r15
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: pushq %r13
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: pushq %r12
-; CHECK-NEXT: .cfi_def_cfa_offset 40
-; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 48
-; CHECK-NEXT: .cfi_offset %rbx, -48
-; CHECK-NEXT: .cfi_offset %r12, -40
-; CHECK-NEXT: .cfi_offset %r13, -32
-; CHECK-NEXT: .cfi_offset %r14, -24
-; CHECK-NEXT: .cfi_offset %r15, -16
-; CHECK-NEXT: movl $1, %r12d
-; CHECK-NEXT: movl $2, %r13d
-; CHECK-NEXT: movl $3, %r14d
-; CHECK-NEXT: movl $4, %r15d
-; CHECK-NEXT: movl $5, %edi
-; CHECK-NEXT: movl $6, %esi
-; CHECK-NEXT: movl $7, %edx
-; CHECK-NEXT: movl $8, %ecx
-; CHECK-NEXT: movl $9, %r8d
-; CHECK-NEXT: movl $10, %r9d
-; CHECK-NEXT: movl $11, %r11d
-; CHECK-NEXT: movl $12, %eax
-; CHECK-NEXT: callq callee_with_many_param@PLT
-; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: .cfi_def_cfa_offset 40
-; CHECK-NEXT: popq %r12
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: popq %r13
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: popq %r14
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: popq %r15
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: retq
+; X64-LABEL: caller3:
+; X64: # %bb.0:
+; X64-NEXT: pushq %r15
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: pushq %r14
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: pushq %r13
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: pushq %r12
+; X64-NEXT: .cfi_def_cfa_offset 40
+; X64-NEXT: pushq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 48
+; X64-NEXT: .cfi_offset %rbx, -48
+; X64-NEXT: .cfi_offset %r12, -40
+; X64-NEXT: .cfi_offset %r13, -32
+; X64-NEXT: .cfi_offset %r14, -24
+; X64-NEXT: .cfi_offset %r15, -16
+; X64-NEXT: movl $1, %r12d
+; X64-NEXT: movl $2, %r13d
+; X64-NEXT: movl $3, %r14d
+; X64-NEXT: movl $4, %r15d
+; X64-NEXT: movl $5, %edi
+; X64-NEXT: movl $6, %esi
+; X64-NEXT: movl $7, %edx
+; X64-NEXT: movl $8, %ecx
+; X64-NEXT: movl $9, %r8d
+; X64-NEXT: movl $10, %r9d
+; X64-NEXT: movl $11, %r11d
+; X64-NEXT: movl $12, %eax
+; X64-NEXT: callq callee_with_many_param@PLT
+; X64-NEXT: popq %rbx
+; X64-NEXT: .cfi_def_cfa_offset 40
+; X64-NEXT: popq %r12
+; X64-NEXT: .cfi_def_cfa_offset 32
+; X64-NEXT: popq %r13
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: popq %r14
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: popq %r15
+; X64-NEXT: .cfi_def_cfa_offset 8
+; X64-NEXT: retq
+;
+; X32-LABEL: caller3:
+; X32: # %bb.0:
+; X32-NEXT: pushl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %edi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: .cfi_offset %esi, -16
+; X32-NEXT: .cfi_offset %edi, -12
+; X32-NEXT: .cfi_offset %ebx, -8
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $12
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $11
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $10
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $9
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $8
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $7
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $6
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $5
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $4
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $3
+; X32-NEXT: .cfi_adjust_cfa_offset 4
+; X32-NEXT: pushl $0
+; X32-NEXT: .cfi_adjust...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed from 06e1cb9 to 14873c0.
nikic left a comment:
Can you please avoid formatting changes to code you are not modifying?
Force-pushed from 14873c0 to 76ebf1a.
Updated. That said, trailing whitespace is pretty annoying when the editor strips it automatically T_T
Do we really need such a calling convention on 32-bit? There are only 8 registers, of which 4 are callee-saved. And we use the stack for parameter passing, so extra caller-saved registers don't help as much as they do on 64-bit. I think we can simply emit errors in the front end if we just want to fix #113401. We have documented that it's for 64-bit only in LangRef.
It seems relatively straightforward to just support it in the backend, and it makes it easier for end-users to write code that happens to work on a 32 bit target without having to think too much about it. If the changes to the backend were invasive or expansive, I'd tend to agree with you.
But shouldn't the preserve_none calling convention pass arguments in registers? It seems like this patch is missing that part (what CC_X86_64_Preserve_None does).
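For reference, a minimal IR sketch of the register-based argument passing the existing x86-64 lowering performs, as visible in the caller3 checks above (illustrative only; the function names are made up, not part of the PR):

; Illustrative sketch: per the X64 checks above, x86-64 preserve_nonecc
; assigns the first integer arguments to %r12, %r13, %r14, %r15 before
; continuing with the ordinary %rdi, %rsi, ... sequence.
declare preserve_nonecc i64 @sink(i64, i64, i64, i64, i64, i64)

define i64 @demo() {
  ; Expected per the checks: 1 -> %r12, 2 -> %r13, 3 -> %r14,
  ; 4 -> %r15, 5 -> %rdi, 6 -> %rsi
  %r = call preserve_nonecc i64 @sink(i64 1, i64 2, i64 3, i64 4, i64 5, i64 6)
  ret i64 %r
}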
Force-pushed from 76ebf1a to 80308f6.
Force-pushed from 80308f6 to f99154d.
Fair enough, it makes sense to use registers to pass arguments with this calling convention, although I do believe preservenone_cc is useful even without register-based argument passing. I added a follow-up commit that I believe implements the part you were thinking of.
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: movl %edx, %ebx
I guess we could use ebx to pass arguments too? Not sure why x64 does not use rbx tho and I ended up replicating its behaviour here.
Personally, I don't like adding code that has no specific requirement at the moment.
Register-based argument passing is part of the definition of this calling convention. Quoting LangRef:

This is an unstable calling convention; we can change it (and the x86_64 convention has changed already in the past). It looks like LangRef doesn't mention this, but the corresponding Clang docs do:

This should probably also be in LangRef. Also, I just realized that there is an existing PR for preservenone_cc support on x86: #150106
Fair enough. I guess I should've looked for PRs before I went on to implement it, but in my defense, finding the root cause was most of the work and these changes took 5 minutes. |
This is a partial fix for #113401 and also potentially a template for anybody who wants to pursue a fix for preserve_mostcc/preserve_allcc for 32 bit x86 targets, as it was quite difficult to find where the issue lies (the assertion fires in a somewhat non-local manner when the spilled register is being converted to a DWARF one...).
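For anyone trying to reproduce the original crash, a minimal sketch distilled from the test above (hypothetical file name and invocation; not part of the PR): before this change, compiling it for a 32 bit triple hit the assertion described above.

; repro.ll -- run with: llc -mtriple=i386-unknown-unknown repro.ll
; A preserve_nonecc call from a function using the ordinary C convention
; exercises the preserve_nonecc callee-saved-register list, which
; previously had no 32 bit variant.
declare preserve_nonecc void @callee(ptr)

define void @caller(ptr %a) {
  call preserve_nonecc void @callee(ptr %a)
  ret void
}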