
x86: implement preservenone_cc for 32 bit x86 #177714

Closed

nagisa wants to merge 2 commits into llvm:main from nagisa:32bit-preservenone

Conversation

nagisa (Member) commented Jan 24, 2026

This is a partial fix for #113401, and also potentially a template for anybody who wants to pursue a fix for preserve_mostcc/preserve_allcc on 32-bit x86 targets, as it was quite difficult to find where the issue lies (the assertion fires in a somewhat non-local manner, when the spilled register is being converted to a DWARF one...)
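For anyone who wants to reproduce the crash this fixes, here is a minimal sketch distilled from the test updated in this PR (the function names are just illustrative):

; repro.ll -- before this patch, lowering this on a 32-bit target hit the
; assertion while converting the spilled callee-saved register to a DWARF one:
;   llc -mtriple=i386-unknown-unknown -mcpu=pentium4 repro.ll
declare preserve_nonecc void @callee(ptr)

define void @caller(ptr %a) {
  tail call preserve_nonecc void @callee(ptr %a)
  ret void
}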

llvmbot (Member) commented Jan 24, 2026

@llvm/pr-subscribers-backend-x86

Author: Simonas Kazlauskas (nagisa)

Changes

This is a partial fix for #113401, and also potentially a template for anybody who wants to pursue a fix for preserve_mostcc/preserve_allcc on 32-bit x86 targets, as it was quite difficult to find where the issue lies (the assertion fires in a somewhat non-local manner, when the spilled register is being converted to a DWARF one...)


Patch is 21.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/177714.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86CallingConv.td (+22-21)
  • (modified) llvm/lib/Target/X86/X86RegisterInfo.cpp (+3-1)
  • (modified) llvm/test/CodeGen/X86/preserve_nonecc_call.ll (+257-99)
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index f020e0b55141c..8acdb2784e500 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -130,14 +130,14 @@ def CC_#NAME : CallingConv<[
 
     // __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
     CCIfType<[v64i1], CCPromoteToType<i64>>,
-    CCIfSubtarget<"is64Bit()", CCIfType<[i64], 
+    CCIfSubtarget<"is64Bit()", CCIfType<[i64],
       CCAssignToReg<RC.GPR_64>>>,
-    CCIfSubtarget<"is32Bit()", CCIfType<[i64], 
+    CCIfSubtarget<"is32Bit()", CCIfType<[i64],
       CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
 
     // float, double, float128 --> XMM
     // In the case of SSE disabled --> save to stack
-    CCIfType<[f32, f64, f128], 
+    CCIfType<[f32, f64, f128],
       CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
 
     // long double --> FP
@@ -145,39 +145,39 @@ def CC_#NAME : CallingConv<[
 
     // __m128, __m128i, __m128d --> XMM
     // In the case of SSE disabled --> save to stack
-    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
       CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
 
     // __m256, __m256i, __m256d --> YMM
     // In the case of SSE disabled --> save to stack
-    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], 
+    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
       CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
 
     // __m512, __m512i, __m512d --> ZMM
     // In the case of SSE disabled --> save to stack
-    CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64], 
+    CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
       CCIfSubtarget<"hasAVX512()",CCAssignToReg<RC.ZMM>>>,
 
     // If no register was found -> assign to stack
 
     // In 64 bit, assign 64/32 bit values to 8 byte stack
-    CCIfSubtarget<"is64Bit()", CCIfType<[i32, i64, f32, f64], 
+    CCIfSubtarget<"is64Bit()", CCIfType<[i32, i64, f32, f64],
       CCAssignToStack<8, 8>>>,
 
     // In 32 bit, assign 64/32 bit values to 8/4 byte stack
     CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
     CCIfType<[i64, f64], CCAssignToStack<8, 4>>,
 
-    // float 128 get stack slots whose size and alignment depends 
+    // float 128 get stack slots whose size and alignment depends
     // on the subtarget.
     CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
 
     // Vectors get 16-byte stack slots that are 16-byte aligned.
-    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
       CCAssignToStack<16, 16>>,
 
     // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
-    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], 
+    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
       CCAssignToStack<32, 32>>,
 
     // 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
@@ -205,28 +205,28 @@ def RetCC_#NAME : CallingConv<[
 
     // __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
     CCIfType<[v64i1], CCPromoteToType<i64>>,
-    CCIfSubtarget<"is64Bit()", CCIfType<[i64], 
+    CCIfSubtarget<"is64Bit()", CCIfType<[i64],
       CCAssignToReg<RC.GPR_64>>>,
-    CCIfSubtarget<"is32Bit()", CCIfType<[i64], 
+    CCIfSubtarget<"is32Bit()", CCIfType<[i64],
       CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
 
     // long double --> FP
     CCIfType<[f80], CCAssignToReg<RC.FP_RET>>,
 
     // float, double, float128 --> XMM
-    CCIfType<[f32, f64, f128], 
+    CCIfType<[f32, f64, f128],
       CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
 
     // __m128, __m128i, __m128d --> XMM
-    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 
+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
       CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
 
     // __m256, __m256i, __m256d --> YMM
-    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], 
+    CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
       CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
 
     // __m512, __m512i, __m512d --> ZMM
-    CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64], 
+    CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
       CCIfSubtarget<"hasAVX512()", CCAssignToReg<RC.ZMM>>>
 ]>;
 }
@@ -391,7 +391,7 @@ def RetCC_X86_Win64_C : CallingConv<[
 // X86-64 vectorcall return-value convention.
 def RetCC_X86_64_Vectorcall : CallingConv<[
   // Vectorcall calling convention always returns FP values in XMMs.
-  CCIfType<[f32, f64, f128], 
+  CCIfType<[f32, f64, f128],
     CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
 
   // Otherwise, everything is the same as Windows X86-64 C CC.
@@ -442,7 +442,7 @@ def RetCC_X86_64_AnyReg : CallingConv<[
 defm X86_32_RegCall :
 	 X86_RegCall_base<RC_X86_32_RegCall>;
 defm X86_32_RegCallv4_Win :
-	 X86_RegCall_base<RC_X86_32_RegCallv4_Win>; 
+	 X86_RegCall_base<RC_X86_32_RegCallv4_Win>;
 defm X86_Win64_RegCall :
      X86_RegCall_base<RC_X86_64_RegCall_Win>;
 defm X86_Win64_RegCallv4 :
@@ -493,7 +493,7 @@ def RetCC_X86_64 : CallingConv<[
           CCIfSubtarget<"isTargetWin64()",
                         CCDelegateTo<RetCC_X86_Win64_RegCall>>>,
   CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_SysV64_RegCall>>,
-          
+
   // Mingw64 and native Win64 use Win64 CC
   CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
 
@@ -1187,6 +1187,7 @@ def CSR_64_AllRegs_AVX512 : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX,
                                                       (sequence "K%u", 0, 7)),
                                                  (sequence "XMM%u", 0, 15))>;
 def CSR_64_NoneRegs    : CalleeSavedRegs<(add RBP)>;
+def CSR_32_NoneRegs    : CalleeSavedRegs<(add EBP)>;
 
 // Standard C + YMM6-15
 def CSR_Win64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12,
@@ -1217,9 +1218,9 @@ def CSR_Win32_CFGuard_Check_NoSSE : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE, E
 def CSR_Win32_CFGuard_Check       : CalleeSavedRegs<(add CSR_32_RegCall, ECX)>;
 def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
                                               (sequence "R%u", 10, 15))>;
-def CSR_Win64_RegCall       : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,                                  
+def CSR_Win64_RegCall       : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,
                                               (sequence "XMM%u", 8, 15))>;
 def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP,
                                                (sequence "R%u", 12, 15))>;
-def CSR_SysV64_RegCall       : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,               
+def CSR_SysV64_RegCall       : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
                                                (sequence "XMM%u", 8, 15))>;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 72f38133e21ff..31cfd854c86ff 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -244,6 +244,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   bool HasSSE = Subtarget.hasSSE1();
   bool HasAVX = Subtarget.hasAVX();
   bool HasAVX512 = Subtarget.hasAVX512();
+  bool Is32Bit = Subtarget.is32Bit();
   bool CallsEHReturn = MF->callsEHReturn();
 
   CallingConv::ID CC = F.getCallingConv();
@@ -274,7 +275,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
       return CSR_64_RT_AllRegs_AVX_SaveList;
     return CSR_64_RT_AllRegs_SaveList;
   case CallingConv::PreserveNone:
-    return CSR_64_NoneRegs_SaveList;
+    return Is32Bit ? CSR_32_NoneRegs_SaveList
+                   : CSR_64_NoneRegs_SaveList;
   case CallingConv::CXX_FAST_TLS:
     if (Is64Bit)
       return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
diff --git a/llvm/test/CodeGen/X86/preserve_nonecc_call.ll b/llvm/test/CodeGen/X86/preserve_nonecc_call.ll
index 500ebb139811a..2c0aa43a6bb02 100644
--- a/llvm/test/CodeGen/X86/preserve_nonecc_call.ll
+++ b/llvm/test/CodeGen/X86/preserve_nonecc_call.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i386-unknown-unknown -mcpu=pentium4 < %s | FileCheck %s --check-prefix=X32
 
 ; This test checks various function call behaviors between preserve_none and
 ; normal calling conventions.
@@ -10,36 +11,60 @@ declare preserve_nonecc void @callee(ptr)
 ; of incompatible calling convention. Callee saved registers are saved/restored
 ; around the call.
 define void @caller1(ptr %a) {
-; CHECK-LABEL: caller1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %r15
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %r13
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    pushq %r12
-; CHECK-NEXT:    .cfi_def_cfa_offset 40
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    .cfi_offset %rbx, -48
-; CHECK-NEXT:    .cfi_offset %r12, -40
-; CHECK-NEXT:    .cfi_offset %r13, -32
-; CHECK-NEXT:    .cfi_offset %r14, -24
-; CHECK-NEXT:    .cfi_offset %r15, -16
-; CHECK-NEXT:    movq %rdi, %r12
-; CHECK-NEXT:    callq callee@PLT
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 40
-; CHECK-NEXT:    popq %r12
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    popq %r13
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    popq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    retq
+; X64-LABEL: caller1:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    .cfi_def_cfa_offset 40
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 48
+; X64-NEXT:    .cfi_offset %rbx, -48
+; X64-NEXT:    .cfi_offset %r12, -40
+; X64-NEXT:    .cfi_offset %r13, -32
+; X64-NEXT:    .cfi_offset %r14, -24
+; X64-NEXT:    .cfi_offset %r15, -16
+; X64-NEXT:    movq %rdi, %r12
+; X64-NEXT:    callq callee@PLT
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 40
+; X64-NEXT:    popq %r12
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    popq %r13
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    popq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    popq %r15
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X32-LABEL: caller1:
+; X32:       # %bb.0:
+; X32-NEXT:    pushl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    pushl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 12
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %edi, -12
+; X32-NEXT:    .cfi_offset %ebx, -8
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    calll callee@PLT
+; X32-NEXT:    addl $4, %esp
+; X32-NEXT:    .cfi_adjust_cfa_offset -4
+; X32-NEXT:    popl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 12
+; X32-NEXT:    popl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    popl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 4
+; X32-NEXT:    retl
   tail call preserve_nonecc void @callee(ptr %a)
   ret void
 }
@@ -51,6 +76,13 @@ define preserve_nonecc void @caller2(ptr %a) {
 ; CHECK-LABEL: caller2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    jmp callee@PLT # TAILCALL
+; X64-LABEL: caller2:
+; X64:       # %bb.0:
+; X64-NEXT:    jmp callee@PLT # TAILCALL
+;
+; X32-LABEL: caller2:
+; X32:       # %bb.0:
+; X32-NEXT:    jmp callee@PLT # TAILCALL
   tail call preserve_nonecc void @callee(ptr %a)
   ret void
 }
@@ -58,71 +90,192 @@ define preserve_nonecc void @caller2(ptr %a) {
 ; Preserve_none function can use more registers to pass parameters.
 declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11)
 define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12) {
-; CHECK-LABEL: callee_with_many_param:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movq %r13, %r12
-; CHECK-NEXT:    movq %r14, %r13
-; CHECK-NEXT:    movq %r15, %r14
-; CHECK-NEXT:    movq %rdi, %r15
-; CHECK-NEXT:    movq %rsi, %rdi
-; CHECK-NEXT:    movq %rdx, %rsi
-; CHECK-NEXT:    movq %rcx, %rdx
-; CHECK-NEXT:    movq %r8, %rcx
-; CHECK-NEXT:    movq %r9, %r8
-; CHECK-NEXT:    movq %r11, %r9
-; CHECK-NEXT:    movq %rax, %r11
-; CHECK-NEXT:    callq callee_with_many_param2@PLT
-; CHECK-NEXT:    popq %rcx
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    retq
+; X64-LABEL: callee_with_many_param:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    movq %r13, %r12
+; X64-NEXT:    movq %r14, %r13
+; X64-NEXT:    movq %r15, %r14
+; X64-NEXT:    movq %rdi, %r15
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    movq %rcx, %rdx
+; X64-NEXT:    movq %r8, %rcx
+; X64-NEXT:    movq %r9, %r8
+; X64-NEXT:    movq %r11, %r9
+; X64-NEXT:    movq %rax, %r11
+; X64-NEXT:    callq callee_with_many_param2@PLT
+; X64-NEXT:    popq %rcx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X32-LABEL: callee_with_many_param:
+; X32:       # %bb.0:
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    calll callee_with_many_param2@PLT
+; X32-NEXT:    addl $88, %esp
+; X32-NEXT:    .cfi_adjust_cfa_offset -88
+; X32-NEXT:    retl
   %ret = call preserve_nonecc i64 @callee_with_many_param2(i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12)
   ret i64 %ret
 }
 
 define i64 @caller3() {
-; CHECK-LABEL: caller3:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %r15
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    pushq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    pushq %r13
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    pushq %r12
-; CHECK-NEXT:    .cfi_def_cfa_offset 40
-; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    .cfi_offset %rbx, -48
-; CHECK-NEXT:    .cfi_offset %r12, -40
-; CHECK-NEXT:    .cfi_offset %r13, -32
-; CHECK-NEXT:    .cfi_offset %r14, -24
-; CHECK-NEXT:    .cfi_offset %r15, -16
-; CHECK-NEXT:    movl $1, %r12d
-; CHECK-NEXT:    movl $2, %r13d
-; CHECK-NEXT:    movl $3, %r14d
-; CHECK-NEXT:    movl $4, %r15d
-; CHECK-NEXT:    movl $5, %edi
-; CHECK-NEXT:    movl $6, %esi
-; CHECK-NEXT:    movl $7, %edx
-; CHECK-NEXT:    movl $8, %ecx
-; CHECK-NEXT:    movl $9, %r8d
-; CHECK-NEXT:    movl $10, %r9d
-; CHECK-NEXT:    movl $11, %r11d
-; CHECK-NEXT:    movl $12, %eax
-; CHECK-NEXT:    callq callee_with_many_param@PLT
-; CHECK-NEXT:    popq %rbx
-; CHECK-NEXT:    .cfi_def_cfa_offset 40
-; CHECK-NEXT:    popq %r12
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    popq %r13
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; CHECK-NEXT:    popq %r14
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    popq %r15
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    retq
+; X64-LABEL: caller3:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    .cfi_def_cfa_offset 40
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 48
+; X64-NEXT:    .cfi_offset %rbx, -48
+; X64-NEXT:    .cfi_offset %r12, -40
+; X64-NEXT:    .cfi_offset %r13, -32
+; X64-NEXT:    .cfi_offset %r14, -24
+; X64-NEXT:    .cfi_offset %r15, -16
+; X64-NEXT:    movl $1, %r12d
+; X64-NEXT:    movl $2, %r13d
+; X64-NEXT:    movl $3, %r14d
+; X64-NEXT:    movl $4, %r15d
+; X64-NEXT:    movl $5, %edi
+; X64-NEXT:    movl $6, %esi
+; X64-NEXT:    movl $7, %edx
+; X64-NEXT:    movl $8, %ecx
+; X64-NEXT:    movl $9, %r8d
+; X64-NEXT:    movl $10, %r9d
+; X64-NEXT:    movl $11, %r11d
+; X64-NEXT:    movl $12, %eax
+; X64-NEXT:    callq callee_with_many_param@PLT
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 40
+; X64-NEXT:    popq %r12
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    popq %r13
+; X64-NEXT:    .cfi_def_cfa_offset 24
+; X64-NEXT:    popq %r14
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    popq %r15
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+;
+; X32-LABEL: caller3:
+; X32:       # %bb.0:
+; X32-NEXT:    pushl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    pushl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 12
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    .cfi_offset %esi, -16
+; X32-NEXT:    .cfi_offset %edi, -12
+; X32-NEXT:    .cfi_offset %ebx, -8
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $12
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $11
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $10
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $9
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $8
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $7
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $6
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $5
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $4
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $3
+; X32-NEXT:    .cfi_adjust_cfa_offset 4
+; X32-NEXT:    pushl $0
+; X32-NEXT:    .cfi_adjust...
[truncated]

github-actions bot commented Jan 24, 2026

✅ With the latest revision this PR passed the C/C++ code formatter.

nikic (Contributor) left a comment:

Can you please avoid formatting changes to code you are not modifying?

nagisa force-pushed the 32bit-preservenone branch from 14873c0 to 76ebf1a on January 24, 2026 at 13:59
nagisa (Member, Author) commented Jan 24, 2026

Updated. That said, trailing whitespace is pretty annoying when the editor strips it automatically T_T

phoebewang (Contributor):

Do we really need such a calling convention on 32-bit? There are only 8 registers, of which 4 are callee-saved. And we use the stack for parameter passing, so extra caller-saved registers don't help the way they do on 64-bit.

I think we can simply emit errors in the front end if we just want to fix #113401. We have documented that it's 64-bit only in the LangRef.

nagisa (Member, Author) commented Jan 24, 2026

It seems relatively straightforward to just support it in the backend, and it makes it easier for end-users to write code that happens to work on a 32-bit target without having to think too much about it.

If changes to the backend were invasive or expansive, I'd tend to agree with you.

nikic (Contributor) commented Jan 24, 2026

Do we really need such a calling convention on 32-bit? There are only 8 registers, of which 4 are callee-saved. And we use the stack for parameter passing, so extra caller-saved registers don't help the way they do on 64-bit.

But shouldn't the preserve_none calling convention pass in registers? It seems like this patch is missing that part (what CC_X86_64_Preserve_None does).
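For reference, the 64-bit convention's register use is visible in this patch's own caller3 test; a rough sketch of the IR and the assignment it produces (distilled from the CHECK lines in the diff above):

; On x86-64, preserve_nonecc assigns the leading arguments to r12-r15
; before falling through to the usual SysV registers (rdi, rsi, ...):
;   movl $1, %r12d
;   movl $2, %r13d
;   movl $3, %r14d
;   movl $4, %r15d
;   movl $5, %edi
;   ...
declare preserve_nonecc i64 @callee_with_many_param(i64, i64, i64, i64, i64, i64,
                                                    i64, i64, i64, i64, i64, i64)

define i64 @caller3() {
  %r = call preserve_nonecc i64 @callee_with_many_param(i64 1, i64 2, i64 3,
            i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12)
  ret i64 %r
}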

nagisa force-pushed the 32bit-preservenone branch from 76ebf1a to 80308f6 on January 24, 2026 at 18:17
nagisa force-pushed the 32bit-preservenone branch from 80308f6 to f99154d on January 24, 2026 at 18:19
nagisa (Member, Author) commented Jan 24, 2026

Fair enough, it makes sense to use registers to pass arguments with this calling convention, although I do believe preservenone_cc is useful even without register-based argument passing.

I added a follow-up commit that I believe implements the part you were thinking of.

; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %ecx, %esi
; X32-NEXT: movl %edx, %ebx
nagisa (Member, Author) commented on the lines above:

I guess we could use ebx to pass arguments too? Not sure why x64 does not use rbx, though; I ended up replicating its behaviour here.

phoebewang (Contributor):

Personally, I don't like adding code that has no specific requirement at the moment;
User-wise, we should not assume what they really want (register- vs. stack-based argument passing);
ABI-wise, we cannot implement it one way at random and then change it another way arbitrarily.

nikic (Contributor) commented Jan 25, 2026

User-wise, we should not assume what they really want (register- vs. stack-based argument passing);

Register-based argument passing is part of the definition of this calling convention. Quoting LangRef:

It also uses all general registers to pass arguments.

ABI-wise, we cannot implement it one way at random and then change it another way arbitrarily.

This is an unstable calling convention; we can change it (and the x86_64 convention has already changed in the past). It looks like LangRef doesn't mention this, but the corresponding Clang docs do:

preserve_none’s ABI is still unstable, and may be changed in the future.

This should probably also be in LangRef.


Also, I just realized that there is an existing PR for preservenone_cc support on x86: #150106

nagisa (Member, Author) commented Jan 25, 2026

Fair enough. I guess I should've looked for PRs before I went on to implement it, but in my defense, finding the root cause was most of the work and these changes took 5 minutes.

nagisa closed this on Jan 25, 2026