From a52e7d53fe073e65b24acee1fe18d97251600547 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Mon, 8 Jul 2024 15:51:55 +0800 Subject: [PATCH] Address comments - Restrict the option to AIX - In IR use module flag instead of function attribute - Added more cases - Simplify change to AsmPrinter - Added cases for byref --- clang/include/clang/Driver/Options.td | 3 +- clang/lib/CodeGen/CGCall.cpp | 3 - clang/lib/CodeGen/Targets/PPC.cpp | 11 + clang/test/CodeGen/PowerPC/save-reg-params.c | 6 +- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 9 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12 +- .../Target/PowerPC/PPCMachineFunctionInfo.h | 6 - llvm/test/CodeGen/PowerPC/save-reg-params.ll | 356 ++++++++++-------- 8 files changed, 233 insertions(+), 173 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4135f0db604509..c0eb280a84ef89 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5041,8 +5041,9 @@ def mno_spe : Flag<["-"], "mno-spe">, Group; def mefpu2 : Flag<["-"], "mefpu2">, Group; } // let Flags = [TargetSpecific] def msave_reg_params : Flag<["-"], "msave-reg-params">, Group, + Flags<[TargetSpecific]>, Visibility<[ClangOption, CC1Option]>, - HelpText<"Save arguments passed by registers to stack">, + HelpText<"Save arguments passed by registers to ABI-defined stack positions">, MarshallingInfoFlag>; def mabi_EQ_quadword_atomics : Flag<["-"], "mabi=quadword-atomics">, Group, Visibility<[ClangOption, CC1Option]>, diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 8269755cdbf89d..2b301130ef7b70 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1931,9 +1931,6 @@ static void getTrivialDefaultFunctionAttributes( if (CodeGenOpts.NullPointerIsValid) FuncAttrs.addAttribute(llvm::Attribute::NullPointerIsValid); - if (CodeGenOpts.SaveRegParams) - FuncAttrs.addAttribute("save-reg-params"); - if 
(LangOpts.getDefaultExceptionMode() == LangOptions::FPE_Ignore) FuncAttrs.addAttribute("no-trapping-math", "true"); diff --git a/clang/lib/CodeGen/Targets/PPC.cpp b/clang/lib/CodeGen/Targets/PPC.cpp index e4155810963eb8..185f4802b11499 100644 --- a/clang/lib/CodeGen/Targets/PPC.cpp +++ b/clang/lib/CodeGen/Targets/PPC.cpp @@ -146,6 +146,10 @@ class AIXTargetCodeGenInfo : public TargetCodeGenInfo { void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; + + void emitTargetMetadata(CodeGen::CodeGenModule &CGM, + const llvm::MapVector + &MangledDeclNames) const override; }; } // namespace @@ -321,6 +325,13 @@ void AIXTargetCodeGenInfo::setTargetAttributes( } } +void AIXTargetCodeGenInfo::emitTargetMetadata( + CodeGen::CodeGenModule &CGM, + const llvm::MapVector &MangledDeclNames) const { + if (CGM.getCodeGenOpts().SaveRegParams) + CGM.getModule().addModuleFlag(llvm::Module::Error, "save-reg-params", 1); +} + // PowerPC-32 namespace { /// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information. 
diff --git a/clang/test/CodeGen/PowerPC/save-reg-params.c b/clang/test/CodeGen/PowerPC/save-reg-params.c index 6599310afa41a3..7e98de22d3a519 100644 --- a/clang/test/CodeGen/PowerPC/save-reg-params.c +++ b/clang/test/CodeGen/PowerPC/save-reg-params.c @@ -1,12 +1,10 @@ -// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s -msave-reg-params | FileCheck -check-prefix=SAVE %s // RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm -o - %s -msave-reg-params | FileCheck -check-prefix=SAVE %s // RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm -o - %s -msave-reg-params | FileCheck -check-prefix=SAVE %s -// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck -check-prefix=NOSAVE %s // RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm -o - %s | FileCheck -check-prefix=NOSAVE %s // RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm -o - %s | FileCheck -check-prefix=NOSAVE %s void bar(int); void foo(int x) { bar(x); } -// SAVE: attributes #{{[0-9]+}} = { {{.+}} "save-reg-params" {{.+}} } -// NOSAVE-NOT: "save-reg-params"··· \ No newline at end of file +// SAVE: !{i32 1, !"save-reg-params", i32 1} +// NOSAVE-NOT: !{i32 1, !"save-reg-params", i32 1} \ No newline at end of file diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 085a67e9194215..2fa504218e3157 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2496,7 +2496,12 @@ void PPCAIXAsmPrinter::emitTracebackTable() { uint32_t GPRSaved = 0; - if (FI->getForceGPRSaveCount() < 0) { + bool SaveParams = + MF->getFunction().getParent()->getModuleFlag("save-reg-params"); + if (SaveParams) { + // Assuming eight GPRs matches XL behavior for varargs. + GPRSaved = 8; + } else { // X13 is reserved under 64-bit environment. unsigned GPRBegin = Subtarget->isPPC64() ? PPC::X14 : PPC::R13; unsigned GPREnd = Subtarget->isPPC64() ? 
PPC::X31 : PPC::R31; @@ -2507,8 +2512,6 @@ void PPCAIXAsmPrinter::emitTracebackTable() { break; } } - } else { - GPRSaved = FI->getForceGPRSaveCount(); } SecondHalfOfMandatoryField |= (GPRSaved << TracebackTable::GPRSavedShift) & diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 9b5d448d0ce448..e519551934b1e6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7225,7 +7225,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize)); uint64_t SaveStackPos = CCInfo.getStackSize(); - bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params"); + bool SaveParams = + MF.getFunction().getParent()->getModuleFlag("save-reg-params"); CCInfo.AnalyzeFormalArguments(Ins, CC_AIX); SmallVector MemOps; @@ -7247,10 +7248,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( if (SaveParams && VA.isRegLoc() && !Flags.isByVal()) { const TargetRegisterClass *RegClass = getRegClassForSVT( LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX()); - // On PPC64, we need to use std instead of stw for GPR. + // On PPC64, debuggers assume extended 8-byte values are stored from GPR. MVT SaveVT = RegClass == &PPC::G8RCRegClass ? 
MVT::i64 : LocVT; const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass); - SDValue Parm = DAG.getRegister(VReg, SaveVT); + SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT); int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN, @@ -7478,11 +7479,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( FuncInfo->setMinReservedArea(CallerReservedArea); if (isVarArg) { - // Maximum number of saved GPR in traceback table is 8, for varargs, - // assuming eight GPRs matches XL behavior. - if (SaveParams) - FuncInfo->setForceGPRSaveCount(8); - FuncInfo->setVarArgsFrameIndex( MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); diff --git a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h index a9e7d63237c7bc..b7d14da05ee248 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -150,9 +150,6 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// to use SExt/ZExt flags in later optimization. std::vector> LiveInAttrs; - /// Set a fixed number of saved GPRs, negative if it's non-fixed. - int ForceGPRSaveCount = -1; - /// Flags for aix-shared-lib-tls-model-opt, will be lazily initialized for /// each function. 
bool AIXFuncUseTLSIEForLD = false; @@ -166,9 +163,6 @@ class PPCFunctionInfo : public MachineFunctionInfo { const DenseMap &Src2DstMBB) const override; - int getForceGPRSaveCount() const { return ForceGPRSaveCount; } - void setForceGPRSaveCount(int Num) { ForceGPRSaveCount = Num; } - int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } diff --git a/llvm/test/CodeGen/PowerPC/save-reg-params.ll b/llvm/test/CodeGen/PowerPC/save-reg-params.ll index ad29a202db115f..3b6cf694254096 100644 --- a/llvm/test/CodeGen/PowerPC/save-reg-params.ll +++ b/llvm/test/CodeGen/PowerPC/save-reg-params.ll @@ -2,20 +2,20 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix -mcpu=pwr7 < %s | FileCheck %s -check-prefix=32BIT ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix -mcpu=pwr7 < %s | FileCheck %s -check-prefix=64BIT -define void @i64_join(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h, i64 %i, i64 %j) #0 { +define void @i64_join(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h, i64 %i, i64 %j) { ; 32BIT-LABEL: i64_join: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 ; 32BIT-NEXT: stwu 1, -64(1) ; 32BIT-NEXT: stw 0, 72(1) -; 32BIT-NEXT: stw 10, 116(1) -; 32BIT-NEXT: stw 9, 112(1) -; 32BIT-NEXT: stw 8, 108(1) -; 32BIT-NEXT: stw 7, 104(1) -; 32BIT-NEXT: stw 6, 100(1) -; 32BIT-NEXT: stw 5, 96(1) -; 32BIT-NEXT: stw 4, 92(1) ; 32BIT-NEXT: stw 3, 88(1) +; 32BIT-NEXT: stw 4, 92(1) +; 32BIT-NEXT: stw 5, 96(1) +; 32BIT-NEXT: stw 6, 100(1) +; 32BIT-NEXT: stw 7, 104(1) +; 32BIT-NEXT: stw 8, 108(1) +; 32BIT-NEXT: stw 9, 112(1) +; 32BIT-NEXT: stw 10, 116(1) ; 32BIT-NEXT: bl .foo[PR] ; 32BIT-NEXT: nop ; 32BIT-NEXT: addi 1, 1, 64 @@ -28,14 +28,14 @@ define void @i64_join(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i6 ; 64BIT-NEXT: mflr 0 ; 64BIT-NEXT: stdu 1, -112(1) ; 64BIT-NEXT: std 0, 128(1) -; 64BIT-NEXT: std 10, 216(1) -; 64BIT-NEXT: std 9, 
208(1) -; 64BIT-NEXT: std 8, 200(1) -; 64BIT-NEXT: std 7, 192(1) -; 64BIT-NEXT: std 6, 184(1) -; 64BIT-NEXT: std 5, 176(1) -; 64BIT-NEXT: std 4, 168(1) ; 64BIT-NEXT: std 3, 160(1) +; 64BIT-NEXT: std 4, 168(1) +; 64BIT-NEXT: std 5, 176(1) +; 64BIT-NEXT: std 6, 184(1) +; 64BIT-NEXT: std 7, 192(1) +; 64BIT-NEXT: std 8, 200(1) +; 64BIT-NEXT: std 9, 208(1) +; 64BIT-NEXT: std 10, 216(1) ; 64BIT-NEXT: bl .foo[PR] ; 64BIT-NEXT: nop ; 64BIT-NEXT: addi 1, 1, 112 @@ -56,20 +56,20 @@ entry: ret void } -define void @i64_join_missing(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h, i64 %i, i64 %j) #0 { +define void @i64_join_missing(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h, i64 %i, i64 %j) { ; 32BIT-LABEL: i64_join_missing: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 ; 32BIT-NEXT: stwu 1, -64(1) ; 32BIT-NEXT: stw 0, 72(1) -; 32BIT-NEXT: stw 10, 116(1) -; 32BIT-NEXT: stw 9, 112(1) -; 32BIT-NEXT: stw 8, 108(1) -; 32BIT-NEXT: stw 7, 104(1) -; 32BIT-NEXT: stw 6, 100(1) -; 32BIT-NEXT: stw 5, 96(1) -; 32BIT-NEXT: stw 4, 92(1) ; 32BIT-NEXT: stw 3, 88(1) +; 32BIT-NEXT: stw 4, 92(1) +; 32BIT-NEXT: stw 5, 96(1) +; 32BIT-NEXT: stw 6, 100(1) +; 32BIT-NEXT: stw 7, 104(1) +; 32BIT-NEXT: stw 8, 108(1) +; 32BIT-NEXT: stw 9, 112(1) +; 32BIT-NEXT: stw 10, 116(1) ; 32BIT-NEXT: bl .foo[PR] ; 32BIT-NEXT: nop ; 32BIT-NEXT: addi 1, 1, 64 @@ -82,14 +82,14 @@ define void @i64_join_missing(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i6 ; 64BIT-NEXT: mflr 0 ; 64BIT-NEXT: stdu 1, -112(1) ; 64BIT-NEXT: std 0, 128(1) -; 64BIT-NEXT: std 10, 216(1) -; 64BIT-NEXT: std 9, 208(1) -; 64BIT-NEXT: std 8, 200(1) -; 64BIT-NEXT: std 7, 192(1) -; 64BIT-NEXT: std 6, 184(1) -; 64BIT-NEXT: std 5, 176(1) -; 64BIT-NEXT: std 4, 168(1) ; 64BIT-NEXT: std 3, 160(1) +; 64BIT-NEXT: std 4, 168(1) +; 64BIT-NEXT: std 5, 176(1) +; 64BIT-NEXT: std 6, 184(1) +; 64BIT-NEXT: std 7, 192(1) +; 64BIT-NEXT: std 8, 200(1) +; 64BIT-NEXT: std 9, 208(1) +; 64BIT-NEXT: std 10, 216(1) ; 64BIT-NEXT: bl 
.foo[PR] ; 64BIT-NEXT: nop ; 64BIT-NEXT: addi 1, 1, 112 @@ -109,20 +109,20 @@ entry: ret void } -define void @i32_join(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h, i32 signext %i, i32 signext %j) #0 { +define void @i32_join(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h, i32 signext %i, i32 signext %j) { ; 32BIT-LABEL: i32_join: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 ; 32BIT-NEXT: stwu 1, -64(1) ; 32BIT-NEXT: stw 0, 72(1) -; 32BIT-NEXT: stw 10, 116(1) -; 32BIT-NEXT: stw 9, 112(1) -; 32BIT-NEXT: stw 8, 108(1) -; 32BIT-NEXT: stw 7, 104(1) -; 32BIT-NEXT: stw 6, 100(1) -; 32BIT-NEXT: stw 5, 96(1) -; 32BIT-NEXT: stw 4, 92(1) ; 32BIT-NEXT: stw 3, 88(1) +; 32BIT-NEXT: stw 4, 92(1) +; 32BIT-NEXT: stw 5, 96(1) +; 32BIT-NEXT: stw 6, 100(1) +; 32BIT-NEXT: stw 7, 104(1) +; 32BIT-NEXT: stw 8, 108(1) +; 32BIT-NEXT: stw 9, 112(1) +; 32BIT-NEXT: stw 10, 116(1) ; 32BIT-NEXT: bl .foo[PR] ; 32BIT-NEXT: nop ; 32BIT-NEXT: addi 1, 1, 64 @@ -135,14 +135,14 @@ define void @i32_join(i32 signext %a, i32 signext %b, i32 signext %c, i32 signex ; 64BIT-NEXT: mflr 0 ; 64BIT-NEXT: stdu 1, -112(1) ; 64BIT-NEXT: std 0, 128(1) -; 64BIT-NEXT: std 10, 216(1) -; 64BIT-NEXT: std 9, 208(1) -; 64BIT-NEXT: std 8, 200(1) -; 64BIT-NEXT: std 7, 192(1) -; 64BIT-NEXT: std 6, 184(1) -; 64BIT-NEXT: std 5, 176(1) -; 64BIT-NEXT: std 4, 168(1) ; 64BIT-NEXT: std 3, 160(1) +; 64BIT-NEXT: std 4, 168(1) +; 64BIT-NEXT: std 5, 176(1) +; 64BIT-NEXT: std 6, 184(1) +; 64BIT-NEXT: std 7, 192(1) +; 64BIT-NEXT: std 8, 200(1) +; 64BIT-NEXT: std 9, 208(1) +; 64BIT-NEXT: std 10, 216(1) ; 64BIT-NEXT: bl .foo[PR] ; 64BIT-NEXT: nop ; 64BIT-NEXT: addi 1, 1, 112 @@ -163,20 +163,20 @@ entry: ret void } -define void @i32_join_missing(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h, i32 
signext %i, i32 signext %j) #0 { +define void @i32_join_missing(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h, i32 signext %i, i32 signext %j) { ; 32BIT-LABEL: i32_join_missing: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 ; 32BIT-NEXT: stwu 1, -64(1) ; 32BIT-NEXT: stw 0, 72(1) -; 32BIT-NEXT: stw 10, 116(1) -; 32BIT-NEXT: stw 9, 112(1) -; 32BIT-NEXT: stw 8, 108(1) -; 32BIT-NEXT: stw 7, 104(1) -; 32BIT-NEXT: stw 6, 100(1) -; 32BIT-NEXT: stw 5, 96(1) -; 32BIT-NEXT: stw 4, 92(1) ; 32BIT-NEXT: stw 3, 88(1) +; 32BIT-NEXT: stw 4, 92(1) +; 32BIT-NEXT: stw 5, 96(1) +; 32BIT-NEXT: stw 6, 100(1) +; 32BIT-NEXT: stw 7, 104(1) +; 32BIT-NEXT: stw 8, 108(1) +; 32BIT-NEXT: stw 9, 112(1) +; 32BIT-NEXT: stw 10, 116(1) ; 32BIT-NEXT: bl .foo[PR] ; 32BIT-NEXT: nop ; 32BIT-NEXT: addi 1, 1, 64 @@ -189,14 +189,14 @@ define void @i32_join_missing(i32 signext %a, i32 signext %b, i32 signext %c, i3 ; 64BIT-NEXT: mflr 0 ; 64BIT-NEXT: stdu 1, -112(1) ; 64BIT-NEXT: std 0, 128(1) -; 64BIT-NEXT: std 10, 216(1) -; 64BIT-NEXT: std 9, 208(1) -; 64BIT-NEXT: std 8, 200(1) -; 64BIT-NEXT: std 7, 192(1) -; 64BIT-NEXT: std 6, 184(1) -; 64BIT-NEXT: std 5, 176(1) -; 64BIT-NEXT: std 4, 168(1) ; 64BIT-NEXT: std 3, 160(1) +; 64BIT-NEXT: std 4, 168(1) +; 64BIT-NEXT: std 5, 176(1) +; 64BIT-NEXT: std 6, 184(1) +; 64BIT-NEXT: std 7, 192(1) +; 64BIT-NEXT: std 8, 200(1) +; 64BIT-NEXT: std 9, 208(1) +; 64BIT-NEXT: std 10, 216(1) ; 64BIT-NEXT: bl .foo[PR] ; 64BIT-NEXT: nop ; 64BIT-NEXT: addi 1, 1, 112 @@ -216,22 +216,22 @@ entry: ret void } -define void @f32_join(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j) #0 { +define void @f32_join(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j) { ; 32BIT-LABEL: f32_join: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 ; 32BIT-NEXT: stwu 1, -64(1) ; 32BIT-NEXT: stw 0, 72(1) -; 32BIT-NEXT: stfs 
10, 124(1) -; 32BIT-NEXT: stfs 9, 120(1) -; 32BIT-NEXT: stfs 8, 116(1) -; 32BIT-NEXT: stfs 7, 112(1) -; 32BIT-NEXT: stfs 6, 108(1) -; 32BIT-NEXT: stfs 5, 104(1) -; 32BIT-NEXT: stfs 4, 100(1) -; 32BIT-NEXT: stfs 3, 96(1) -; 32BIT-NEXT: stfs 2, 92(1) ; 32BIT-NEXT: stfs 1, 88(1) +; 32BIT-NEXT: stfs 2, 92(1) +; 32BIT-NEXT: stfs 3, 96(1) +; 32BIT-NEXT: stfs 4, 100(1) +; 32BIT-NEXT: stfs 5, 104(1) +; 32BIT-NEXT: stfs 6, 108(1) +; 32BIT-NEXT: stfs 7, 112(1) +; 32BIT-NEXT: stfs 8, 116(1) +; 32BIT-NEXT: stfs 9, 120(1) +; 32BIT-NEXT: stfs 10, 124(1) ; 32BIT-NEXT: bl .foo[PR] ; 32BIT-NEXT: nop ; 32BIT-NEXT: addi 1, 1, 64 @@ -244,16 +244,16 @@ define void @f32_join(float %a, float %b, float %c, float %d, float %e, float %f ; 64BIT-NEXT: mflr 0 ; 64BIT-NEXT: stdu 1, -112(1) ; 64BIT-NEXT: std 0, 128(1) -; 64BIT-NEXT: stfs 10, 232(1) -; 64BIT-NEXT: stfs 9, 224(1) -; 64BIT-NEXT: stfs 8, 216(1) -; 64BIT-NEXT: stfs 7, 208(1) -; 64BIT-NEXT: stfs 6, 200(1) -; 64BIT-NEXT: stfs 5, 192(1) -; 64BIT-NEXT: stfs 4, 184(1) -; 64BIT-NEXT: stfs 3, 176(1) -; 64BIT-NEXT: stfs 2, 168(1) ; 64BIT-NEXT: stfs 1, 160(1) +; 64BIT-NEXT: stfs 2, 168(1) +; 64BIT-NEXT: stfs 3, 176(1) +; 64BIT-NEXT: stfs 4, 184(1) +; 64BIT-NEXT: stfs 5, 192(1) +; 64BIT-NEXT: stfs 6, 200(1) +; 64BIT-NEXT: stfs 7, 208(1) +; 64BIT-NEXT: stfs 8, 216(1) +; 64BIT-NEXT: stfs 9, 224(1) +; 64BIT-NEXT: stfs 10, 232(1) ; 64BIT-NEXT: bl .foo[PR] ; 64BIT-NEXT: nop ; 64BIT-NEXT: addi 1, 1, 112 @@ -274,22 +274,22 @@ entry: ret void } -define void @f32_join_missing(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j) #0 { +define void @f32_join_missing(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j) { ; 32BIT-LABEL: f32_join_missing: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 ; 32BIT-NEXT: stwu 1, -64(1) ; 32BIT-NEXT: stw 0, 72(1) -; 32BIT-NEXT: stfs 10, 124(1) -; 32BIT-NEXT: stfs 9, 120(1) -; 32BIT-NEXT: stfs 8, 116(1) -; 32BIT-NEXT: 
stfs 7, 112(1) -; 32BIT-NEXT: stfs 6, 108(1) -; 32BIT-NEXT: stfs 5, 104(1) -; 32BIT-NEXT: stfs 4, 100(1) -; 32BIT-NEXT: stfs 3, 96(1) -; 32BIT-NEXT: stfs 2, 92(1) ; 32BIT-NEXT: stfs 1, 88(1) +; 32BIT-NEXT: stfs 2, 92(1) +; 32BIT-NEXT: stfs 3, 96(1) +; 32BIT-NEXT: stfs 4, 100(1) +; 32BIT-NEXT: stfs 5, 104(1) +; 32BIT-NEXT: stfs 6, 108(1) +; 32BIT-NEXT: stfs 7, 112(1) +; 32BIT-NEXT: stfs 8, 116(1) +; 32BIT-NEXT: stfs 9, 120(1) +; 32BIT-NEXT: stfs 10, 124(1) ; 32BIT-NEXT: bl .foo[PR] ; 32BIT-NEXT: nop ; 32BIT-NEXT: addi 1, 1, 64 @@ -302,16 +302,16 @@ define void @f32_join_missing(float %a, float %b, float %c, float %d, float %e, ; 64BIT-NEXT: mflr 0 ; 64BIT-NEXT: stdu 1, -112(1) ; 64BIT-NEXT: std 0, 128(1) -; 64BIT-NEXT: stfs 10, 232(1) -; 64BIT-NEXT: stfs 9, 224(1) -; 64BIT-NEXT: stfs 8, 216(1) -; 64BIT-NEXT: stfs 7, 208(1) -; 64BIT-NEXT: stfs 6, 200(1) -; 64BIT-NEXT: stfs 5, 192(1) -; 64BIT-NEXT: stfs 4, 184(1) -; 64BIT-NEXT: stfs 3, 176(1) -; 64BIT-NEXT: stfs 2, 168(1) ; 64BIT-NEXT: stfs 1, 160(1) +; 64BIT-NEXT: stfs 2, 168(1) +; 64BIT-NEXT: stfs 3, 176(1) +; 64BIT-NEXT: stfs 4, 184(1) +; 64BIT-NEXT: stfs 5, 192(1) +; 64BIT-NEXT: stfs 6, 200(1) +; 64BIT-NEXT: stfs 7, 208(1) +; 64BIT-NEXT: stfs 8, 216(1) +; 64BIT-NEXT: stfs 9, 224(1) +; 64BIT-NEXT: stfs 10, 232(1) ; 64BIT-NEXT: bl .foo[PR] ; 64BIT-NEXT: nop ; 64BIT-NEXT: addi 1, 1, 112 @@ -332,22 +332,22 @@ entry: ret void } -define void @f64_join(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j) #0 { +define void @f64_join(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j) { ; 32BIT-LABEL: f64_join: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 ; 32BIT-NEXT: stwu 1, -64(1) ; 32BIT-NEXT: stw 0, 72(1) -; 32BIT-NEXT: stfd 10, 160(1) -; 32BIT-NEXT: stfd 9, 152(1) -; 32BIT-NEXT: stfd 8, 144(1) -; 32BIT-NEXT: stfd 7, 136(1) -; 32BIT-NEXT: stfd 6, 128(1) -; 32BIT-NEXT: stfd 5, 120(1) -; 32BIT-NEXT: stfd 
4, 112(1) -; 32BIT-NEXT: stfd 3, 104(1) -; 32BIT-NEXT: stfd 2, 96(1) ; 32BIT-NEXT: stfd 1, 88(1) +; 32BIT-NEXT: stfd 2, 96(1) +; 32BIT-NEXT: stfd 3, 104(1) +; 32BIT-NEXT: stfd 4, 112(1) +; 32BIT-NEXT: stfd 5, 120(1) +; 32BIT-NEXT: stfd 6, 128(1) +; 32BIT-NEXT: stfd 7, 136(1) +; 32BIT-NEXT: stfd 8, 144(1) +; 32BIT-NEXT: stfd 9, 152(1) +; 32BIT-NEXT: stfd 10, 160(1) ; 32BIT-NEXT: bl .foo[PR] ; 32BIT-NEXT: nop ; 32BIT-NEXT: addi 1, 1, 64 @@ -360,16 +360,16 @@ define void @f64_join(double %a, double %b, double %c, double %d, double %e, dou ; 64BIT-NEXT: mflr 0 ; 64BIT-NEXT: stdu 1, -112(1) ; 64BIT-NEXT: std 0, 128(1) -; 64BIT-NEXT: stfd 10, 232(1) -; 64BIT-NEXT: stfd 9, 224(1) -; 64BIT-NEXT: stfd 8, 216(1) -; 64BIT-NEXT: stfd 7, 208(1) -; 64BIT-NEXT: stfd 6, 200(1) -; 64BIT-NEXT: stfd 5, 192(1) -; 64BIT-NEXT: stfd 4, 184(1) -; 64BIT-NEXT: stfd 3, 176(1) -; 64BIT-NEXT: stfd 2, 168(1) ; 64BIT-NEXT: stfd 1, 160(1) +; 64BIT-NEXT: stfd 2, 168(1) +; 64BIT-NEXT: stfd 3, 176(1) +; 64BIT-NEXT: stfd 4, 184(1) +; 64BIT-NEXT: stfd 5, 192(1) +; 64BIT-NEXT: stfd 6, 200(1) +; 64BIT-NEXT: stfd 7, 208(1) +; 64BIT-NEXT: stfd 8, 216(1) +; 64BIT-NEXT: stfd 9, 224(1) +; 64BIT-NEXT: stfd 10, 232(1) ; 64BIT-NEXT: bl .foo[PR] ; 64BIT-NEXT: nop ; 64BIT-NEXT: addi 1, 1, 112 @@ -390,22 +390,22 @@ entry: ret void } -define void @f64_missing(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j) #0 { +define void @f64_missing(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j) { ; 32BIT-LABEL: f64_missing: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 ; 32BIT-NEXT: stwu 1, -64(1) ; 32BIT-NEXT: stw 0, 72(1) -; 32BIT-NEXT: stfd 10, 160(1) -; 32BIT-NEXT: stfd 9, 152(1) -; 32BIT-NEXT: stfd 8, 144(1) -; 32BIT-NEXT: stfd 7, 136(1) -; 32BIT-NEXT: stfd 6, 128(1) -; 32BIT-NEXT: stfd 5, 120(1) -; 32BIT-NEXT: stfd 4, 112(1) -; 32BIT-NEXT: stfd 3, 104(1) -; 32BIT-NEXT: stfd 2, 96(1) ; 32BIT-NEXT: 
stfd 1, 88(1) +; 32BIT-NEXT: stfd 2, 96(1) +; 32BIT-NEXT: stfd 3, 104(1) +; 32BIT-NEXT: stfd 4, 112(1) +; 32BIT-NEXT: stfd 5, 120(1) +; 32BIT-NEXT: stfd 6, 128(1) +; 32BIT-NEXT: stfd 7, 136(1) +; 32BIT-NEXT: stfd 8, 144(1) +; 32BIT-NEXT: stfd 9, 152(1) +; 32BIT-NEXT: stfd 10, 160(1) ; 32BIT-NEXT: bl .foo[PR] ; 32BIT-NEXT: nop ; 32BIT-NEXT: addi 1, 1, 64 @@ -418,16 +418,16 @@ define void @f64_missing(double %a, double %b, double %c, double %d, double %e, ; 64BIT-NEXT: mflr 0 ; 64BIT-NEXT: stdu 1, -112(1) ; 64BIT-NEXT: std 0, 128(1) -; 64BIT-NEXT: stfd 10, 232(1) -; 64BIT-NEXT: stfd 9, 224(1) -; 64BIT-NEXT: stfd 8, 216(1) -; 64BIT-NEXT: stfd 7, 208(1) -; 64BIT-NEXT: stfd 6, 200(1) -; 64BIT-NEXT: stfd 5, 192(1) -; 64BIT-NEXT: stfd 4, 184(1) -; 64BIT-NEXT: stfd 3, 176(1) -; 64BIT-NEXT: stfd 2, 168(1) ; 64BIT-NEXT: stfd 1, 160(1) +; 64BIT-NEXT: stfd 2, 168(1) +; 64BIT-NEXT: stfd 3, 176(1) +; 64BIT-NEXT: stfd 4, 184(1) +; 64BIT-NEXT: stfd 5, 192(1) +; 64BIT-NEXT: stfd 6, 200(1) +; 64BIT-NEXT: stfd 7, 208(1) +; 64BIT-NEXT: stfd 8, 216(1) +; 64BIT-NEXT: stfd 9, 224(1) +; 64BIT-NEXT: stfd 10, 232(1) ; 64BIT-NEXT: bl .foo[PR] ; 64BIT-NEXT: nop ; 64BIT-NEXT: addi 1, 1, 112 @@ -448,7 +448,7 @@ entry: ret void } -define void @mixed_1(double %a, i64 %b, i64 %c, i32 signext %d, i64 %e, float %f, float %g, double %h, i32 signext %i, double %j) #0 { +define void @mixed_1(double %a, i64 %b, i64 %c, i32 signext %d, i64 %e, float %f, float %g, double %h, i32 signext %i, double %j) { ; 32BIT-LABEL: mixed_1: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 @@ -587,7 +587,7 @@ entry: ret void } -define void @mixed_2(<2 x double> %a, <4 x i32> %b, i64 %c) #0 { +define void @mixed_2(<2 x double> %a, <4 x i32> %b, i64 %c) { ; 32BIT-LABEL: mixed_2: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 @@ -658,7 +658,7 @@ entry: %struct.foo = type <{ [3 x i32], double, [12 x i8], <4 x i32> }> -define void @mixed_3(<2 x double> %a, i64 %b, double %c, float %d, i32 signext %e, double %f, ...) 
#0 { +define void @mixed_3(<2 x double> %a, i64 %b, double %c, float %d, i32 signext %e, double %f, ...) { ; 32BIT-LABEL: mixed_3: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: mflr 0 @@ -720,10 +720,10 @@ define void @mixed_3(<2 x double> %a, i64 %b, double %c, float %d, i32 signext % ; 64BIT-NEXT: stxvd2x 34, 0, 3 ; 64BIT-NEXT: xsadddp 1, 0, 1 ; 64BIT-NEXT: std 5, 208(1) -; 64BIT-NEXT: stfs 2, 232(1) ; 64BIT-NEXT: std 6, 224(1) -; 64BIT-NEXT: std 8, 248(1) +; 64BIT-NEXT: stfs 2, 232(1) ; 64BIT-NEXT: std 7, 240(1) +; 64BIT-NEXT: std 8, 248(1) ; 64BIT-NEXT: stfd 3, 256(1) ; 64BIT-NEXT: std 9, 264(1) ; 64BIT-NEXT: std 10, 248(1) @@ -757,7 +757,7 @@ entry: ret void } -define signext i32 @mixed_4(ptr byval(%struct.foo) align 16 %foo, i32 %sec) #0 { +define signext i32 @mixed_4(ptr byval(%struct.foo) align 16 %foo, i32 %sec) { ; 32BIT-LABEL: mixed_4: ; 32BIT: # %bb.0: # %entry ; 32BIT-NEXT: stw 9, 48(1) @@ -807,10 +807,70 @@ entry: ret i32 %add1 } -declare void @foo() #0 -declare void @consume_f64(double) #0 -declare void @consume_f32(float) #0 -declare void @consume_i64(i64) #0 -declare void @consume_i32(i32 signext) #0 +%struct.bar = type { i8, i32, <4 x i32>, ptr, i8 } + +define void @mixed_5(ptr byref(%struct.bar) align 16 %r, ptr byval(%struct.bar) align 16 %x, i32 signext %y, ptr byval(%struct.foo) align 16 %f) { +; 32BIT-LABEL: mixed_5: +; 32BIT: # %bb.0: # %entry +; 32BIT-NEXT: mflr 0 +; 32BIT-NEXT: stwu 1, -64(1) +; 32BIT-NEXT: stw 0, 72(1) +; 32BIT-NEXT: stw 3, 88(1) +; 32BIT-NEXT: lfd 1, 172(1) +; 32BIT-NEXT: stw 5, 96(1) +; 32BIT-NEXT: stw 6, 100(1) +; 32BIT-NEXT: stw 7, 104(1) +; 32BIT-NEXT: stw 8, 108(1) +; 32BIT-NEXT: stw 9, 112(1) +; 32BIT-NEXT: stw 10, 116(1) +; 32BIT-NEXT: bl .consume_f64[PR] +; 32BIT-NEXT: nop +; 32BIT-NEXT: lwz 3, 100(1) +; 32BIT-NEXT: bl .consume_i32[PR] +; 32BIT-NEXT: nop +; 32BIT-NEXT: addi 1, 1, 64 +; 32BIT-NEXT: lwz 0, 8(1) +; 32BIT-NEXT: mtlr 0 +; 32BIT-NEXT: blr +; +; 64BIT-LABEL: mixed_5: +; 64BIT: # %bb.0: # %entry +; 
64BIT-NEXT: mflr 0 +; 64BIT-NEXT: stdu 1, -112(1) +; 64BIT-NEXT: std 0, 128(1) +; 64BIT-NEXT: std 3, 160(1) +; 64BIT-NEXT: lfd 1, 252(1) +; 64BIT-NEXT: std 5, 176(1) +; 64BIT-NEXT: std 6, 184(1) +; 64BIT-NEXT: std 7, 192(1) +; 64BIT-NEXT: std 8, 200(1) +; 64BIT-NEXT: std 9, 208(1) +; 64BIT-NEXT: std 10, 216(1) +; 64BIT-NEXT: bl .consume_f64[PR] +; 64BIT-NEXT: nop +; 64BIT-NEXT: lwa 3, 180(1) +; 64BIT-NEXT: bl .consume_i32[PR] +; 64BIT-NEXT: nop +; 64BIT-NEXT: addi 1, 1, 112 +; 64BIT-NEXT: ld 0, 16(1) +; 64BIT-NEXT: mtlr 0 +; 64BIT-NEXT: blr +entry: + %d = getelementptr inbounds i8, ptr %f, i64 12 + %0 = load double, ptr %d, align 4 + tail call void @consume_f64(double %0) + %i = getelementptr inbounds i8, ptr %x, i64 4 + %1 = load i32, ptr %i, align 4 + tail call void @consume_i32(i32 signext %1) + ret void +} + +declare void @foo() +declare void @consume_f64(double) +declare void @consume_f32(float) +declare void @consume_i64(i64) +declare void @consume_i32(i32 signext) + +!llvm.module.flags = !{!0} -attributes #0 = { "save-reg-params" nofree noinline nounwind } +!0 = !{i32 1, !"save-reg-params", i32 1}