diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 27131e14141cc..73cad483ff98d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2016,14 +2016,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast<Instruction>(V)) { Register InReg = FuncInfo.InitializeRegForValue(Inst); - - std::optional<CallingConv::ID> CallConv; - auto *CB = dyn_cast<CallBase>(Inst); - if (CB && !CB->isInlineAsm()) - CallConv = CB->getCallingConv(); - RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType(), CallConv); + Inst->getType(), std::nullopt); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index c69ca77031495..eb85733b00e1a 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3265,6 +3265,12 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CC); + // If the return type is illegal, don't bother to promote it, just fall back + // to DAG ISel. + MVT RetVT; + if (!isTypeLegal(CLI.RetTy, RetVT) && !CLI.RetTy->isVoidTy()) + return false; + // Call / invoke instructions with NoCfCheck attribute require special // handling. 
if (CB && CB->doesNoCfCheck()) diff --git a/llvm/test/CodeGen/X86/bf16-fast-isel.ll b/llvm/test/CodeGen/X86/bf16-fast-isel.ll index c659e0e647d36..812ffc3ab5f19 100644 --- a/llvm/test/CodeGen/X86/bf16-fast-isel.ll +++ b/llvm/test/CodeGen/X86/bf16-fast-isel.ll @@ -6,6 +6,10 @@ define i8 @test_direct_call(ptr %f) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: pextrw $0, %xmm0, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: callq __truncsfbf2@PLT ; CHECK-NEXT: callq bar@PLT ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq @@ -20,6 +24,10 @@ define i8 @test_fast_direct_call(ptr %f) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: callq foo_fast@PLT +; CHECK-NEXT: pextrw $0, %xmm0, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: callq __truncsfbf2@PLT ; CHECK-NEXT: callq bar@PLT ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq @@ -36,6 +44,10 @@ define i8 @test_indirect_all(ptr %fptr, ptr %f) nounwind { ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movq %rsi, %rdi ; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: pextrw $0, %xmm0, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: callq __truncsfbf2@PLT ; CHECK-NEXT: callq *%rbx ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq @@ -45,6 +57,45 @@ entry: ret i8 %call2 } +define i8 @test_indirect_all2(ptr %fptr, ptr %f, i1 %cond) nounwind { +; CHECK-LABEL: test_indirect_all2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: movl %edx, %ebp +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: testb $1, %bpl +; CHECK-NEXT: je .LBB3_2 +; CHECK-NEXT: # %bb.1: # %exit +; CHECK-NEXT: pextrw $0, %xmm0, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: callq __truncsfbf2@PLT +; CHECK-NEXT: callq *%rbx +; CHECK-NEXT: jmp 
.LBB3_3 +; CHECK-NEXT: .LBB3_2: # %exit2 +; CHECK-NEXT: movb $3, %al +; CHECK-NEXT: .LBB3_3: # %exit2 +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: retq +entry: + %call = call bfloat @foo(ptr %f) + br i1 %cond, label %exit, label %exit2 + +exit: + %call2 = call zeroext i8 %fptr(bfloat %call) + ret i8 %call2 + +exit2: + ret i8 3 +} + + define i8 @test_fast_indirect_all(ptr %fptr, ptr %f) nounwind { ; CHECK-LABEL: test_fast_indirect_all: ; CHECK: # %bb.0: # %entry @@ -52,6 +103,10 @@ define i8 @test_fast_indirect_all(ptr %fptr, ptr %f) nounwind { ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movq %rsi, %rdi ; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: pextrw $0, %xmm0, %eax +; CHECK-NEXT: shll $16, %eax +; CHECK-NEXT: movd %eax, %xmm0 +; CHECK-NEXT: callq __truncsfbf2@PLT ; CHECK-NEXT: callq *%rbx ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr179100.ll b/llvm/test/CodeGen/X86/pr179100.ll new file mode 100644 index 0000000000000..4ab71604db5b6 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr179100.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc --fast-isel -mcpu=znver5 < %s -mtriple=x86_64-unknown-unknown | FileCheck %s + +define fastcc i16 @test() nounwind { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq foo@PLT +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: vpmovd2m %xmm0, %k0 +; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; CHECK-NEXT: vpmovm2d %k0, %xmm1 +; CHECK-NEXT: callq bar@PLT +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: retq +entry: + %0 = call fastcc <4 x i1> @foo(ptr null, <4 x i1> zeroinitializer) + %1 = call fastcc i16 @bar(ptr null, <4 x i1> zeroinitializer, <4 x i1> %0) + ret i16 %1 +} + +declare fastcc <4 x i1> @foo(ptr, <4 x i1>) +declare fastcc 
i16 @bar(ptr, <4 x i1>, <4 x i1>)