[PowerPC] Conversions of f128 and f16 should use _kf_ instead of _tf_ #99855

EsmeYi · 2024-07-22T09:30:54Z

This is a child PR of #97677.

llvmbot · 2024-07-22T09:31:16Z

@llvm/pr-subscribers-backend-powerpc

Author: Esme (EsmeYi)

Changes

This is a child PR of #97677.

Full diff: https://github.com/llvm/llvm-project/pull/99855.diff

4 Files Affected:

(modified) llvm/lib/CodeGen/TargetLoweringBase.cpp (+2)
(modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+6)
(modified) llvm/lib/Target/PowerPC/PPCInstrVSX.td (+8)
(added) llvm/test/CodeGen/PowerPC/f16-to-from-f128.ll (+102)

diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index ff684c7cb6bba..ed8d2dcc4817c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -170,8 +170,10 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
     setLibcallName(RTLIB::MUL_F128, "__mulkf3");
     setLibcallName(RTLIB::DIV_F128, "__divkf3");
     setLibcallName(RTLIB::POWI_F128, "__powikf2");
+    setLibcallName(RTLIB::FPEXT_F16_F128, "__extendhfkf2");
     setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
     setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
+    setLibcallName(RTLIB::FPROUND_F128_F16, "__trunckfhf2");
     setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
     setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
     setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 4d4008ac0ba70..360c463929b62 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -211,18 +211,24 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   }
 
   if (Subtarget.isISA3_0()) {
+    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
+    setTruncStoreAction(MVT::f128, MVT::f16, Legal);
     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
   } else {
     // No extending loads from f16 or HW conversions back and forth.
+    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
+    setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
+    setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
+    setTruncStoreAction(MVT::f128, MVT::f16, Expand);
     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
   }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index dd07892794d59..51aa0be7439c6 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3993,6 +3993,10 @@ defm : ScalToVecWPermute<
   (SUBREG_TO_REG (i64 1), (VEXTSH2Ds (LXSIHZX ForceXForm:$src)), sub_64)>;
 
 // Load/convert and convert/store patterns for f16.
+def : Pat<(f128 (extloadf16 ForceXForm:$src)),
+          (f128 (XSCVDPQP (XSCVHPDP (LXSIHZX ForceXForm:$src))))>;
+def : Pat<(truncstoref16 f128:$src, ForceXForm:$dst),
+          (STXSIHX (XSCVDPHP (XSCVQPDP $src)), ForceXForm:$dst)>;
 def : Pat<(f64 (extloadf16 ForceXForm:$src)),
           (f64 (XSCVHPDP (LXSIHZX ForceXForm:$src)))>;
 def : Pat<(truncstoref16 f64:$src, ForceXForm:$dst),
@@ -4001,6 +4005,8 @@ def : Pat<(f32 (extloadf16 ForceXForm:$src)),
           (f32 (COPY_TO_REGCLASS (XSCVHPDP (LXSIHZX ForceXForm:$src)), VSSRC))>;
 def : Pat<(truncstoref16 f32:$src, ForceXForm:$dst),
           (STXSIHX (XSCVDPHP (COPY_TO_REGCLASS $src, VSFRC)), ForceXForm:$dst)>;
+def : Pat<(f128 (f16_to_fp i32:$A)),
+          (f128 (XSCVDPQP (XSCVHPDP (MTVSRWZ $A))))>;
 def : Pat<(f64 (f16_to_fp i32:$A)),
           (f64 (XSCVHPDP (MTVSRWZ $A)))>;
 def : Pat<(f32 (f16_to_fp i32:$A)),
@@ -4008,6 +4014,8 @@ def : Pat<(f32 (f16_to_fp i32:$A)),
 def : Pat<(i32 (fp_to_f16 f32:$A)),
           (i32 (MFVSRWZ (XSCVDPHP (COPY_TO_REGCLASS $A, VSFRC))))>;
 def : Pat<(i32 (fp_to_f16 f64:$A)), (i32 (MFVSRWZ (XSCVDPHP $A)))>;
+def : Pat<(i32 (fp_to_f16 f128:$A)),
+          (i32 (MFVSRWZ (XSCVDPHP (XSCVQPDP $A))))>;
 
 // Vector sign extensions
 def : Pat<(f64 (PPCVexts f64:$A, 1)),
diff --git a/llvm/test/CodeGen/PowerPC/f16-to-from-f128.ll b/llvm/test/CodeGen/PowerPC/f16-to-from-f128.ll
new file mode 100644
index 0000000000000..19ba2ec991407
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/f16-to-from-f128.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=P8
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -mattr=-hard-float \
+; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names < %s | FileCheck %s \
+; RUN:   --check-prefix=SOFT
+
+define half @trunc(fp128 %a) unnamed_addr {
+; P8-LABEL: trunc:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    .cfi_def_cfa_offset 32
+; P8-NEXT:    .cfi_offset lr, 16
+; P8-NEXT:    bl __trunckfhf2
+; P8-NEXT:    nop
+; P8-NEXT:    clrldi r3, r3, 48
+; P8-NEXT:    bl __gnu_h2f_ieee
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: trunc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscvqpdp v2, v2
+; CHECK-NEXT:    xscvdphp f0, vs34
+; CHECK-NEXT:    mffprwz r3, f0
+; CHECK-NEXT:    clrlwi r3, r3, 16
+; CHECK-NEXT:    mtfprwz f0, r3
+; CHECK-NEXT:    xscvhpdp f1, f0
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: trunc:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
+; SOFT-NEXT:    .cfi_def_cfa_offset 32
+; SOFT-NEXT:    .cfi_offset lr, 16
+; SOFT-NEXT:    bl __trunckfhf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __gnu_h2f_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __gnu_f2h_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+entry:
+  %0 = fptrunc fp128 %a to half
+  ret half %0
+}
+
+define fp128 @ext(half %a) unnamed_addr {
+; P8-LABEL: ext:
+; P8:       # %bb.0: # %entry
+; P8-NEXT:    mflr r0
+; P8-NEXT:    stdu r1, -32(r1)
+; P8-NEXT:    std r0, 48(r1)
+; P8-NEXT:    .cfi_def_cfa_offset 32
+; P8-NEXT:    .cfi_offset lr, 16
+; P8-NEXT:    bl __extendsfkf2
+; P8-NEXT:    nop
+; P8-NEXT:    addi r1, r1, 32
+; P8-NEXT:    ld r0, 16(r1)
+; P8-NEXT:    mtlr r0
+; P8-NEXT:    blr
+;
+; CHECK-LABEL: ext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xscpsgndp vs34, f1, f1
+; CHECK-NEXT:    xscvdpqp v2, v2
+; CHECK-NEXT:    blr
+;
+; SOFT-LABEL: ext:
+; SOFT:       # %bb.0: # %entry
+; SOFT-NEXT:    mflr r0
+; SOFT-NEXT:    stdu r1, -32(r1)
+; SOFT-NEXT:    std r0, 48(r1)
+; SOFT-NEXT:    .cfi_def_cfa_offset 32
+; SOFT-NEXT:    .cfi_offset lr, 16
+; SOFT-NEXT:    clrldi r3, r3, 48
+; SOFT-NEXT:    bl __gnu_h2f_ieee
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    bl __extendsfkf2
+; SOFT-NEXT:    nop
+; SOFT-NEXT:    addi r1, r1, 32
+; SOFT-NEXT:    ld r0, 16(r1)
+; SOFT-NEXT:    mtlr r0
+; SOFT-NEXT:    blr
+entry:
+  %0 = fpext half %a to fp128
+  ret fp128 %0
+}

tgross35 · 2024-07-23T06:26:00Z

Fixes #98126

tgross35 · 2024-08-22T07:21:35Z

@chenzheng1030 would you be able to take a look at the second commit (40b97f3) here, for merge after #97677?

ecnelises · 2024-08-22T09:07:09Z

Seems this is not rebased upon #97677? After #97677 lands, this will only have 40b97f3. That commit looks good to me.

chenzheng1030 · 2024-08-22T09:42:55Z

@chenzheng1030 would you be able to take a look at the second commit (40b97f3) here, for merge after #97677?

I am waiting for the first patch being merged and the second one being rebased.

EsmeYi added 2 commits July 4, 2024 01:05

F16 and f128 conversion.

2684fc1

fp128 -> half uses __trunctfhf2 but should be __trunckfhf2

40b97f3

EsmeYi added the backend:PowerPC label Jul 22, 2024

EsmeYi requested review from chenzheng1030 and ecnelises July 22, 2024 09:30

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[PowerPC] Conversions of f128 and f16 should use _kf_ instead of _tf_ #99855

[PowerPC] Conversions of f128 and f16 should use _kf_ instead of _tf_ #99855

EsmeYi commented Jul 22, 2024

llvmbot commented Jul 22, 2024

tgross35 commented Jul 23, 2024

tgross35 commented Aug 22, 2024

ecnelises commented Aug 22, 2024

chenzheng1030 commented Aug 22, 2024

[PowerPC] Conversions of f128 and f16 should use _kf_ instead of _tf_ #99855

Are you sure you want to change the base?

[PowerPC] Conversions of f128 and f16 should use _kf_ instead of _tf_ #99855

Conversation

EsmeYi commented Jul 22, 2024

llvmbot commented Jul 22, 2024

tgross35 commented Jul 23, 2024

tgross35 commented Aug 22, 2024

ecnelises commented Aug 22, 2024

chenzheng1030 commented Aug 22, 2024