Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RISC-V: Select FCANONICALIZE #112083

Merged
merged 1 commit into from
Oct 14, 2024
Merged

RISC-V: Select FCANONICALIZE #112083

merged 1 commit into from
Oct 14, 2024

Conversation

wzssyqa
Copy link
Contributor

@wzssyqa wzssyqa commented Oct 12, 2024

We can use FMIN.x OP,OP to canonicalize a float.

@llvmbot
Copy link
Collaborator

llvmbot commented Oct 12, 2024

@llvm/pr-subscribers-backend-risc-v

Author: YunQiang Su (wzssyqa)

Changes

We can use FMIN.x OP,OP to canonicalize a float.


Patch is 66.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112083.diff

5 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+16-16)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoD.td (+1)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoF.td (+1)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td (+1)
  • (added) llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll (+1340)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9fe989bff263fb..9921d6e245c9b4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -411,13 +411,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SELECT, XLenVT, Custom);
 
   static const unsigned FPLegalNodeTypes[] = {
-      ISD::FMINNUM,       ISD::FMAXNUM,       ISD::FMINIMUMNUM,
-      ISD::FMAXIMUMNUM,   ISD::LRINT,         ISD::LLRINT,
-      ISD::LROUND,        ISD::LLROUND,       ISD::STRICT_LRINT,
-      ISD::STRICT_LLRINT, ISD::STRICT_LROUND, ISD::STRICT_LLROUND,
-      ISD::STRICT_FMA,    ISD::STRICT_FADD,   ISD::STRICT_FSUB,
-      ISD::STRICT_FMUL,   ISD::STRICT_FDIV,   ISD::STRICT_FSQRT,
-      ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
+      ISD::FMINNUM,        ISD::FMAXNUM,       ISD::FMINIMUMNUM,
+      ISD::FMAXIMUMNUM,    ISD::FCANONICALIZE, ISD::LRINT,
+      ISD::LLRINT,         ISD::LROUND,        ISD::LLROUND,
+      ISD::STRICT_LRINT,   ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
+      ISD::STRICT_LLROUND, ISD::STRICT_FMA,    ISD::STRICT_FADD,
+      ISD::STRICT_FSUB,    ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
+      ISD::STRICT_FSQRT,   ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
 
   static const ISD::CondCode FPCCToExpand[] = {
       ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
@@ -433,15 +433,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       ISD::FROUNDEVEN};
 
   static const unsigned ZfhminZfbfminPromoteOps[] = {
-      ISD::FMINNUM,      ISD::FMAXNUM,       ISD::FMAXIMUMNUM,
-      ISD::FMINIMUMNUM,  ISD::FADD,          ISD::FSUB,
-      ISD::FMUL,         ISD::FMA,           ISD::FDIV,
-      ISD::FSQRT,        ISD::STRICT_FMA,    ISD::STRICT_FADD,
-      ISD::STRICT_FSUB,  ISD::STRICT_FMUL,   ISD::STRICT_FDIV,
-      ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
-      ISD::SETCC,        ISD::FCEIL,         ISD::FFLOOR,
-      ISD::FTRUNC,       ISD::FRINT,         ISD::FROUND,
-      ISD::FROUNDEVEN};
+      ISD::FMINNUM,        ISD::FMAXNUM,       ISD::FMAXIMUMNUM,
+      ISD::FMINIMUMNUM,    ISD::FCANONICALIZE, ISD::FADD,
+      ISD::FSUB,           ISD::FMUL,          ISD::FMA,
+      ISD::FDIV,           ISD::FSQRT,         ISD::STRICT_FMA,
+      ISD::STRICT_FADD,    ISD::STRICT_FSUB,   ISD::STRICT_FMUL,
+      ISD::STRICT_FDIV,    ISD::STRICT_FSQRT,  ISD::STRICT_FSETCC,
+      ISD::STRICT_FSETCCS, ISD::SETCC,         ISD::FCEIL,
+      ISD::FFLOOR,         ISD::FTRUNC,        ISD::FRINT,
+      ISD::FROUND,         ISD::FROUNDEVEN};
 
   if (Subtarget.hasStdExtZfbfmin()) {
     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 383a2fdede8364..5c8977142ad1b4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -398,6 +398,7 @@ foreach Ext = DExts in {
   defm : PatFprFpr_m<fmaximumnum, FMAX_D, Ext>;
   defm : PatFprFpr_m<riscv_fmin, FMIN_D, Ext>;
   defm : PatFprFpr_m<riscv_fmax, FMAX_D, Ext>;
+  def : Pat<(f64 (fcanonicalize FPR64:$rs1)), (FMIN_D $rs1, $rs1)>;
 }
 
 /// Setcc
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 000b7cfedb0f91..a134f37c774954 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -623,6 +623,7 @@ foreach Ext = FExts in {
   defm : PatFprFpr_m<fmaximumnum, FMAX_S, Ext>;
   defm : PatFprFpr_m<riscv_fmin, FMIN_S, Ext>;
   defm : PatFprFpr_m<riscv_fmax, FMAX_S, Ext>;
+  def : Pat<(f32 (fcanonicalize FPR32:$rs1)), (FMIN_S $rs1, $rs1)>;
 }
 
 /// Setcc
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 51123180d47c69..0d3127e0d5abeb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -369,6 +369,7 @@ foreach Ext = ZfhExts in {
   defm : PatFprFpr_m<fmaximumnum, FMAX_H, Ext>;
   defm : PatFprFpr_m<riscv_fmin, FMIN_H, Ext>;
   defm : PatFprFpr_m<riscv_fmax, FMAX_H, Ext>;
+  def : Pat<(f16 (fcanonicalize FPR16:$rs1)), (FMIN_H $rs1, $rs1)>;
 }
 
 /// Setcc
diff --git a/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll b/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll
new file mode 100644
index 00000000000000..a50988525e98dc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll
@@ -0,0 +1,1340 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; FIXME: @llvm.canonicalize doesn't support soft-float abi yet.
+; RUN: llc --mtriple=riscv64 --mattr=+d,+zfh < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FP16-RV64
+; RUN: llc --mtriple=riscv64 --mattr=+d,-zfh < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16-RV64
+; RUN: llc --mtriple=riscv32 --mattr=+d,+zfh < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FP16-RV32
+; RUN: llc --mtriple=riscv32 --mattr=+d,-zfh < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16-RV32
+
+declare half @llvm.fcanonicalize.f16(half)
+declare float @llvm.fcanonicalize.f32(float)
+declare double @llvm.fcanonicalize.f64(double)
+
+define half @fcanonicalize_f16(half %x) {
+; CHECK-FP16-LABEL: fcanonicalize_f16:
+; CHECK-FP16:       # %bb.0:
+; CHECK-FP16-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-NEXT:    ret
+; CHECK-FP16-RV64-LABEL: fcanonicalize_f16:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_f16:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, -16
+; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NOFP16-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w a0, fa0
+; CHECK-NOFP16-RV64-NEXT:    lui a1, 1048560
+; CHECK-NOFP16-RV64-NEXT:    or a0, a0, a1
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, a0
+; CHECK-NOFP16-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, 16
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_f16:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_f16:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, -16
+; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NOFP16-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-NOFP16-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-NOFP16-RV32-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV32-NEXT:    fmv.x.w a0, fa0
+; CHECK-NOFP16-RV32-NEXT:    lui a1, 1048560
+; CHECK-NOFP16-RV32-NEXT:    or a0, a0, a1
+; CHECK-NOFP16-RV32-NEXT:    fmv.w.x fa0, a0
+; CHECK-NOFP16-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
+; CHECK-NOFP16-RV32-NEXT:    ret
+  %z = call half @llvm.canonicalize.f16(half %x)
+  ret half %z
+}
+
+define half @fcanonicalize_f16_nnan(half %x) {
+; CHECK-FP16-LABEL: fcanonicalize_f16_nnan:
+; CHECK-FP16:       # %bb.0:
+; CHECK-FP16-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-NEXT:    ret
+; CHECK-FP16-RV64-LABEL: fcanonicalize_f16_nnan:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_f16_nnan:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, -16
+; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NOFP16-RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w a0, fa0
+; CHECK-NOFP16-RV64-NEXT:    lui a1, 1048560
+; CHECK-NOFP16-RV64-NEXT:    or a0, a0, a1
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, a0
+; CHECK-NOFP16-RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, 16
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_f16_nnan:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_f16_nnan:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, -16
+; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NOFP16-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-NOFP16-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-NOFP16-RV32-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV32-NEXT:    fmv.x.w a0, fa0
+; CHECK-NOFP16-RV32-NEXT:    lui a1, 1048560
+; CHECK-NOFP16-RV32-NEXT:    or a0, a0, a1
+; CHECK-NOFP16-RV32-NEXT:    fmv.w.x fa0, a0
+; CHECK-NOFP16-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
+; CHECK-NOFP16-RV32-NEXT:    ret
+  %z = call nnan half @llvm.canonicalize.f16(half %x)
+  ret half %z
+}
+
+define <2 x half> @fcanonicalize_v2f16(<2 x half> %x) {
+; CHECK-FP16-LABEL: fcanonicalize_v2f16:
+; CHECK-FP16:       # %bb.0:
+; CHECK-FP16-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-NEXT:    fmin.h fa1, fa1, fa1
+; CHECK-FP16-NEXT:    ret
+; CHECK-FP16-RV64-LABEL: fcanonicalize_v2f16:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    fmin.h fa1, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v2f16:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, -32
+; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NOFP16-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-NOFP16-RV64-NEXT:    mv s0, a1
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, a0
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w s1, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, s0
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w a1, fa0
+; CHECK-NOFP16-RV64-NEXT:    mv a0, s1
+; CHECK-NOFP16-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, 32
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_v2f16:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    fmin.h fa1, fa1, fa1
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_v2f16:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, -16
+; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NOFP16-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-NOFP16-RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK-NOFP16-RV32-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-NOFP16-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-NOFP16-RV32-NEXT:    .cfi_offset fs0, -16
+; CHECK-NOFP16-RV32-NEXT:    fmv.w.x fs0, a1
+; CHECK-NOFP16-RV32-NEXT:    fmv.w.x fa0, a0
+; CHECK-NOFP16-RV32-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV32-NEXT:    fmv.x.w s0, fa0
+; CHECK-NOFP16-RV32-NEXT:    fmv.s fa0, fs0
+; CHECK-NOFP16-RV32-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV32-NEXT:    fmv.x.w a1, fa0
+; CHECK-NOFP16-RV32-NEXT:    mv a0, s0
+; CHECK-NOFP16-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NOFP16-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK-NOFP16-RV32-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
+; CHECK-NOFP16-RV32-NEXT:    ret
+  %z = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
+  ret <2 x half> %z
+}
+
+define <2 x half> @fcanonicalize_v2f16_nnan(<2 x half> %x) {
+; CHECK-FP16-LABEL: fcanonicalize_v2f16_nnan:
+; CHECK-FP16:       # %bb.0:
+; CHECK-FP16-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-NEXT:    fmin.h fa1, fa1, fa1
+; CHECK-FP16-NEXT:    ret
+; CHECK-FP16-RV64-LABEL: fcanonicalize_v2f16_nnan:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    fmin.h fa1, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v2f16_nnan:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, -32
+; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NOFP16-RV64-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-NOFP16-RV64-NEXT:    mv s0, a1
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, a0
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w s1, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, s0
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w a1, fa0
+; CHECK-NOFP16-RV64-NEXT:    mv a0, s1
+; CHECK-NOFP16-RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, 32
+; CHECK-NOFP16-RV64-NEXT:    ret
+;
+; CHECK-FP16-RV32-LABEL: fcanonicalize_v2f16_nnan:
+; CHECK-FP16-RV32:       # %bb.0:
+; CHECK-FP16-RV32-NEXT:    fmin.h fa0, fa0, fa0
+; CHECK-FP16-RV32-NEXT:    fmin.h fa1, fa1, fa1
+; CHECK-FP16-RV32-NEXT:    ret
+;
+; CHECK-NOFP16-RV32-LABEL: fcanonicalize_v2f16_nnan:
+; CHECK-NOFP16-RV32:       # %bb.0:
+; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, -16
+; CHECK-NOFP16-RV32-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NOFP16-RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-NOFP16-RV32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; CHECK-NOFP16-RV32-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV32-NEXT:    .cfi_offset ra, -4
+; CHECK-NOFP16-RV32-NEXT:    .cfi_offset s0, -8
+; CHECK-NOFP16-RV32-NEXT:    .cfi_offset fs0, -16
+; CHECK-NOFP16-RV32-NEXT:    fmv.w.x fs0, a1
+; CHECK-NOFP16-RV32-NEXT:    fmv.w.x fa0, a0
+; CHECK-NOFP16-RV32-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV32-NEXT:    fmv.x.w s0, fa0
+; CHECK-NOFP16-RV32-NEXT:    fmv.s fa0, fs0
+; CHECK-NOFP16-RV32-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV32-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV32-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV32-NEXT:    fmv.x.w a1, fa0
+; CHECK-NOFP16-RV32-NEXT:    mv a0, s0
+; CHECK-NOFP16-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NOFP16-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK-NOFP16-RV32-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV32-NEXT:    addi sp, sp, 16
+; CHECK-NOFP16-RV32-NEXT:    ret
+  %z = call nnan <2 x half> @llvm.canonicalize.v2f16(<2 x half> %x)
+  ret <2 x half> %z
+}
+
+define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) {
+; CHECK-FP16-LABEL: fcanonicalize_v4f16:
+; CHECK-FP16:       # %bb.0:
+; CHECK-FP16-NEXT:    fmin.h fa5, fa0, fa0
+; CHECK-FP16-NEXT:    fmin.h fa4, fa1, fa1
+; CHECK-FP16-NEXT:    fmin.h fa2, fa2, fa2
+; CHECK-FP16-NEXT:    fmin.h fa3, fa3, fa3
+; CHECK-FP16-NEXT:    fsh fa5, 0(a0)
+; CHECK-FP16-NEXT:    fsh fa4, 2(a0)
+; CHECK-FP16-NEXT:    fsh fa2, 4(a0)
+; CHECK-FP16-NEXT:    fsh fa3, 6(a0)
+; CHECK-FP16-NEXT:    ret
+; CHECK-FP16-RV64-LABEL: fcanonicalize_v4f16:
+; CHECK-FP16-RV64:       # %bb.0:
+; CHECK-FP16-RV64-NEXT:    fmin.h fa5, fa0, fa0
+; CHECK-FP16-RV64-NEXT:    fmin.h fa4, fa1, fa1
+; CHECK-FP16-RV64-NEXT:    fmin.h fa2, fa2, fa2
+; CHECK-FP16-RV64-NEXT:    fmin.h fa3, fa3, fa3
+; CHECK-FP16-RV64-NEXT:    fsh fa5, 0(a0)
+; CHECK-FP16-RV64-NEXT:    fsh fa4, 2(a0)
+; CHECK-FP16-RV64-NEXT:    fsh fa2, 4(a0)
+; CHECK-FP16-RV64-NEXT:    fsh fa3, 6(a0)
+; CHECK-FP16-RV64-NEXT:    ret
+;
+; CHECK-NOFP16-RV64-LABEL: fcanonicalize_v4f16:
+; CHECK-NOFP16-RV64:       # %bb.0:
+; CHECK-NOFP16-RV64-NEXT:    addi sp, sp, -64
+; CHECK-NOFP16-RV64-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NOFP16-RV64-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    fsd fs0, 16(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    fsd fs1, 8(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    fsd fs2, 0(sp) # 8-byte Folded Spill
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset ra, -8
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset s0, -16
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset s1, -24
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset s2, -32
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset s3, -40
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset fs0, -48
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset fs1, -56
+; CHECK-NOFP16-RV64-NEXT:    .cfi_offset fs2, -64
+; CHECK-NOFP16-RV64-NEXT:    lhu s1, 0(a1)
+; CHECK-NOFP16-RV64-NEXT:    lhu s2, 8(a1)
+; CHECK-NOFP16-RV64-NEXT:    lhu s3, 16(a1)
+; CHECK-NOFP16-RV64-NEXT:    lhu a1, 24(a1)
+; CHECK-NOFP16-RV64-NEXT:    mv s0, a0
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, a1
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.s fs0, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, s3
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.s fs1, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, s2
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.s fs2, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmv.w.x fa0, s1
+; CHECK-NOFP16-RV64-NEXT:    call __extendhfsf2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fa0, fa0
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w s1, fa0
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w s2, fs2
+; CHECK-NOFP16-RV64-NEXT:    fmin.s fa0, fs0, fs0
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w s3, fs1
+; CHECK-NOFP16-RV64-NEXT:    call __truncsfhf2
+; CHECK-NOFP16-RV64-NEXT:    fmv.x.w a0, fa0
+; CHECK-NOFP16-RV64-NEXT:    sh s1, 0(s0)
+; CHECK-NOFP16-RV64-NEXT:    sh s2, 2(s0)
+; CHECK-NOFP16-RV64-NEXT:    sh s3, 4(s0)
+; CHECK-NOFP16-RV64-NEXT:    sh a0, 6(s0)
+; CHECK-NOFP16-RV64-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NOFP16-RV64-NEXT:    ld s2, 32(sp) ...
[truncated]

@wzssyqa wzssyqa requested a review from dtcxzyw October 12, 2024 06:58
We can use `FMIN.x OP,OP` to canonicalize a float.
@wzssyqa wzssyqa merged commit c01ddbe into llvm:main Oct 14, 2024
8 checks passed
@wzssyqa wzssyqa deleted the fcanonicalize_rv branch October 14, 2024 06:12
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants