Skip to content

Commit 6b73a4c

Browse files
committed
[AArch64][SVE] Remove false register dependency for unary FP convert operations
Generate movprfx for floating point convert zeroing pseudo operations Differential Revision: https://reviews.llvm.org/D118617
1 parent 9f30aff commit 6b73a4c

File tree

5 files changed

+470
-45
lines changed

5 files changed

+470
-45
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1683,60 +1683,61 @@ let Predicates = [HasSVEorStreamingSVE] in {
16831683
defm FCVTZS_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111110, "fcvtzs", ZPR64, ZPR64, null_frag, AArch64fcvtzs_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
16841684
defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd< 0b1111111, "fcvtzu", ZPR64, ZPR64, null_frag, AArch64fcvtzu_mt, nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
16851685

1686-
def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 PPR:$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
1687-
(FCVT_ZPmZ_HtoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1686+
//These patterns exist to improve the code quality of conversions on unpacked types.
1687+
def : Pat<(nxv2f32 (AArch64fcvte_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f16 ZPR:$Zs), (nxv2f32 ZPR:$Zd))),
1688+
(FCVT_ZPmZ_HtoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
16881689

16891690
// FP_ROUND has an additional 'precise' flag which indicates the type of rounding.
16901691
// This is ignored by the pattern below where it is matched by (i64 timm0_1)
1691-
def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 PPR:$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
1692-
(FCVT_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1692+
def : Pat<(nxv2f16 (AArch64fcvtr_mt (nxv2i1 (SVEAllActive):$Pg), (nxv2f32 ZPR:$Zs), (i64 timm0_1), (nxv2f16 ZPR:$Zd))),
1693+
(FCVT_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
16931694

1694-
// Floating-point -> signed integer
1695-
def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
1695+
// Signed integer -> Floating-point
1696+
def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
16961697
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (nxv2f16 ZPR:$Zd))),
1697-
(SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1698+
(SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
16981699

1699-
def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 PPR:$Pg),
1700+
def : Pat<(nxv4f16 (AArch64scvtf_mt (nxv4i1 (SVEAllActive):$Pg),
17001701
(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (nxv4f16 ZPR:$Zd))),
1701-
(SCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1702+
(SCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17021703

1703-
def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
1704+
def : Pat<(nxv2f16 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17041705
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f16 ZPR:$Zd))),
1705-
(SCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1706+
(SCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17061707

1707-
def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
1708+
def : Pat<(nxv2f32 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17081709
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f32 ZPR:$Zd))),
1709-
(SCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1710+
(SCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17101711

1711-
def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 PPR:$Pg),
1712+
def : Pat<(nxv2f64 (AArch64scvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17121713
(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (nxv2f64 ZPR:$Zd))),
1713-
(SCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1714+
(SCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17141715

1715-
// Floating-point -> unsigned integer
1716-
def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
1716+
// Unsigned integer -> Floating-point
1717+
def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17171718
(and (nxv2i64 ZPR:$Zs),
17181719
(nxv2i64 (AArch64dup (i64 0xFFFF)))), (nxv2f16 ZPR:$Zd))),
1719-
(UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1720+
(UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17201721

1721-
def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
1722+
def : Pat<(nxv2f16 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17221723
(and (nxv2i64 ZPR:$Zs),
17231724
(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f16 ZPR:$Zd))),
1724-
(UCVTF_ZPmZ_StoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1725+
(UCVTF_ZPmZ_StoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17251726

1726-
def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 PPR:$Pg),
1727+
def : Pat<(nxv4f16 (AArch64ucvtf_mt (nxv4i1 (SVEAllActive):$Pg),
17271728
(and (nxv4i32 ZPR:$Zs),
17281729
(nxv4i32 (AArch64dup (i32 0xFFFF)))), (nxv4f16 ZPR:$Zd))),
1729-
(UCVTF_ZPmZ_HtoH ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1730+
(UCVTF_ZPmZ_HtoH_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17301731

1731-
def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
1732+
def : Pat<(nxv2f32 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17321733
(and (nxv2i64 ZPR:$Zs),
17331734
(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f32 ZPR:$Zd))),
1734-
(UCVTF_ZPmZ_StoS ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1735+
(UCVTF_ZPmZ_StoS_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17351736

1736-
def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 PPR:$Pg),
1737+
def : Pat<(nxv2f64 (AArch64ucvtf_mt (nxv2i1 (SVEAllActive):$Pg),
17371738
(and (nxv2i64 ZPR:$Zs),
17381739
(nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))), (nxv2f64 ZPR:$Zd))),
1739-
(UCVTF_ZPmZ_StoD ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
1740+
(UCVTF_ZPmZ_StoD_UNDEF ZPR:$Zd, PPR:$Pg, ZPR:$Zs)>;
17401741

17411742
defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", AArch64frintn_mt>;
17421743
defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", AArch64frintp_mt>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,14 @@ class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType
370370
: Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), vtd:$Op3)),
371371
(inst $Op3, $Op1, $Op2)>;
372372

373+
multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
374+
ValueType vts, Instruction inst>{
375+
def : Pat<(vtd (op pg:$Op1, vts:$Op2, (i64 timm0_1), (vtd undef))),
376+
(inst (IMPLICIT_DEF), $Op1, $Op2)>;
377+
def : Pat<(vtd (op (pg (SVEAllActive:$Op1)), vts:$Op2, (i64 timm0_1), vtd:$Op3)),
378+
(inst $Op3, $Op1, $Op2)>;
379+
}
380+
373381
class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
374382
ValueType it, ComplexPattern cpx, Instruction inst>
375383
: Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))),
@@ -2600,8 +2608,8 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
26002608
SDPatternOperator int_op,
26012609
SDPatternOperator ir_op, ValueType vt1,
26022610
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
2603-
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
2604-
2611+
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
2612+
SVEPseudo2Instr<NAME, 1>;
26052613
// convert vt1 to a packed type for the intrinsic patterns
26062614
defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
26072615
!eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
@@ -2615,8 +2623,11 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
26152623
1 : vt3);
26162624

26172625
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
2618-
26192626
def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
2627+
2628+
def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
2629+
2630+
defm : SVE_1_Op_PassthruUndef_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;
26202631
}
26212632

26222633
multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
@@ -2625,7 +2636,8 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
26252636
SDPatternOperator int_op,
26262637
SDPatternOperator ir_op, ValueType vt1,
26272638
ValueType vt2, ValueType vt3, ElementSizeEnum Sz> {
2628-
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>;
2639+
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
2640+
SVEPseudo2Instr<NAME, 1>;
26292641

26302642
// convert vt1 to a packed type for the intrinsic patterns
26312643
defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
@@ -2634,8 +2646,11 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
26342646
1 : vt1);
26352647

26362648
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
2637-
26382649
def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
2650+
2651+
def _UNDEF : PredOneOpPassthruPseudo<NAME, !cast<ZPRRegOp>(i_zprtype)>;
2652+
2653+
defm : SVE_1_Op_PassthruUndef_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME # _UNDEF)>;
26392654
}
26402655

26412656
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {

0 commit comments

Comments
 (0)