Skip to content

Commit 39b1af8

Browse files
committed
[AArch64][SVE] Added optimisation for SVE intrinsics with no active lanes
1 parent c408a24 commit 39b1af8

File tree

3 files changed

+382
-564
lines changed

3 files changed

+382
-564
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 122 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1406,9 +1406,30 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
14061406
return &II;
14071407
}
14081408

1409+
// Optimize operations that take an all false predicate or send them for
1410+
// canonicalization.
//
// II is the SVE intrinsic call being combined; IID is the intrinsic ID of its
// `_u` counterpart (callers in this file pass e.g. aarch64_sve_add_u). II may
// itself already be that `_u` intrinsic, and the comparison
// `II.getIntrinsicID() != IID` is how the two forms are told apart.
//
// Result:
//  * operand 0 (the predicate) is a zero constant and II is not the `_u`
//    form: all uses of II are replaced with operand 1.
//  * operand 0 is a zero constant and II is the `_u` form: all uses of II are
//    replaced with an undef value of II's type.
//  * otherwise, for a non-`_u` II: whatever instCombineSVEAllActive(II, IID)
//    returns.
//  * otherwise, for a `_u` II: std::nullopt, so the caller may try other
//    combines.
1411+
static std::optional<Instruction *>
1412+
instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
1413+
Intrinsic::ID IID) {
1414+
// m_ZeroInt() only matches when the predicate operand is a constant zero.
if (match(II.getOperand(0), m_ZeroInt())) {
1415+
if (II.getIntrinsicID() != IID)
1416+
// llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
1417+
// inactive for sv[func]_m or sv[func]_z
// NOTE(review): for sv[func]_z this presumably relies on op1 already having
// its inactive lanes zeroed before it reaches this intrinsic - confirm.
1418+
return IC.replaceInstUsesWith(II, II.getOperand(1));
1419+
else
1420+
// llvm_ir_u, pred(0), op1, op2 - Spec says to return undef when all lanes
1421+
// are inactive for sv[func]_x
1422+
return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
1423+
}
1424+
// Predicate is not a constant zero: only the non-`_u` form is sent on to
// instCombineSVEAllActive for canonicalization.
if (II.getIntrinsicID() != IID)
1425+
return instCombineSVEAllActive(II, IID);
1426+
// II is already the `_u` form; nothing to do here.
return std::nullopt;
1427+
}
1428+
14091429
static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
14101430
IntrinsicInst &II) {
1411-
if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
1431+
if (auto II_U =
1432+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
14121433
return II_U;
14131434
if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
14141435
Intrinsic::aarch64_sve_mla>(
@@ -1421,9 +1442,22 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
14211442
return std::nullopt;
14221443
}
14231444

1445+
// Combine handler for Intrinsic::aarch64_sve_add_u (dispatched from the
// instCombineIntrinsic switch).
//
// First gives instCombineSVEAllOrNoActive a chance to simplify the call; if
// that produces a result it is returned as-is. Otherwise the call is handed to
// instCombineSVEVectorFuseMulAddSub instantiated with aarch64_sve_mul_u and
// aarch64_sve_mla_u, and that helper's result (possibly std::nullopt) is
// returned.
static std::optional<Instruction *>
1446+
instCombineSVEVectorAddU(InstCombiner &IC, IntrinsicInst &II) {
1447+
if (auto II_U =
1448+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
1449+
return II_U;
1450+
else {
1451+
// NOTE(review): the meaning of the trailing `true` argument is defined by
// instCombineSVEVectorFuseMulAddSub, which is not visible here - confirm.
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1452+
Intrinsic::aarch64_sve_mla_u>(
1453+
IC, II, true);
1454+
}
1455+
}
1456+
14241457
static std::optional<Instruction *>
14251458
instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
1426-
if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
1459+
if (auto II_U =
1460+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
14271461
return II_U;
14281462
if (auto FMLA =
14291463
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1445,6 +1479,9 @@ instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
14451479

14461480
static std::optional<Instruction *>
14471481
instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
1482+
if (auto II_U =
1483+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
1484+
return II_U;
14481485
if (auto FMLA =
14491486
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
14501487
Intrinsic::aarch64_sve_fmla>(IC, II,
@@ -1465,7 +1502,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
14651502

14661503
static std::optional<Instruction *>
14671504
instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
1468-
if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
1505+
if (auto II_U =
1506+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
14691507
return II_U;
14701508
if (auto FMLS =
14711509
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1487,6 +1525,9 @@ instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
14871525

14881526
static std::optional<Instruction *>
14891527
instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
1528+
if (auto II_U =
1529+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
1530+
return II_U;
14901531
if (auto FMLS =
14911532
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
14921533
Intrinsic::aarch64_sve_fmls>(IC, II,
@@ -1507,7 +1548,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
15071548

15081549
static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
15091550
IntrinsicInst &II) {
1510-
if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
1551+
if (auto II_U =
1552+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
15111553
return II_U;
15121554
if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
15131555
Intrinsic::aarch64_sve_mls>(
@@ -1516,17 +1558,27 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
15161558
return std::nullopt;
15171559
}
15181560

1561+
// Combine handler for Intrinsic::aarch64_sve_sub_u (dispatched from the
// instCombineIntrinsic switch).
//
// First gives instCombineSVEAllOrNoActive a chance to simplify the call; if
// that produces a result it is returned as-is. Otherwise the call is handed to
// instCombineSVEVectorFuseMulAddSub instantiated with aarch64_sve_mul_u and
// aarch64_sve_mls_u, and that helper's result (possibly std::nullopt) is
// returned.
static std::optional<Instruction *>
1562+
instCombineSVEVectorSubU(InstCombiner &IC, IntrinsicInst &II) {
1563+
if (auto II_U =
1564+
instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
1565+
return II_U;
1566+
else {
1567+
// NOTE(review): the meaning of the trailing `true` argument is defined by
// instCombineSVEVectorFuseMulAddSub, which is not visible here - confirm.
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1568+
Intrinsic::aarch64_sve_mls_u>(
1569+
IC, II, true);
1570+
}
1571+
}
1572+
15191573
static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
15201574
IntrinsicInst &II,
15211575
Intrinsic::ID IID) {
15221576
auto *OpPredicate = II.getOperand(0);
15231577
auto *OpMultiplicand = II.getOperand(1);
15241578
auto *OpMultiplier = II.getOperand(2);
15251579

1526-
// Canonicalise a non _u intrinsic only.
1527-
if (II.getIntrinsicID() != IID)
1528-
if (auto II_U = instCombineSVEAllActive(II, IID))
1529-
return II_U;
1580+
if (auto II_U = instCombineSVEAllOrNoActive(IC, II, IID))
1581+
return II_U;
15301582

15311583
// Return true if a given instruction is a unit splat value, false otherwise.
15321584
auto IsUnitSplat = [](auto *I) {
@@ -1891,91 +1943,117 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
18911943
case Intrinsic::aarch64_sve_ptest_last:
18921944
return instCombineSVEPTest(IC, II);
18931945
case Intrinsic::aarch64_sve_fabd:
1894-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
1946+
case Intrinsic::aarch64_sve_fabd_u:
1947+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
18951948
case Intrinsic::aarch64_sve_fadd:
18961949
return instCombineSVEVectorFAdd(IC, II);
18971950
case Intrinsic::aarch64_sve_fadd_u:
18981951
return instCombineSVEVectorFAddU(IC, II);
18991952
case Intrinsic::aarch64_sve_fdiv:
1900-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
1953+
case Intrinsic::aarch64_sve_fdiv_u:
1954+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
19011955
case Intrinsic::aarch64_sve_fmax:
1902-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
1956+
case Intrinsic::aarch64_sve_fmax_u:
1957+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
19031958
case Intrinsic::aarch64_sve_fmaxnm:
1904-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
1959+
case Intrinsic::aarch64_sve_fmaxnm_u:
1960+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
19051961
case Intrinsic::aarch64_sve_fmin:
1906-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
1962+
case Intrinsic::aarch64_sve_fmin_u:
1963+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
19071964
case Intrinsic::aarch64_sve_fminnm:
1908-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
1965+
case Intrinsic::aarch64_sve_fminnm_u:
1966+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
19091967
case Intrinsic::aarch64_sve_fmla:
1910-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
1968+
case Intrinsic::aarch64_sve_fmla_u:
1969+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
19111970
case Intrinsic::aarch64_sve_fmls:
1912-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
1971+
case Intrinsic::aarch64_sve_fmls_u:
1972+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
19131973
case Intrinsic::aarch64_sve_fmul:
19141974
case Intrinsic::aarch64_sve_fmul_u:
19151975
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
19161976
case Intrinsic::aarch64_sve_fmulx:
1917-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
1977+
case Intrinsic::aarch64_sve_fmulx_u:
1978+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
19181979
case Intrinsic::aarch64_sve_fnmla:
1919-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
1980+
case Intrinsic::aarch64_sve_fnmla_u:
1981+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
19201982
case Intrinsic::aarch64_sve_fnmls:
1921-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
1983+
case Intrinsic::aarch64_sve_fnmls_u:
1984+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
19221985
case Intrinsic::aarch64_sve_fsub:
19231986
return instCombineSVEVectorFSub(IC, II);
19241987
case Intrinsic::aarch64_sve_fsub_u:
19251988
return instCombineSVEVectorFSubU(IC, II);
19261989
case Intrinsic::aarch64_sve_add:
19271990
return instCombineSVEVectorAdd(IC, II);
19281991
case Intrinsic::aarch64_sve_add_u:
1929-
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1930-
Intrinsic::aarch64_sve_mla_u>(
1931-
IC, II, true);
1992+
return instCombineSVEVectorAddU(IC, II);
19321993
case Intrinsic::aarch64_sve_mla:
1933-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
1994+
case Intrinsic::aarch64_sve_mla_u:
1995+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
19341996
case Intrinsic::aarch64_sve_mls:
1935-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
1997+
case Intrinsic::aarch64_sve_mls_u:
1998+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
19361999
case Intrinsic::aarch64_sve_mul:
19372000
case Intrinsic::aarch64_sve_mul_u:
19382001
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
19392002
case Intrinsic::aarch64_sve_sabd:
1940-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
2003+
case Intrinsic::aarch64_sve_sabd_u:
2004+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
19412005
case Intrinsic::aarch64_sve_smax:
1942-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
2006+
case Intrinsic::aarch64_sve_smax_u:
2007+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
19432008
case Intrinsic::aarch64_sve_smin:
1944-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
2009+
case Intrinsic::aarch64_sve_smin_u:
2010+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
19452011
case Intrinsic::aarch64_sve_smulh:
1946-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
2012+
case Intrinsic::aarch64_sve_smulh_u:
2013+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
19472014
case Intrinsic::aarch64_sve_sub:
19482015
return instCombineSVEVectorSub(IC, II);
19492016
case Intrinsic::aarch64_sve_sub_u:
1950-
return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1951-
Intrinsic::aarch64_sve_mls_u>(
1952-
IC, II, true);
2017+
return instCombineSVEVectorSubU(IC, II);
19532018
case Intrinsic::aarch64_sve_uabd:
1954-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
2019+
case Intrinsic::aarch64_sve_uabd_u:
2020+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
19552021
case Intrinsic::aarch64_sve_umax:
1956-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
2022+
case Intrinsic::aarch64_sve_umax_u:
2023+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u);
19572024
case Intrinsic::aarch64_sve_umin:
1958-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
2025+
case Intrinsic::aarch64_sve_umin_u:
2026+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u);
19592027
case Intrinsic::aarch64_sve_umulh:
1960-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
2028+
case Intrinsic::aarch64_sve_umulh_u:
2029+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u);
19612030
case Intrinsic::aarch64_sve_asr:
1962-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u);
2031+
case Intrinsic::aarch64_sve_asr_u:
2032+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u);
19632033
case Intrinsic::aarch64_sve_lsl:
1964-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u);
2034+
case Intrinsic::aarch64_sve_lsl_u:
2035+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u);
19652036
case Intrinsic::aarch64_sve_lsr:
1966-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u);
2037+
case Intrinsic::aarch64_sve_lsr_u:
2038+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u);
19672039
case Intrinsic::aarch64_sve_and:
1968-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u);
2040+
case Intrinsic::aarch64_sve_and_u:
2041+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u);
19692042
case Intrinsic::aarch64_sve_bic:
1970-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u);
2043+
case Intrinsic::aarch64_sve_bic_u:
2044+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u);
19712045
case Intrinsic::aarch64_sve_eor:
1972-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u);
2046+
case Intrinsic::aarch64_sve_eor_u:
2047+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u);
19732048
case Intrinsic::aarch64_sve_orr:
1974-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u);
2049+
case Intrinsic::aarch64_sve_orr_u:
2050+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u);
19752051
case Intrinsic::aarch64_sve_sqsub:
1976-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u);
2052+
case Intrinsic::aarch64_sve_sqsub_u:
2053+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u);
19772054
case Intrinsic::aarch64_sve_uqsub:
1978-
return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u);
2055+
case Intrinsic::aarch64_sve_uqsub_u:
2056+
return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u);
19792057
case Intrinsic::aarch64_sve_tbl:
19802058
return instCombineSVETBL(IC, II);
19812059
case Intrinsic::aarch64_sve_uunpkhi:

0 commit comments

Comments
 (0)