@@ -1406,9 +1406,30 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
14061406 return &II;
14071407}
14081408
1409+ // Optimize operations that take an all false predicate or send them for
1410+ // canonicalization.
1411+ static std::optional<Instruction *>
1412+ instCombineSVEAllOrNoActive (InstCombiner &IC, IntrinsicInst &II,
1413+ Intrinsic::ID IID) {
1414+ if (match (II.getOperand (0 ), m_ZeroInt ())) {
1415+ if (II.getIntrinsicID () != IID)
1416+ // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
1417+ // inactive for sv[func]_m or sv[func]_z
1418+ return IC.replaceInstUsesWith (II, II.getOperand (1 ));
1419+ else
1420+ // llvm_ir_u, pred(0), op1, op2 - Spec says to return undef when all lanes
1421+ // are inactive for sv[func]_x
1422+ return IC.replaceInstUsesWith (II, UndefValue::get (II.getType ()));
1423+ }
1424+ if (II.getIntrinsicID () != IID)
1425+ return instCombineSVEAllActive (II, IID);
1426+ return std::nullopt ;
1427+ }
1428+
14091429static std::optional<Instruction *> instCombineSVEVectorAdd (InstCombiner &IC,
14101430 IntrinsicInst &II) {
1411- if (auto II_U = instCombineSVEAllActive (II, Intrinsic::aarch64_sve_add_u))
1431+ if (auto II_U =
1432+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_add_u))
14121433 return II_U;
14131434 if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
14141435 Intrinsic::aarch64_sve_mla>(
@@ -1421,9 +1442,22 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
14211442 return std::nullopt ;
14221443}
14231444
1445+ static std::optional<Instruction *>
1446+ instCombineSVEVectorAddU (InstCombiner &IC, IntrinsicInst &II) {
1447+ if (auto II_U =
1448+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_add_u))
1449+ return II_U;
1450+ else {
1451+ return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1452+ Intrinsic::aarch64_sve_mla_u>(
1453+ IC, II, true );
1454+ }
1455+ }
1456+
14241457static std::optional<Instruction *>
14251458instCombineSVEVectorFAdd (InstCombiner &IC, IntrinsicInst &II) {
1426- if (auto II_U = instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fadd_u))
1459+ if (auto II_U =
1460+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fadd_u))
14271461 return II_U;
14281462 if (auto FMLA =
14291463 instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1445,6 +1479,9 @@ instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
14451479
14461480static std::optional<Instruction *>
14471481instCombineSVEVectorFAddU (InstCombiner &IC, IntrinsicInst &II) {
1482+ if (auto II_U =
1483+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fadd_u))
1484+ return II_U;
14481485 if (auto FMLA =
14491486 instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
14501487 Intrinsic::aarch64_sve_fmla>(IC, II,
@@ -1465,7 +1502,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
14651502
14661503static std::optional<Instruction *>
14671504instCombineSVEVectorFSub (InstCombiner &IC, IntrinsicInst &II) {
1468- if (auto II_U = instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fsub_u))
1505+ if (auto II_U =
1506+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fsub_u))
14691507 return II_U;
14701508 if (auto FMLS =
14711509 instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1487,6 +1525,9 @@ instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
14871525
14881526static std::optional<Instruction *>
14891527instCombineSVEVectorFSubU (InstCombiner &IC, IntrinsicInst &II) {
1528+ if (auto II_U =
1529+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fsub_u))
1530+ return II_U;
14901531 if (auto FMLS =
14911532 instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
14921533 Intrinsic::aarch64_sve_fmls>(IC, II,
@@ -1507,7 +1548,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
15071548
15081549static std::optional<Instruction *> instCombineSVEVectorSub (InstCombiner &IC,
15091550 IntrinsicInst &II) {
1510- if (auto II_U = instCombineSVEAllActive (II, Intrinsic::aarch64_sve_sub_u))
1551+ if (auto II_U =
1552+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_sub_u))
15111553 return II_U;
15121554 if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
15131555 Intrinsic::aarch64_sve_mls>(
@@ -1516,17 +1558,27 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
15161558 return std::nullopt ;
15171559}
15181560
1561+ static std::optional<Instruction *>
1562+ instCombineSVEVectorSubU (InstCombiner &IC, IntrinsicInst &II) {
1563+ if (auto II_U =
1564+ instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_sub_u))
1565+ return II_U;
1566+ else {
1567+ return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1568+ Intrinsic::aarch64_sve_mls_u>(
1569+ IC, II, true );
1570+ }
1571+ }
1572+
15191573static std::optional<Instruction *> instCombineSVEVectorMul (InstCombiner &IC,
15201574 IntrinsicInst &II,
15211575 Intrinsic::ID IID) {
15221576 auto *OpPredicate = II.getOperand (0 );
15231577 auto *OpMultiplicand = II.getOperand (1 );
15241578 auto *OpMultiplier = II.getOperand (2 );
15251579
1526- // Canonicalise a non _u intrinsic only.
1527- if (II.getIntrinsicID () != IID)
1528- if (auto II_U = instCombineSVEAllActive (II, IID))
1529- return II_U;
1580+ if (auto II_U = instCombineSVEAllOrNoActive (IC, II, IID))
1581+ return II_U;
15301582
15311583 // Return true if a given instruction is a unit splat value, false otherwise.
15321584 auto IsUnitSplat = [](auto *I) {
@@ -1891,91 +1943,117 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
18911943 case Intrinsic::aarch64_sve_ptest_last:
18921944 return instCombineSVEPTest (IC, II);
18931945 case Intrinsic::aarch64_sve_fabd:
1894- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fabd_u);
1946+ case Intrinsic::aarch64_sve_fabd_u:
1947+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fabd_u);
18951948 case Intrinsic::aarch64_sve_fadd:
18961949 return instCombineSVEVectorFAdd (IC, II);
18971950 case Intrinsic::aarch64_sve_fadd_u:
18981951 return instCombineSVEVectorFAddU (IC, II);
18991952 case Intrinsic::aarch64_sve_fdiv:
1900- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fdiv_u);
1953+ case Intrinsic::aarch64_sve_fdiv_u:
1954+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fdiv_u);
19011955 case Intrinsic::aarch64_sve_fmax:
1902- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fmax_u);
1956+ case Intrinsic::aarch64_sve_fmax_u:
1957+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmax_u);
19031958 case Intrinsic::aarch64_sve_fmaxnm:
1904- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fmaxnm_u);
1959+ case Intrinsic::aarch64_sve_fmaxnm_u:
1960+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
19051961 case Intrinsic::aarch64_sve_fmin:
1906- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fmin_u);
1962+ case Intrinsic::aarch64_sve_fmin_u:
1963+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmin_u);
19071964 case Intrinsic::aarch64_sve_fminnm:
1908- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fminnm_u);
1965+ case Intrinsic::aarch64_sve_fminnm_u:
1966+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fminnm_u);
19091967 case Intrinsic::aarch64_sve_fmla:
1910- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fmla_u);
1968+ case Intrinsic::aarch64_sve_fmla_u:
1969+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmla_u);
19111970 case Intrinsic::aarch64_sve_fmls:
1912- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fmls_u);
1971+ case Intrinsic::aarch64_sve_fmls_u:
1972+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmls_u);
19131973 case Intrinsic::aarch64_sve_fmul:
19141974 case Intrinsic::aarch64_sve_fmul_u:
19151975 return instCombineSVEVectorMul (IC, II, Intrinsic::aarch64_sve_fmul_u);
19161976 case Intrinsic::aarch64_sve_fmulx:
1917- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fmulx_u);
1977+ case Intrinsic::aarch64_sve_fmulx_u:
1978+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fmulx_u);
19181979 case Intrinsic::aarch64_sve_fnmla:
1919- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fnmla_u);
1980+ case Intrinsic::aarch64_sve_fnmla_u:
1981+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fnmla_u);
19201982 case Intrinsic::aarch64_sve_fnmls:
1921- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_fnmls_u);
1983+ case Intrinsic::aarch64_sve_fnmls_u:
1984+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_fnmls_u);
19221985 case Intrinsic::aarch64_sve_fsub:
19231986 return instCombineSVEVectorFSub (IC, II);
19241987 case Intrinsic::aarch64_sve_fsub_u:
19251988 return instCombineSVEVectorFSubU (IC, II);
19261989 case Intrinsic::aarch64_sve_add:
19271990 return instCombineSVEVectorAdd (IC, II);
19281991 case Intrinsic::aarch64_sve_add_u:
1929- return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1930- Intrinsic::aarch64_sve_mla_u>(
1931- IC, II, true );
1992+ return instCombineSVEVectorAddU (IC, II);
19321993 case Intrinsic::aarch64_sve_mla:
1933- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_mla_u);
1994+ case Intrinsic::aarch64_sve_mla_u:
1995+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_mla_u);
19341996 case Intrinsic::aarch64_sve_mls:
1935- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_mls_u);
1997+ case Intrinsic::aarch64_sve_mls_u:
1998+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_mls_u);
19361999 case Intrinsic::aarch64_sve_mul:
19372000 case Intrinsic::aarch64_sve_mul_u:
19382001 return instCombineSVEVectorMul (IC, II, Intrinsic::aarch64_sve_mul_u);
19392002 case Intrinsic::aarch64_sve_sabd:
1940- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_sabd_u);
2003+ case Intrinsic::aarch64_sve_sabd_u:
2004+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_sabd_u);
19412005 case Intrinsic::aarch64_sve_smax:
1942- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_smax_u);
2006+ case Intrinsic::aarch64_sve_smax_u:
2007+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_smax_u);
19432008 case Intrinsic::aarch64_sve_smin:
1944- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_smin_u);
2009+ case Intrinsic::aarch64_sve_smin_u:
2010+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_smin_u);
19452011 case Intrinsic::aarch64_sve_smulh:
1946- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_smulh_u);
2012+ case Intrinsic::aarch64_sve_smulh_u:
2013+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_smulh_u);
19472014 case Intrinsic::aarch64_sve_sub:
19482015 return instCombineSVEVectorSub (IC, II);
19492016 case Intrinsic::aarch64_sve_sub_u:
1950- return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
1951- Intrinsic::aarch64_sve_mls_u>(
1952- IC, II, true );
2017+ return instCombineSVEVectorSubU (IC, II);
19532018 case Intrinsic::aarch64_sve_uabd:
1954- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_uabd_u);
2019+ case Intrinsic::aarch64_sve_uabd_u:
2020+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_uabd_u);
19552021 case Intrinsic::aarch64_sve_umax:
1956- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_umax_u);
2022+ case Intrinsic::aarch64_sve_umax_u:
2023+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_umax_u);
19572024 case Intrinsic::aarch64_sve_umin:
1958- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_umin_u);
2025+ case Intrinsic::aarch64_sve_umin_u:
2026+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_umin_u);
19592027 case Intrinsic::aarch64_sve_umulh:
1960- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_umulh_u);
2028+ case Intrinsic::aarch64_sve_umulh_u:
2029+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_umulh_u);
19612030 case Intrinsic::aarch64_sve_asr:
1962- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_asr_u);
2031+ case Intrinsic::aarch64_sve_asr_u:
2032+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_asr_u);
19632033 case Intrinsic::aarch64_sve_lsl:
1964- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_lsl_u);
2034+ case Intrinsic::aarch64_sve_lsl_u:
2035+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_lsl_u);
19652036 case Intrinsic::aarch64_sve_lsr:
1966- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_lsr_u);
2037+ case Intrinsic::aarch64_sve_lsr_u:
2038+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_lsr_u);
19672039 case Intrinsic::aarch64_sve_and:
1968- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_and_u);
2040+ case Intrinsic::aarch64_sve_and_u:
2041+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_and_u);
19692042 case Intrinsic::aarch64_sve_bic:
1970- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_bic_u);
2043+ case Intrinsic::aarch64_sve_bic_u:
2044+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_bic_u);
19712045 case Intrinsic::aarch64_sve_eor:
1972- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_eor_u);
2046+ case Intrinsic::aarch64_sve_eor_u:
2047+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_eor_u);
19732048 case Intrinsic::aarch64_sve_orr:
1974- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_orr_u);
2049+ case Intrinsic::aarch64_sve_orr_u:
2050+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_orr_u);
19752051 case Intrinsic::aarch64_sve_sqsub:
1976- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_sqsub_u);
2052+ case Intrinsic::aarch64_sve_sqsub_u:
2053+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_sqsub_u);
19772054 case Intrinsic::aarch64_sve_uqsub:
1978- return instCombineSVEAllActive (II, Intrinsic::aarch64_sve_uqsub_u);
2055+ case Intrinsic::aarch64_sve_uqsub_u:
2056+ return instCombineSVEAllOrNoActive (IC, II, Intrinsic::aarch64_sve_uqsub_u);
19792057 case Intrinsic::aarch64_sve_tbl:
19802058 return instCombineSVETBL (IC, II);
19812059 case Intrinsic::aarch64_sve_uunpkhi:
0 commit comments