diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index bdf06e39d7367..c9d775367f929 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1071,9 +1071,11 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, EVT VecVT = getTLI()->getValueType(DL, RetTy); unsigned EltSizeInBytes = cast<ConstantInt>(ICA.getArgs()[2])->getZExtValue(); - if (is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes) && - VecVT.getVectorMinNumElements() == (16 / EltSizeInBytes)) - return 1; + if (!is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes) || + VecVT.getVectorMinNumElements() != (16 / EltSizeInBytes)) + break; + // For fixed-vector types we need to AND the mask with a ptrue vl. + return isa<FixedVectorType>(RetTy) ? 2 : 1; } break; } diff --git a/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll index 5b3070fcf347e..74bd41db4a64d 100644 --- a/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll +++ b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll @@ -17,10 +17,10 @@ define void @loop_dependence_war_mask(ptr %a, ptr %b) { ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-LABEL: 'loop_dependence_war_mask' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 
8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 4) @@ -54,10 +54,10 @@ define void @loop_dependence_raw_mask(ptr %a, ptr %b) { ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-LABEL: 'loop_dependence_raw_mask' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res1 = call <16 x i1> 
@llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res5 = call <vscale x 16 x i1> @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res6 = call <vscale x 8 x i1> @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res7 = call <vscale x 4 x i1> @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 4)