From b0e5b947735b9597a4c55f8dc16b4b54a4655c14 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 12 Jan 2026 13:37:22 +0000 Subject: [PATCH 1/3] [AArch64] Tweak fixed-length loop.dependence.mask costs It's not free (MOV + XTN) to convert from the predicate result of whilewr/rw to a fixed-length mask, so the cost should be slightly higher. --- .../AArch64/AArch64TargetTransformInfo.cpp | 12 +++++++++--- .../CostModel/AArch64/loop_dependence_mask.ll | 16 ++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index bdf06e39d7367..bf7792be87208 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1071,9 +1071,15 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, EVT VecVT = getTLI()->getValueType(DL, RetTy); unsigned EltSizeInBytes = cast(ICA.getArgs()[2])->getZExtValue(); - if (is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes) && - VecVT.getVectorMinNumElements() == (16 / EltSizeInBytes)) - return 1; + if (!is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes) || + VecVT.getVectorMinNumElements() != (16 / EltSizeInBytes)) + break; + InstructionCost Cost = 1; + // For fixed-vector types at least a MOV and XTN are needed to convert + // from the predicate to a fixed-length mask. + if (isa(RetTy)) + Cost += 2; + return Cost; } break; } diff --git a/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll index 5b3070fcf347e..7acd776a91b3c 100644 --- a/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll +++ b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll @@ -17,10 +17,10 @@ define void @loop_dependence_war_mask(ptr %a, ptr %b) { ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-LABEL: 'loop_dependence_war_mask' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res5 = call @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res6 = call @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res7 = call @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 4) @@ -54,10 +54,10 @@ define void @loop_dependence_raw_mask(ptr %a, ptr %b) { ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-LABEL: 'loop_dependence_raw_mask' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res5 = call @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res6 = call @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res7 = call @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 4) From 4827e6e697ce4ebe5f68958fedc63e372c56a036 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 6 Feb 2026 11:15:51 +0000 Subject: [PATCH 2/3] Fixups --- .../AArch64/AArch64TargetTransformInfo.cpp | 5 ++--- .../CostModel/AArch64/loop_dependence_mask.ll | 16 ++++++++-------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index bf7792be87208..dae0bcddcf93f 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1075,10 +1075,9 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, VecVT.getVectorMinNumElements() != (16 / EltSizeInBytes)) break; InstructionCost Cost = 1; - // For fixed-vector types at least a MOV and XTN are needed to convert - // from the predicate to a fixed-length mask. + // For fixed-vector types we need to AND the mask with a ptrue vl. if (isa(RetTy)) - Cost += 2; + Cost += 1; return Cost; } break; diff --git a/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll index 7acd776a91b3c..74bd41db4a64d 100644 --- a/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll +++ b/llvm/test/Analysis/CostModel/AArch64/loop_dependence_mask.ll @@ -17,10 +17,10 @@ define void @loop_dependence_war_mask(ptr %a, ptr %b) { ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-LABEL: 'loop_dependence_war_mask' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res5 = call @llvm.loop.dependence.war.mask.nxv16i1(ptr %a, ptr %b, i64 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res6 = call @llvm.loop.dependence.war.mask.nxv8i1(ptr %a, ptr %b, i64 2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res7 = call @llvm.loop.dependence.war.mask.nxv4i1(ptr %a, ptr %b, i64 4) @@ -54,10 +54,10 @@ define void @loop_dependence_raw_mask(ptr %a, ptr %b) { ; CHECK-EXPANDED-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-LABEL: 'loop_dependence_raw_mask' -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res5 = call @llvm.loop.dependence.raw.mask.nxv16i1(ptr %a, ptr %b, i64 1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res6 = call @llvm.loop.dependence.raw.mask.nxv8i1(ptr %a, ptr %b, i64 2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res7 = call @llvm.loop.dependence.raw.mask.nxv4i1(ptr %a, ptr %b, i64 4) From 4b30f0de8b6efa30b0ece923b2ec8401cb82d0be Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 6 Feb 2026 14:56:18 +0000 Subject: [PATCH 3/3] Fixups --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index dae0bcddcf93f..c9d775367f929 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1074,11 +1074,8 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if (!is_contained({1u, 2u, 4u, 8u}, EltSizeInBytes) || VecVT.getVectorMinNumElements() != (16 / EltSizeInBytes)) break; - InstructionCost Cost = 1; // For fixed-vector types we need to AND the mask with a ptrue vl. - if (isa(RetTy)) - Cost += 1; - return Cost; + return isa(RetTy) ? 2 : 1; } break; }