[VPlan] Add nuw to unrolled canonical IVs#183716
Conversation
After llvm#183080, the canonical IV (not the increment!) can't overflow. So now canonical IVs that are unrolled will have steps that don't overflow, so we can add the nuw flag. This allows us to tighten the VPlanVerifier isKnownMonotonic check by restricting it to adds with nuw.
|
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-vectorizers Author: Luke Lau (lukel97) ChangesAfter #183080, the canonical IV (not the increment!) can't overflow. So now canonical IVs that are unrolled will have steps that don't overflow, so we can add the nuw flag. This allows us to tighten the VPlanVerifier isKnownMonotonic check by restricting it to adds with nuw. Patch is 92.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/183716.diff 40 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 8e394ce7f090c..faffbd452b096 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -220,6 +220,8 @@ void UnrollState::unrollWidenInductionByUF(
} else {
AddOpc = Instruction::Add;
AddFlags = VPIRFlags::getDefaultFlags(AddOpc);
+ if (cast<VPWidenIntOrFpInductionRecipe>(IV)->isCanonical())
+ AddFlags = VPIRFlags::WrapFlagsTy(/*NUW=*/true, /*NSW=*/false);
}
for (unsigned Part = 1; Part != UF; ++Part) {
std::string Name =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index d62a71883b6ea..1399649d1dbb2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -141,9 +141,9 @@ bool VPlanVerifier::verifyPhiRecipes(const VPBasicBlock *VPBB) {
static bool isKnownMonotonic(VPValue *V) {
VPValue *X, *Y;
- // TODO: Check for hasNoUnsignedWrap() when we set nuw in VPlanUnroll
if (match(V, m_Add(m_VPValue(X), m_VPValue(Y))))
- return isKnownMonotonic(X) && isKnownMonotonic(Y);
+ return cast<VPRecipeWithIRFlags>(V)->hasNoUnsignedWrap() &&
+ isKnownMonotonic(X) && isKnownMonotonic(Y);
if (match(V, m_StepVector()))
return true;
// Only handle a subset of IVs until we can guarantee there's no overflow.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index c541f79ae1812..192495f0b99de 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1115,7 +1115,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; DEFAULT: [[VECTOR_BODY]]:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; DEFAULT-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 1)
; DEFAULT-NEXT: [[TMP1:%.*]] = trunc nuw nsw <4 x i64> [[TMP0]] to <4 x i32>
; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 85726c161cc54..eea496303a206 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -115,7 +115,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4
@@ -281,7 +281,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10)
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index f2c2c636f92b9..337fcf451b2f8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -65,7 +65,7 @@ define void @test_invar_gep(ptr %dst) #0 {
; IC2: vector.body:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP22:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP21]]
+; IC2-NEXT: [[TMP22:%.*]] = add nuw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP21]]
; IC2-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; IC2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
; IC2-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
index 9c9956bb76689..e5886d83c0182 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
@@ -13,7 +13,7 @@ define i64 @reduction(i64 %arg) #0 {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP5]] = or <4 x i32> [[VEC_PHI]], splat (i32 1)
; CHECK-NEXT: [[TMP1]] = or <4 x i32> [[VEC_PHI2]], splat (i32 1)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
index b439353444409..a6d956ccac3d1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
@@ -164,9 +164,9 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i32> [[STEP_ADD]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i32> [[STEP_ADD_2]], splat (i32 2)
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 6
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index fceab6f823d5a..b6ddf1d3e7a90 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -152,7 +152,7 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer
@@ -205,9 +205,9 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2)
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i64> [[STEP_ADD]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i64> [[STEP_ADD_2]], splat (i64 2)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr [[DST]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
index eca08acca8870..17a64357b9116 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
@@ -18,7 +18,7 @@ define i64 @test_vectorize_select_umin_first_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -107,7 +107,7 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -196,7 +196,7 @@ define i64 @test_vectorize_select_smin_first_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -285,7 +285,7 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -374,7 +374,7 @@ define i64 @test_vectorize_select_umax_first_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -463,7 +463,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -552,7 +552,7 @@ define i64 @test_vectorize_select_smax_first_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -641,7 +641,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
index 615852f960bd4..5380658a84653 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
@@ -39,17 +39,17 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
; CHECK-NEXT: [[VEC_PHI19:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP65:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI20:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI21:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP67:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_4:%.*]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_5:%.*]] = add <2 x i64> [[STEP_ADD_4]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_6:%.*]] = add <2 x i64> [[STEP_ADD_5]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_7:%.*]] = add <2 x i64> [[STEP_ADD_6]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_8:%.*]] = add <2 x i64> [[STEP_ADD_7]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_9:%.*]] = add <2 x i64> [[STEP_ADD_8]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_10:%.*]] = add <2 x i64> [[STEP_ADD_9]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_11:%.*]] = add <2 x i64> [[STEP_ADD_10]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i64> [[STEP_ADD]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i64> [[STEP_ADD_2]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_4:%.*]] = add nuw <2 x i64> [[STEP_ADD_3]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_5:%.*]] = add nuw <2 x i64> [[STEP_ADD_4]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_6:%.*]] = add nuw <2 x i64> [[STEP_ADD_5]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_7:%.*]] = add nuw <2 x i64> [[STEP_ADD_6]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_8:%.*]] = add nuw <2 x i64> [[STEP_ADD_7]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_9:%.*]] = add nuw <2 x i64> [[STEP_ADD_8]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_10:%.*]] = add nuw <2 x i64> [[STEP_ADD_9]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_11:%.*]] = add nuw <2 x i64> [[STEP_ADD_10]], splat (i64 2)
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4
@@ -175,14 +175,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
; CHECK-NEXT: [[CMP_N33:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]]
; CHECK-NEXT: br i1 [[CMP_N33]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_...
[truncated]
|
|
@llvm/pr-subscribers-backend-powerpc Author: Luke Lau (lukel97) ChangesAfter #183080, the canonical IV (not the increment!) can't overflow. So now canonical IVs that are unrolled will have steps that don't overflow, so we can add the nuw flag. This allows us to tighten the VPlanVerifier isKnownMonotonic check by restricting it to adds with nuw. Patch is 92.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/183716.diff 40 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 8e394ce7f090c..faffbd452b096 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -220,6 +220,8 @@ void UnrollState::unrollWidenInductionByUF(
} else {
AddOpc = Instruction::Add;
AddFlags = VPIRFlags::getDefaultFlags(AddOpc);
+ if (cast<VPWidenIntOrFpInductionRecipe>(IV)->isCanonical())
+ AddFlags = VPIRFlags::WrapFlagsTy(/*NUW=*/true, /*NSW=*/false);
}
for (unsigned Part = 1; Part != UF; ++Part) {
std::string Name =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index d62a71883b6ea..1399649d1dbb2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -141,9 +141,9 @@ bool VPlanVerifier::verifyPhiRecipes(const VPBasicBlock *VPBB) {
static bool isKnownMonotonic(VPValue *V) {
VPValue *X, *Y;
- // TODO: Check for hasNoUnsignedWrap() when we set nuw in VPlanUnroll
if (match(V, m_Add(m_VPValue(X), m_VPValue(Y))))
- return isKnownMonotonic(X) && isKnownMonotonic(Y);
+ return cast<VPRecipeWithIRFlags>(V)->hasNoUnsignedWrap() &&
+ isKnownMonotonic(X) && isKnownMonotonic(Y);
if (match(V, m_StepVector()))
return true;
// Only handle a subset of IVs until we can guarantee there's no overflow.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index c541f79ae1812..192495f0b99de 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -1115,7 +1115,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; DEFAULT: [[VECTOR_BODY]]:
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; DEFAULT-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 1)
; DEFAULT-NEXT: [[TMP1:%.*]] = trunc nuw nsw <4 x i64> [[TMP0]] to <4 x i32>
; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 85726c161cc54..eea496303a206 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -115,7 +115,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2
; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4
@@ -281,7 +281,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10)
; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
index f2c2c636f92b9..337fcf451b2f8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll
@@ -65,7 +65,7 @@ define void @test_invar_gep(ptr %dst) #0 {
; IC2: vector.body:
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IC2-NEXT: [[TMP22:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP21]]
+; IC2-NEXT: [[TMP22:%.*]] = add nuw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP21]]
; IC2-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; IC2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
; IC2-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
index 9c9956bb76689..e5886d83c0182 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll
@@ -13,7 +13,7 @@ define i64 @reduction(i64 %arg) #0 {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP5]] = or <4 x i32> [[VEC_PHI]], splat (i32 1)
; CHECK-NEXT: [[TMP1]] = or <4 x i32> [[VEC_PHI2]], splat (i32 1)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
index b439353444409..a6d956ccac3d1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll
@@ -164,9 +164,9 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i32> [[STEP_ADD]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i32> [[STEP_ADD_2]], splat (i32 2)
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 6
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index fceab6f823d5a..b6ddf1d3e7a90 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -152,7 +152,7 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2)
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[VEC_IND]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer
@@ -205,9 +205,9 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2)
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i64> [[STEP_ADD]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i64> [[STEP_ADD_2]], splat (i64 2)
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr [[DST]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
index eca08acca8870..17a64357b9116 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll
@@ -18,7 +18,7 @@ define i64 @test_vectorize_select_umin_first_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -107,7 +107,7 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -196,7 +196,7 @@ define i64 @test_vectorize_select_smin_first_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -285,7 +285,7 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -374,7 +374,7 @@ define i64 @test_vectorize_select_umax_first_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -463,7 +463,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -552,7 +552,7 @@ define i64 @test_vectorize_select_smax_first_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
@@ -641,7 +641,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) {
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
index 615852f960bd4..5380658a84653 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
@@ -39,17 +39,17 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
; CHECK-NEXT: [[VEC_PHI19:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP65:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI20:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI21:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP67:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_4:%.*]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_5:%.*]] = add <2 x i64> [[STEP_ADD_4]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_6:%.*]] = add <2 x i64> [[STEP_ADD_5]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_7:%.*]] = add <2 x i64> [[STEP_ADD_6]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_8:%.*]] = add <2 x i64> [[STEP_ADD_7]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_9:%.*]] = add <2 x i64> [[STEP_ADD_8]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_10:%.*]] = add <2 x i64> [[STEP_ADD_9]], splat (i64 2)
-; CHECK-NEXT: [[STEP_ADD_11:%.*]] = add <2 x i64> [[STEP_ADD_10]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i64> [[STEP_ADD]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i64> [[STEP_ADD_2]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_4:%.*]] = add nuw <2 x i64> [[STEP_ADD_3]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_5:%.*]] = add nuw <2 x i64> [[STEP_ADD_4]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_6:%.*]] = add nuw <2 x i64> [[STEP_ADD_5]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_7:%.*]] = add nuw <2 x i64> [[STEP_ADD_6]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_8:%.*]] = add nuw <2 x i64> [[STEP_ADD_7]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_9:%.*]] = add nuw <2 x i64> [[STEP_ADD_8]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_10:%.*]] = add nuw <2 x i64> [[STEP_ADD_9]], splat (i64 2)
+; CHECK-NEXT: [[STEP_ADD_11:%.*]] = add nuw <2 x i64> [[STEP_ADD_10]], splat (i64 2)
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4
@@ -175,14 +175,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
; CHECK-NEXT: [[CMP_N33:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]]
; CHECK-NEXT: br i1 [[CMP_N33]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_...
[truncated]
|
fhahn
left a comment
There was a problem hiding this comment.
LGTM, thanks
should be fine, as the last lane of the increment will be VEC_N -1, which should not wrap even if N_VEC wrapped around to zero
After llvm#183080, the canonical IV (not the increment!) can't overflow. So now canonical IVs that are unrolled will have steps that don't overflow, so we can add the nuw flag. This allows us to tighten the VPlanVerifier isKnownMonotonic check by restricting it to adds with nuw.
After #183080, the canonical IV (not the increment!) can't overflow. So now canonical IVs that are unrolled will have steps that don't overflow, so we can add the nuw flag.
This allows us to tighten the VPlanVerifier isKnownMonotonic check by restricting it to adds with nuw.