From 97ab9ffbe6c3ae30306e60306f7e13c9dd3fdb90 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Fri, 27 Feb 2026 18:00:37 +0800
Subject: [PATCH] [VPlan] Add nuw to unrolled canonical IVs

After #183080, the canonical IV (not the increment!) can't overflow. So now
canonical IVs that are unrolled will have steps that don't overflow, so we
can add the nuw flag.

This allows us to tighten the VPlanVerifier isKnownMonotonic check by
restricting it to adds with nuw.
---
 llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +
 .../Transforms/Vectorize/VPlanVerifier.cpp | 4 +-
 .../AArch64/conditional-branches-cost.ll | 2 +-
 .../epilog-vectorization-widen-inductions.ll | 4 +-
 .../AArch64/pr60831-sve-inv-store-crash.ll | 2 +-
 .../LoopVectorize/AArch64/reduction-cost.ll | 2 +-
 .../replicating-load-store-costs-apple.ll | 6 +--
 .../AArch64/replicating-load-store-costs.ll | 8 ++--
 .../LoopVectorize/AArch64/select-index.ll | 16 +++----
 .../LoopVectorize/PowerPC/exit-branch-cost.ll | 34 ++++++-------
 .../LoopVectorize/RISCV/strided-accesses.ll | 8 ++--
 .../LoopVectorize/RISCV/uniform-load-store.ll | 4 +-
 .../X86/cost-constant-known-via-scev.ll | 2 +-
 .../LoopVectorize/X86/cost-model.ll | 6 +--
 .../LoopVectorize/X86/induction-costs.ll | 12 ++---
 .../LoopVectorize/X86/load-deref-pred.ll | 6 +--
 .../LoopVectorize/X86/masked-store-cost.ll | 6 +--
 .../X86/pr131359-dead-for-splice.ll | 4 +-
 .../Transforms/LoopVectorize/X86/pr54634.ll | 6 +--
 .../X86/replicating-load-store-costs.ll | 16 +++----
 .../LoopVectorize/X86/uniform_mem_op.ll | 6 +--
 ...ned-value-used-as-scalar-and-first-lane.ll | 6 +--
 llvm/test/Transforms/LoopVectorize/assume.ll | 2 +-
 .../cse-gep-source-element-type.ll | 2 +-
 .../LoopVectorize/find-last-iv-interleave.ll | 4 +-
 .../first-order-recurrence-tail-folding.ll | 6 +--
 .../LoopVectorize/first-order-recurrence.ll | 4 +-
 ...fmax-without-fast-math-flags-interleave.ll | 2 +-
 .../Transforms/LoopVectorize/induction.ll | 14 +++---
 .../Transforms/LoopVectorize/iv-select-cmp.ll | 48 +++++++++----------
 .../test/Transforms/LoopVectorize/metadata.ll | 2 +-
 .../LoopVectorize/noalias-scope-decl.ll | 2 +-
 .../pr45679-fold-tail-by-masking.ll | 4 +-
 .../LoopVectorize/predicate-switch.ll | 2 +-
 .../LoopVectorize/reduction-inloop-uf4.ll | 15 +++---
 .../LoopVectorize/scalable-assume.ll | 2 +-
 .../scalable-first-order-recurrence.ll | 4 +-
 .../LoopVectorize/scalable-inductions.ll | 4 +-
 .../select-index-interleaving.ll | 16 +++----
 .../tail-folding-vectorization-factor-1.ll | 6 +--
 40 files changed, 150 insertions(+), 151 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 8e394ce7f090c..faffbd452b096 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -220,6 +220,8 @@ void UnrollState::unrollWidenInductionByUF(
   } else {
     AddOpc = Instruction::Add;
     AddFlags = VPIRFlags::getDefaultFlags(AddOpc);
+    if (cast(IV)->isCanonical())
+      AddFlags = VPIRFlags::WrapFlagsTy(/*NUW=*/true, /*NSW=*/false);
   }
   for (unsigned Part = 1; Part != UF; ++Part) {
     std::string Name =
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index d62a71883b6ea..1399649d1dbb2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -141,9 +141,9 @@ bool VPlanVerifier::verifyPhiRecipes(const VPBasicBlock *VPBB) {
 static bool isKnownMonotonic(VPValue *V) {
   VPValue *X, *Y;
-  // TODO: Check for hasNoUnsignedWrap()
when we set nuw in VPlanUnroll if (match(V, m_Add(m_VPValue(X), m_VPValue(Y)))) - return isKnownMonotonic(X) && isKnownMonotonic(Y); + return cast(V)->hasNoUnsignedWrap() && + isKnownMonotonic(X) && isKnownMonotonic(Y); if (match(V, m_StepVector())) return true; // Only handle a subset of IVs until we can guarantee there's no overflow. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index c541f79ae1812..192495f0b99de 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -1115,7 +1115,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) ; DEFAULT-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 1) ; DEFAULT-NEXT: [[TMP1:%.*]] = trunc nuw nsw <4 x i64> [[TMP0]] to <4 x i32> ; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index 85726c161cc54..eea496303a206 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -115,7 +115,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4 @@ -281,7 +281,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll index f2c2c636f92b9..337fcf451b2f8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll +++ 
b/llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll @@ -65,7 +65,7 @@ define void @test_invar_gep(ptr %dst) #0 { ; IC2: vector.body: ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IC2-NEXT: [[DOTSPLAT:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; IC2-NEXT: [[TMP22:%.*]] = add [[DOTSPLAT]], [[TMP21]] +; IC2-NEXT: [[TMP22:%.*]] = add nuw [[DOTSPLAT]], [[TMP21]] ; IC2-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() ; IC2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4 ; IC2-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll index 9c9956bb76689..e5886d83c0182 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-cost.ll @@ -13,7 +13,7 @@ define i64 @reduction(i64 %arg) #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP5]] = or <4 x i32> [[VEC_PHI]], splat (i32 1) ; CHECK-NEXT: [[TMP1]] = or <4 x i32> [[VEC_PHI2]], splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll index b439353444409..a6d956ccac3d1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs-apple.ll @@ -164,9 +164,9 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) -; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i32> [[STEP_ADD]], splat (i32 2) -; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i32> [[STEP_ADD_2]], splat (i32 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i32> [[STEP_ADD]], splat (i32 2) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i32> [[STEP_ADD_2]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 6 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index fceab6f823d5a..b6ddf1d3e7a90 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -152,7 +152,7 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, 
%[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <2 x i32> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer @@ -205,9 +205,9 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i64> [[STEP_ADD_2]], splat (i64 2) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i128, ptr [[DST]], <2 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll index eca08acca8870..17a64357b9116 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll @@ -18,7 +18,7 @@ define i64 @test_vectorize_select_umin_first_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 @@ -107,7 +107,7 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 @@ -196,7 +196,7 @@ define i64 @test_vectorize_select_smin_first_idx(ptr 
%src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 @@ -285,7 +285,7 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 @@ -374,7 +374,7 @@ define i64 @test_vectorize_select_umax_first_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 @@ -463,7 +463,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 @@ -552,7 +552,7 @@ define i64 @test_vectorize_select_smax_first_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[LOOP]] ] -; 
CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 @@ -641,7 +641,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[GEP]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index 615852f960bd4..5380658a84653 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -39,17 +39,17 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[VEC_PHI19:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP65:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI20:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI21:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP67:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_4:%.*]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_5:%.*]] = add <2 x i64> [[STEP_ADD_4]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_6:%.*]] = add <2 x i64> [[STEP_ADD_5]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_7:%.*]] = add <2 x i64> [[STEP_ADD_6]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_8:%.*]] = add <2 x i64> [[STEP_ADD_7]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_9:%.*]] = add <2 x i64> [[STEP_ADD_8]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_10:%.*]] = add <2 x i64> [[STEP_ADD_9]], splat (i64 2) -; CHECK-NEXT: [[STEP_ADD_11:%.*]] = add <2 x i64> [[STEP_ADD_10]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <2 x i64> [[STEP_ADD]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <2 x i64> [[STEP_ADD_2]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_4:%.*]] = add nuw <2 x i64> [[STEP_ADD_3]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_5:%.*]] = add nuw <2 x i64> [[STEP_ADD_4]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_6:%.*]] = add nuw <2 x i64> [[STEP_ADD_5]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_7:%.*]] = add nuw <2 x i64> [[STEP_ADD_6]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_8:%.*]] = add nuw <2 x i64> [[STEP_ADD_7]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_9:%.*]] = add nuw <2 x i64> 
[[STEP_ADD_8]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_10:%.*]] = add nuw <2 x i64> [[STEP_ADD_9]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD_11:%.*]] = add nuw <2 x i64> [[STEP_ADD_10]], splat (i64 2) ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4 @@ -175,14 +175,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[CMP_N33:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]] ; CHECK-NEXT: br i1 [[CMP_N33]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX57:%.*]] = phi i64 [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL58:%.*]] = phi ptr [ [[TMP56]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL46:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX47:%.*]] = phi i64 [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL48:%.*]] = phi ptr [ [[TMP56]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL56]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX57]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL58]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL46]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX47]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL48]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[PTR_IV]], align 1 ; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP53]] to i64 ; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[IV]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 85d9a11cb65e0..642e28a4823e3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -54,7 +54,7 @@ define void @single_constant_stride_int_scaled(ptr %p) { ; CHECK-UF2: vector.body: ; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-UF2-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add nuw [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-UF2-NEXT: [[TMP9:%.*]] = shl nuw nsw [[VEC_IND]], splat (i64 3) ; CHECK-UF2-NEXT: [[TMP10:%.*]] = shl nuw nsw [[STEP_ADD]], splat (i64 3) ; 
CHECK-UF2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P:%.*]], [[TMP9]] @@ -873,7 +873,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED-UF2: vector.body: ; STRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-UF2-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP31]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-UF2-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; STRIDED-UF2-NEXT: [[STEP_ADD:%.*]] = add nuw [[VEC_IND]], [[BROADCAST_SPLAT]] ; STRIDED-UF2-NEXT: [[TMP33:%.*]] = mul nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT11]] ; STRIDED-UF2-NEXT: [[TMP34:%.*]] = mul nuw nsw [[STEP_ADD]], [[BROADCAST_SPLAT11]] ; STRIDED-UF2-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[P]], [[TMP33]] @@ -1325,7 +1325,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; NOSTRIDED-UF2: vector.body: ; NOSTRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NOSTRIDED-UF2-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; NOSTRIDED-UF2-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; NOSTRIDED-UF2-NEXT: [[STEP_ADD:%.*]] = add nuw [[VEC_IND]], [[BROADCAST_SPLAT]] ; NOSTRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[IN:%.*]], [[VEC_IND]] ; NOSTRIDED-UF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[IN]], [[STEP_ADD]] ; NOSTRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( align 8 [[TMP7]], splat (i1 true), poison) @@ -1403,7 +1403,7 @@ define void @constant_stride_reinterpret(ptr noalias %in, ptr noalias %out) { ; STRIDED-UF2: vector.body: ; STRIDED-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; STRIDED-UF2-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-UF2-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; STRIDED-UF2-NEXT: [[STEP_ADD:%.*]] = add nuw [[VEC_IND]], [[BROADCAST_SPLAT]] ; STRIDED-UF2-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[IN:%.*]], [[VEC_IND]] ; STRIDED-UF2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[IN]], [[STEP_ADD]] ; STRIDED-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( align 8 [[TMP7]], splat (i1 true), poison) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index adda7c362b8e8..a3b91e7ff4057 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -252,7 +252,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN: [[VECTOR_BODY]]: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) ; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10) ; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> 
@llvm.masked.gather.v4i64.v4p0(<4 x ptr> align 8 [[BROADCAST_SPLAT]], <4 x i1> [[TMP1]], <4 x i64> poison) @@ -708,7 +708,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN: [[VECTOR_BODY]]: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10) ; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10) ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> align 8 [[BROADCAST_SPLAT2]], <4 x i1> [[TMP1]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll index a6bbf8683c592..bf3ff15236038 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll @@ -71,7 +71,7 @@ define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[STEP_ADD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index f81f2a32318d4..1bd308855b9ea 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -759,9 +759,9 @@ define i32 @g(i64 %n) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) -; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i32> [[STEP_ADD]], splat (i32 4) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64> ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i32> [[STEP_ADD_2]] to <4 x i64> diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll 
b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 1272cdd7eb71c..04143338df984 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -21,9 +21,9 @@ define i32 @iv_used_widened_and_truncated(ptr %dst, i64 %N) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <8 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8) -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add nuw <8 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add nuw <8 x i64> [[STEP_ADD1]], splat (i64 8) ; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], splat (i32 8) ; CHECK-NEXT: [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], splat (i32 8) ; CHECK-NEXT: [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], splat (i32 8) @@ -461,9 +461,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8) -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add nuw <8 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add nuw <8 x i64> [[STEP_ADD1]], splat (i64 8) ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 78363e13595cb..934dd3eccb9b2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -31,9 +31,9 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add nuw <4 x i64> [[STEP_ADD1]], 
splat (i64 4) ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll index 1d0906902ad62..84f6c66d89670 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll @@ -19,9 +19,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8) -; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8) -; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <8 x i64> [[VEC_IND]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add nuw <8 x i64> [[STEP_ADD]], splat (i64 8) +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add nuw <8 x i64> [[STEP_ADD1]], splat (i64 8) ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll b/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll index 63f8036b9b6a0..a7563c0d64234 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr131359-dead-for-splice.ll @@ -16,7 +16,7 @@ define void @no_use() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 @@ -58,7 +58,7 @@ define void @dead_use() { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll index 785a248344825..95b623ef4b667 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll @@ -37,9 +37,9 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly 
%0, i32 %1) lo ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) -; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD4:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD5:%.*]] = add nuw <4 x i64> [[STEP_ADD4]], splat (i64 4) ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index 5902d588c2f41..957473775e251 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -25,9 +25,9 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I64: [[VECTOR_BODY]]: ; I64-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; I64-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I64-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; I64-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) -; I64-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) +; I64-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) +; I64-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i32> [[STEP_ADD]], splat (i32 4) +; I64-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; I64-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0 ; I64-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; I64-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 2 @@ -181,9 +181,9 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 { ; I32: [[VECTOR_BODY]]: ; I32-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; I32-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; I32-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) -; I32-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) -; I32-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) +; I32-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) +; I32-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i32> [[STEP_ADD]], splat (i32 4) +; I32-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; I32-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 ; I32-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 1 ; I32-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 2 @@ -1150,7 +1150,7 @@ define void @replicated_load_wide_store_derived_iv_zext_and(ptr noalias %src, pt ; I64-NEXT: store <4 x float> [[TMP62]], ptr [[TMP64]], align 4 ; I64-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; I64-NEXT: [[TMP65:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; I64-NEXT: br i1 [[TMP65]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; I64-NEXT: br i1 [[TMP65]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; I64: [[MIDDLE_BLOCK]]: ; I64-NEXT: br label %[[SCALAR_PH]] ; I64: [[SCALAR_PH]]: @@ -1274,7 +1274,7 @@ define void @replicated_load_wide_store_derived_iv_zext_and2(ptr noalias %dst, p ; I64-NEXT: store <4 x float> [[TMP54]], ptr [[TMP56]], align 4 ; I64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; I64-NEXT: [[TMP57:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; I64-NEXT: br i1 [[TMP57]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; I64-NEXT: br i1 [[TMP57]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; I64: [[MIDDLE_BLOCK]]: ; I64-NEXT: br label %[[SCALAR_PH]] ; I64: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll index 2d3ab3e8b5c43..df3a24ba004a8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll @@ -339,9 +339,9 @@ define i32 @test_count_bits(ptr %test_base) { ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) -; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index 26268f1ff4e94..50b6719c96ea1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -14,9 +14,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) -; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) -; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat 
(i64 4) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 8 diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll index eddd5f9ddc584..6139c722859d7 100644 --- a/llvm/test/Transforms/LoopVectorize/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/assume.ll @@ -145,7 +145,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[VEC_IND]], splat (i64 495616) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[STEP_ADD]], splat (i64 495616) ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x float> splat (float 2.300000e+01), <2 x float> splat (float 4.200000e+01) diff --git a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll index 901652537a5c5..5f92123313ba4 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll @@ -73,7 +73,7 @@ define void @cse_wide_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <4 x i64> [[STEP_ADD]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[VEC_IND]] diff --git a/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll b/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll index 97bb1374ff6e3..dd04ba50dd260 100644 --- a/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/find-last-iv-interleave.ll @@ -16,7 +16,7 @@ define i64 @findlast_iv_interleave(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8 @@ -268,7 +268,7 @@ define i32 
@findlast_iv_i32_interleave(ptr %a, i32 %n) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll index 29b2b32eb4bda..1899b1ece0889 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-tail-folding.ll @@ -95,7 +95,7 @@ define i32 @FOR_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] ; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ] -; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -384,7 +384,7 @@ define i32 @FOR_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] ; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ] -; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 @@ -672,7 +672,7 @@ define i32 @FOR_and_next_used_outside(ptr noalias %A, ptr noalias %B, i64 %n) { ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE12:.*]] ] ; VF2IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE12]] ] ; VF2IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[PRED_STORE_CONTINUE12]] ] -; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; VF2IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; VF2IC2-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; VF2IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index 1b2ff3526790a..05a6eabed4438 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -1263,7 +1263,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x, i32 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -2859,7 +2859,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_STORE_CONTINUE29]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE29]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE29]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1 diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll index 9748409a0094d..6f3c6f9f3a9a1 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll @@ -227,7 +227,7 @@ define float @fmaxnum_tailfold(ptr %src, i64 %n) #0 { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP51:%.*]], %[[PRED_LOAD_CONTINUE15]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP52:%.*]], %[[PRED_LOAD_CONTINUE15]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index bdf984e0956a8..c1b912fd54b7d 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -1721,7 +1721,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NEXT: store i32 [[TMP39]], ptr [[TMP27]], align 1, !alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NEXT: store i32 [[TMP22]], ptr [[TMP29]], align 1, 
!alias.scope [[META20]], !noalias [[META17]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4) +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 4) ; UNROLL-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL: middle.block: @@ -1773,7 +1773,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2) ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2 @@ -4111,14 +4111,14 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8 ; UNROLL-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) +; UNROLL-NEXT: [[VEC_IND_NEXT]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] ; UNROLL: middle.block: @@ -4149,7 +4149,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 @@ -4184,14 +4184,14 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 
+; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16
 ; INTERLEAVE-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; INTERLEAVE-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
+; INTERLEAVE-NEXT: [[VEC_IND_NEXT]] = add nuw <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; INTERLEAVE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; INTERLEAVE: middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
index ab2b24dc51683..2e11fa158b8d1 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll
@@ -65,9 +65,9 @@ define i64 @select_icmp_const_1(ptr %a, i64 %n) {
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4
 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8
@@ -258,9 +258,9 @@ define i64 @select_icmp_const_2(ptr %a, i64 %n) {
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4
 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8
@@ -451,9 +451,9 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 %
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4
 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8
@@ -644,9 +644,9 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) {
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
@@ -837,9 +837,9 @@ define i64 @select_fcmp_const(ptr %a, i64 %n) {
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
@@ -1034,9 +1034,9 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4
 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8
@@ -1253,9 +1253,9 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) {
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
 ; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
@@ -2548,9 +2548,9 @@ define i64 @select_icmp_xor_not_iv(ptr %a, i64 %n) {
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
-; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
 ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll
index fed5df2b65228..0a22c4b1e70a6 100644
--- a/llvm/test/Transforms/LoopVectorize/metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata.ll
@@ -443,7 +443,7 @@ define void @unknown_metadata(ptr nocapture %a, ptr noalias %b, i64 %size) {
 ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT: [[VEC_IND1:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; INTERLEAVE-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
 ; INTERLEAVE-NEXT: [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND1]], splat (i32 2)
 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[INDEX]], !custom_md [[META2:![0-9]+]]
 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], <2 x i64> [[VEC_IND]]
diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll
index 4fab2995f14bd..944a987bd04d0 100644
--- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll
+++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll
@@ -158,7 +158,7 @@ define void @predicated_noalias_scope_decl(ptr noalias nocapture readonly %a, pt
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 495616)
 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i64> [[STEP_ADD]], splat (i64 495616)
 ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0]])
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index 9ed35fb0a79e8..bfe996fccd0b2 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -69,7 +69,7 @@ define void @pr45679(ptr %A) {
 ; VF2UF2: vector.body:
 ; VF2UF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
 ; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
-; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i32> [[VEC_IND]], splat (i32 2)
 ; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 13)
 ; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], splat (i32 13)
 ; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
@@ -238,7 +238,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF2UF2: vector.body:
 ; VF2UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
 ; VF2UF2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
-; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2UF2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
 ; VF2UF2-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 13)
 ; VF2UF2-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], splat (i64 13)
 ; VF2UF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index acddbfd6aab81..678b9d047d9f4 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -604,7 +604,7 @@ define void @switch_unconditional_duplicate_target(ptr %start, ptr %dest) {
 ; IC2: [[VECTOR_BODY]]:
 ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; IC2-NEXT: [[STEP_ADD:%.*]] = add nuw <2 x i64> [[VEC_IND]], splat (i64 2)
 ; IC2-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> [[VEC_IND]]
 ; IC2-NEXT: [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
 ; IC2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> [[STEP_ADD]]
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
index b55c563162e13..1c94e259fc71b 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
@@ -154,13 +154,10 @@ define i32 @predicated(ptr noalias nocapture %A) {
 ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP104:%.*]], [[PRED_LOAD_CONTINUE36]] ]
 ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP107:%.*]], [[PRED_LOAD_CONTINUE36]] ]
 ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP110:%.*]], [[PRED_LOAD_CONTINUE36]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
-; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[STEP_ADD]], splat (i64 257)
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i64> [[STEP_ADD_2]], splat (i64 257)
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[STEP_ADD_3]], splat (i64 257)
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 253)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 249)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 245)
 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK: pred.load.if:
@@ -384,9 +381,9 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) {
 ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP112:%.*]], [[PRED_LOAD_CONTINUE38]] ]
 ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[PRED_LOAD_CONTINUE38]] ]
 ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP118:%.*]], [[PRED_LOAD_CONTINUE38]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
-; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD1:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT: [[STEP_ADD2:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 12)
 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
index e3045e762e535..0a25460a52fad 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
@@ -159,7 +159,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp ult [[VEC_IND]], splat (i64 495616)
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp ult [[STEP_ADD]], splat (i64 495616)
 ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], splat (float 2.300000e+01), splat (float 4.200000e+01)
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
index 645c2742095bb..df1f1ef819317 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll
@@ -673,8 +673,8 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
 ; CHECK-VF4UF2-NEXT: br label %[[VECTOR_BODY:.*]]
 ; CHECK-VF4UF2: [[VECTOR_BODY]]:
 ; CHECK-VF4UF2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4UF2-NEXT: [[STEP_ADD:%.*]] = phi [ [[TMP7]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-VF4UF2-NEXT: [[VEC_IND_NEXT:%.*]] = add [[STEP_ADD]], [[BROADCAST_SPLAT2]]
+; CHECK-VF4UF2-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP7]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT1:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-VF4UF2-NEXT: [[VEC_IND_NEXT:%.*]] = add nuw [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; CHECK-VF4UF2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]]
 ; CHECK-VF4UF2-NEXT: [[VEC_IND_NEXT1]] = add [[VEC_IND_NEXT]], [[BROADCAST_SPLAT2]]
 ; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
index c3bf4e9f783c2..606051f5f9a8a 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll
@@ -27,7 +27,7 @@ define void @add_ind64_unrolled(ptr noalias nocapture %a, ptr noalias nocapture
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i64 [[TMP8]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP9]], align 8
@@ -101,7 +101,7 @@ define void @add_ind64_unrolled_nxv1i64(ptr noalias nocapture %a, ptr noalias no
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i64 [[TMP2]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll
index 3bf4e145ba01c..50e6e254d8a9d 100644
--- a/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-index-interleaving.ll
@@ -18,7 +18,7 @@ define i64 @test_vectorize_select_umin_first_idx(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ splat (i64 50), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i64> [ splat (i64 50), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4
@@ -107,7 +107,7 @@ define i64 @test_vectorize_select_umin_last_idx(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 50), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 50), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4
@@ -196,7 +196,7 @@ define i64 @test_vectorize_select_smin_first_idx(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4
@@ -285,7 +285,7 @@ define i64 @test_vectorize_select_smin_last_idx(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4
@@ -374,7 +374,7 @@ define i64 @test_vectorize_select_umax_first_idx(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4
@@ -463,7 +463,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4
@@ -552,7 +552,7 @@ define i64 @test_vectorize_select_smax_first_idx(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ poison, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV1]]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[GEP1]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP1]], align 4
@@ -641,7 +641,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[GEP]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
index fc0582b752341..c6bcd708c0fce 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -159,9 +159,9 @@ define i64 @live_out_scalar_vf(i64 %n) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[STEP_ADD_3:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
-; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
-; CHECK-NEXT: [[STEP_ADD_3]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add nuw <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-NEXT: [[STEP_ADD_3]] = add nuw <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4)
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]