diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 7573467917c73..8b9cbc03d0b43 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -2199,6 +2199,8 @@ Value *llvm::addDiffRuntimeChecks( // the compare, to allow detecting and re-using redundant compares. DenseMap, Value *> SeenCompares; for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) { + assert(IC * AccessSize > 0 && + "Threshold must be non-zero to use diff-check"); Type *Ty = SinkStart->getType(); // Compute VF * IC * AccessSize. auto *VFTimesICTimesSize = @@ -2215,9 +2217,14 @@ Value *llvm::addDiffRuntimeChecks( if (IsConflict) continue; - IsConflict = - ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check"); + // Use (Diff - 1) getName() + ".fr"); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clmul.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clmul.ll index e453fd4f12a3f..d5c73e17d7ced 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clmul.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clmul.ll @@ -10,13 +10,15 @@ define void @clmul_loop(ptr %a, ptr %b, ptr %c, i64 %n) { ; CHECK-NEXT: [[B3:%.*]] = ptrtoaddr ptr [[B]] to i64 ; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64 ; CHECK-NEXT: [[C1:%.*]] = ptrtoaddr ptr [[C]] to i64 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 6 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[C1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP11]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[C1]], [[B3]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP12]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index e5efddaac9d1a..7644045442238 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -1253,9 +1253,13 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; DEFAULT-NEXT: [[TMP6:%.*]] = sub i64 [[C1]], [[A2]] -; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; DEFAULT-NEXT: [[TMP26:%.*]] = sub i64 [[TMP6]], 1 +; DEFAULT-NEXT: [[TMP27:%.*]] = sub i64 [[TMP5]], 1 +; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP26]], [[TMP27]] ; DEFAULT-NEXT: [[TMP7:%.*]] = sub i64 [[C1]], [[B3]] -; DEFAULT-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP7]], [[TMP5]] +; DEFAULT-NEXT: [[TMP28:%.*]] = sub i64 [[TMP7]], 1 +; DEFAULT-NEXT: [[TMP29:%.*]] = sub i64 [[TMP5]], 1 +; DEFAULT-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP28]], [[TMP29]] ; DEFAULT-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; DEFAULT-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: @@ -1307,9 +1311,13 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16 ; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[C1]], [[A2]] -; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; PRED-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1 +; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1 +; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] ; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[C1]], [[B3]] -; PRED-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], [[TMP2]] +; PRED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP4]], 1 +; PRED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], 1 +; PRED-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP9]], [[TMP10]] ; PRED-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll index efa64c661ebc0..c9f5fe2874d4c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll @@ -339,7 +339,8 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[ARG21]], [[ARG3]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll index 2ace3b6898652..fa557ebf41baf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll @@ -10,9 +10,11 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -87,9 +89,11 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -164,9 +168,11 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -241,9 +247,11 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -318,9 +326,11 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -395,9 +405,11 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index e7ccde046d58e..06d5e82be34d0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -21,7 +21,9 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; DEFAULT-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; DEFAULT-NEXT: [[TMP6:%.*]] = sub i64 [[DST1]], [[SRC2]] -; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; DEFAULT-NEXT: [[TMP14:%.*]] = sub i64 [[TMP6]], 1 +; DEFAULT-NEXT: [[TMP27:%.*]] = sub i64 [[TMP5]], 1 +; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP27]] ; DEFAULT-NEXT: br i1 [[DIFF_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; DEFAULT: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; DEFAULT-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -126,7 +128,9 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16 ; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[DST1]], [[SRC2]] -; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; PRED-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1 +; PRED-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], 1 +; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] ; PRED-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll index 625ca3b2b7284..0cb097c67ac7f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll @@ -31,7 +31,8 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; INTERLEAVE-2: vector.memcheck: ; INTERLEAVE-2-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]] -; INTERLEAVE-2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; INTERLEAVE-2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; INTERLEAVE-2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 31 ; INTERLEAVE-2-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; INTERLEAVE-2: vector.main.loop.iter.check: ; INTERLEAVE-2-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[N]], 32 @@ -119,7 +120,8 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; INTERLEAVE-4: vector.memcheck: ; INTERLEAVE-4-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]] -; INTERLEAVE-4-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64 +; INTERLEAVE-4-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; INTERLEAVE-4-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 63 ; INTERLEAVE-4-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; INTERLEAVE-4: vector.main.loop.iter.check: ; INTERLEAVE-4-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[N]], 64 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-trip-count-decisions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-trip-count-decisions.ll index 39ef5baa5b019..d30f533fc1fd3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-trip-count-decisions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-trip-count-decisions.ll @@ -53,10 +53,11 @@ for.end: ; preds = %for.body ret i32 0 } -; If trip-count is equal to 10, the function is vectorised when predicated tail folding is chosen +; If trip-count is equal to 10, the function is not vectorised as the runtime +; check cost is too high relative to the trip count. define i32 @foo_mid_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) { ; CHECK-LABEL: @foo_mid_trip_count( -; PREDICATED: vector.body +; PREDICATED-NOT: vector.body ; SCALAR-NOT: vector.body entry: br label %for.body @@ -78,6 +79,32 @@ for.end: ; preds = %for.body ret i32 0 } +; If trip-count is equal to 13, the function is vectorised when predicated tail +; folding is chosen. +define i32 @foo_mid_trip_count_13(ptr %a, ptr %b, ptr %c, i32 %bound) { +; CHECK-LABEL: @foo_mid_trip_count_13( +; PREDICATED: vector.body +; SCALAR-NOT: vector.body +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %idx = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.index = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %idx + %0 = load i8, ptr %a.index, align 1 + %b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx + %1 = load i8, ptr %b.index, align 1 + %2 = add i8 %0, %1 + %c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx + store i8 %2, ptr %c.index, align 1 + %inc = add nsw i32 %idx, 1 + %exitcond = icmp eq i32 %idx, %bound + br i1 %exitcond, label %for.end, label %for.body, !prof !3 + +for.end: ; preds = %for.body + ret i32 0 +} + ; If trip-count is equal to 40, the function is always vectorised define i32 @foo_high_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) { ; CHECK-LABEL: @foo_high_trip_count( @@ -106,3 +133,4 @@ for.end: ; preds = %for.body !0 = !{!"branch_weights", i32 10, i32 30} !1 = !{!"branch_weights", i32 10, i32 90} !2 = !{!"branch_weights", i32 10, i32 390} +!3 = !{!"branch_weights", i32 10, i32 120} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll index 221e67041e121..664574f4bea3f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll @@ -126,14 +126,20 @@ define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, pt ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[OUT_B1]], [[OUT_A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP23]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[OUT_A2]], [[IN3]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP4]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[OUT_B1]], [[IN3]] -; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP7]], [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP6]], 1 +; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP12]], [[TMP13]] ; CHECK-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX6]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll index 6de0397edc568..dffedf7b034f3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll @@ -17,9 +17,11 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA510-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-CA510: [[VECTOR_MEMCHECK]]: ; CHECK-CA510-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[A2]] -; CHECK-CA510-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-CA510-NEXT: [[TMP4:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-CA510-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 31 ; CHECK-CA510-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[B3]] -; CHECK-CA510-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-CA510-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-CA510-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP7]], 31 ; CHECK-CA510-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-CA510-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-CA510: [[VECTOR_PH]]: @@ -81,9 +83,11 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA520-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-CA520: [[VECTOR_MEMCHECK]]: ; CHECK-CA520-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[A2]] -; CHECK-CA520-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-CA520-NEXT: [[TMP4:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-CA520-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 31 ; CHECK-CA520-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[B3]] -; CHECK-CA520-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-CA520-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-CA520-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP7]], 31 ; CHECK-CA520-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-CA520-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-CA520: [[VECTOR_PH]]: @@ -145,9 +149,11 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA320-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-CA320: [[VECTOR_MEMCHECK]]: ; CHECK-CA320-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[A2]] -; CHECK-CA320-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-CA320-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-CA320-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP13]], 31 ; CHECK-CA320-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[B3]] -; CHECK-CA320-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-CA320-NEXT: [[TMP14:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-CA320-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP14]], 31 ; CHECK-CA320-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-CA320-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-CA320: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll index 7d807277d9853..a94de66488b3e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -24,7 +24,9 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[D1]], [[S2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP5]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP4]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP9]], [[TMP10]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll index ea2c092c49960..f80b1bde75a89 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -17,14 +17,17 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 3 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP2]], i32 12) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], [[UMAX]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP6]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP5]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP12]], [[TMP14]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() @@ -85,14 +88,17 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 3 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], [[TMP2]] +; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP2]], i32 12) +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], [[UMAX]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP6]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP5]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP12]], [[TMP14]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index bd4ea480c80c7..2b0f8159db099 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -15,7 +15,7 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N:%.*]], i64 1) ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 20) +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 30) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: @@ -23,22 +23,32 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[DST_21]], [[DST_12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP20:%.*]] = sub i64 [[TMP6]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = sub i64 [[TMP5]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP20]], [[TMP21]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[DST_12]], [[SRC_13]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; CHECK-NEXT: [[TMP24:%.*]] = sub i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP24]], [[TMP25]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[DST_12]], [[SRC_25]] -; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP26:%.*]] = sub i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = sub i64 [[TMP9]], 1 +; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP26]], [[TMP28]] ; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK6]] ; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[DST_21]], [[SRC_13]] -; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[TMP18:%.*]] = sub i64 [[TMP12]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 [[TMP11]], 1 +; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP18]], [[TMP19]] ; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[DST_21]], [[SRC_25]] -; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[TMP14]], 1 +; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[TMP13]], 1 +; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP22]], [[TMP23]] ; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX9]], [[DIFF_CHECK10]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index 7194ff16df91f..89b8fbe9dafdb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -85,9 +85,9 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 7 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[DOTNEG:%.*]] = mul i64 [[TMP2]], -128 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[A2]], [[B1]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i64 [[TMP3]], [[DOTNEG]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index 71e9b6ac30df3..d0f9a19261ace 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -641,9 +641,9 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: [[ARRAY1:%.*]] = ptrtoaddr ptr [[ARRAY]] to i64 ; CHECK-NEXT: [[INDICES2:%.*]] = ptrtoaddr ptr [[INDICES]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[ARRAY1]], [[INDICES2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP3]], -16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[INDICES2]], [[ARRAY1]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i64 [[TMP4]], [[DOTNEG]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll index 74bcfcd85614c..c14fea9c52f88 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll @@ -22,7 +22,8 @@ define void @vector_reverse_f64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 63 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 @@ -83,7 +84,8 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 63 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-multiexit.ll index 0f469c51d4dd9..8f2ca33b22724 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-multiexit.ll @@ -15,11 +15,12 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B:%.*]] to i32 ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[N:%.*]], i32 999) ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP1]], 16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP4]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 @@ -86,11 +87,12 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B:%.*]] to i32 ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[N:%.*]], i32 999) ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP1]], 16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP4]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll index a395dcdcbf7c6..3282b6a56c2ef 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll @@ -13,8 +13,8 @@ define void @arm_abs_q7(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 % ; CHECK-NEXT: br i1 [[CMP_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 16 -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PDST1]], [[PSRC2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 16 +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PSRC2]], [[PDST1]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i32 [[TMP0]], -16 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -104,8 +104,8 @@ define void @arm_abs_q15(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 ; CHECK-NEXT: br i1 [[CMP_NOT20]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8 -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PDST1]], [[PSRC2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 16 +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PSRC2]], [[PDST1]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i32 [[TMP0]], -16 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -199,8 +199,8 @@ define void @arm_abs_q31(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 ; CHECK-NEXT: br i1 [[CMP_NOT14]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 4 -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PDST1]], [[PSRC2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 16 +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[PSRC2]], [[PDST1]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i32 [[TMP0]], -16 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll index 723d4f16e289e..157390ef0a9e1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll @@ -12,7 +12,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 2 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 22) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: @@ -20,10 +20,14 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP16]] +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP16]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP12]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP7]] +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP19]], [[TMP11]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -73,7 +77,7 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15) +; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 22) ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]] ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; ZVFHMIN: [[VECTOR_MEMCHECK]]: @@ -81,10 +85,14 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; ZVFHMIN-NEXT: [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; ZVFHMIN-NEXT: [[TMP12:%.*]] = sub i64 [[TMP6]], 1 +; ZVFHMIN-NEXT: [[TMP14:%.*]] = sub i64 [[TMP5]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP12]], [[TMP14]] ; ZVFHMIN-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 4 ; ZVFHMIN-NEXT: [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; ZVFHMIN-NEXT: [[TMP16:%.*]] = sub i64 [[TMP8]], 1 +; ZVFHMIN-NEXT: [[TMP11:%.*]] = sub i64 [[TMP7]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP16]], [[TMP11]] ; ZVFHMIN-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: @@ -157,7 +165,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 2 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 22) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: @@ -165,10 +173,14 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP15]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP16]] +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP16]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP12]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP7]] +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP19]], [[TMP11]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -218,7 +230,7 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15) +; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 22) ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]] ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; ZVFHMIN: [[VECTOR_MEMCHECK]]: @@ -226,10 +238,14 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; ZVFHMIN-NEXT: [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; ZVFHMIN-NEXT: [[TMP12:%.*]] = sub i64 [[TMP6]], 1 +; ZVFHMIN-NEXT: [[TMP14:%.*]] = sub i64 [[TMP5]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP12]], [[TMP14]] ; ZVFHMIN-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 4 ; ZVFHMIN-NEXT: [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; ZVFHMIN-NEXT: [[TMP16:%.*]] = sub i64 [[TMP8]], 1 +; ZVFHMIN-NEXT: [[TMP11:%.*]] = sub i64 [[TMP7]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP16]], [[TMP11]] ; ZVFHMIN-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: @@ -302,7 +318,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 22) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: @@ -310,10 +326,14 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP16]] +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP16]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP12]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP7]] +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP19]], [[TMP11]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -363,7 +383,7 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 -; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15) +; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 22) ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]] ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; ZVFHMIN: [[VECTOR_MEMCHECK]]: @@ -371,10 +391,14 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; ZVFHMIN-NEXT: [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; ZVFHMIN-NEXT: [[TMP12:%.*]] = sub i64 [[TMP6]], 1 +; ZVFHMIN-NEXT: [[TMP14:%.*]] = sub i64 [[TMP5]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP12]], [[TMP14]] ; ZVFHMIN-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 8 ; ZVFHMIN-NEXT: [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; ZVFHMIN-NEXT: [[TMP16:%.*]] = sub i64 [[TMP8]], 1 +; ZVFHMIN-NEXT: [[TMP11:%.*]] = sub i64 [[TMP7]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP16]], [[TMP11]] ; ZVFHMIN-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: @@ -447,7 +471,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP8]], 1 -; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 15) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP13]], i64 22) ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: @@ -455,10 +479,14 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP15]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP16]] +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP16]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP12]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 8 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP7]] +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP19]], [[TMP11]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -508,7 +536,7 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64 ; ZVFHMIN-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 -; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 15) +; ZVFHMIN-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 22) ; ZVFHMIN-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]] ; ZVFHMIN-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; ZVFHMIN: [[VECTOR_MEMCHECK]]: @@ -516,10 +544,14 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; ZVFHMIN-NEXT: [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; ZVFHMIN-NEXT: [[TMP12:%.*]] = sub i64 [[TMP6]], 1 +; ZVFHMIN-NEXT: [[TMP14:%.*]] = sub i64 [[TMP5]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP12]], [[TMP14]] ; ZVFHMIN-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 8 ; ZVFHMIN-NEXT: [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; ZVFHMIN-NEXT: [[TMP16:%.*]] = sub i64 [[TMP8]], 1 +; ZVFHMIN-NEXT: [[TMP11:%.*]] = sub i64 [[TMP7]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP16]], [[TMP11]] ; ZVFHMIN-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: @@ -596,10 +628,14 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8 ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP16]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP18]] +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP18]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP16]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP19]] +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP19]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -651,10 +687,14 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; ZVFHMIN-NEXT: [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; ZVFHMIN-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; ZVFHMIN-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP11]] ; ZVFHMIN-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 ; ZVFHMIN-NEXT: [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; ZVFHMIN-NEXT: [[TMP12:%.*]] = sub i64 [[TMP8]], 1 +; ZVFHMIN-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP12]], [[TMP9]] ; ZVFHMIN-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: @@ -729,10 +769,14 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8 ; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[TMP16]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP18]] +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP18]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP16]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP19]] +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP19]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -784,10 +828,14 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; ZVFHMIN-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 ; ZVFHMIN-NEXT: [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; ZVFHMIN-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; ZVFHMIN-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP11]] ; ZVFHMIN-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 2 ; ZVFHMIN-NEXT: [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; ZVFHMIN-NEXT: [[TMP12:%.*]] = sub i64 [[TMP8]], 1 +; ZVFHMIN-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], 1 +; ZVFHMIN-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP12]], [[TMP9]] ; ZVFHMIN-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; ZVFHMIN-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; ZVFHMIN: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 094500f07b418..c88fdcbe7f6cf 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -175,7 +175,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; RV64-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; RV64-NEXT: [[TMP14:%.*]] = sub i64 [[B1]], [[A2]] -; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] +; RV64-NEXT: [[TMP15:%.*]] = sub i64 [[TMP14]], 1 +; RV64-NEXT: [[TMP16:%.*]] = sub i64 [[TMP13]], 1 +; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP15]], [[TMP16]] ; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; RV64: [[VECTOR_PH]]: ; RV64-NEXT: br label %[[VECTOR_BODY:.*]] @@ -230,7 +232,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 4 ; RV32-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4 ; RV32-NEXT: [[TMP6:%.*]] = sub i32 [[B1]], [[A2]] -; RV32-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP6]], [[TMP5]] +; RV32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], 1 +; RV32-NEXT: [[TMP8:%.*]] = sub i32 [[TMP5]], 1 +; RV32-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP7]], [[TMP8]] ; RV32-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; RV32: [[VECTOR_PH]]: ; RV32-NEXT: br label %[[VECTOR_BODY:.*]] @@ -295,7 +299,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; RV64-UF2-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 8 ; RV64-UF2-NEXT: [[TMP14:%.*]] = sub i64 [[B1]], [[A2]] -; RV64-UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] +; RV64-UF2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], 1 +; RV64-UF2-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], 1 +; RV64-UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP16]], [[TMP15]] ; RV64-UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; RV64-UF2: [[VECTOR_PH]]: ; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() @@ -400,7 +406,9 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; RV64-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 4 ; RV64-NEXT: [[TMP14:%.*]] = sub i64 [[B1]], [[A2]] -; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] +; RV64-NEXT: [[TMP15:%.*]] = sub i64 [[TMP14]], 1 +; RV64-NEXT: [[TMP16:%.*]] = sub i64 [[TMP13]], 1 +; RV64-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP15]], [[TMP16]] ; RV64-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; RV64: [[VECTOR_PH]]: ; RV64-NEXT: br label %[[VECTOR_BODY:.*]] @@ -455,7 +463,9 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV32-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 4 ; RV32-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4 ; RV32-NEXT: [[TMP6:%.*]] = sub i32 [[B1]], [[A2]] -; RV32-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP6]], [[TMP5]] +; RV32-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], 1 +; RV32-NEXT: [[TMP8:%.*]] = sub i32 [[TMP5]], 1 +; RV32-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP7]], [[TMP8]] ; RV32-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; RV32: [[VECTOR_PH]]: ; RV32-NEXT: br label %[[VECTOR_BODY:.*]] @@ -520,7 +530,9 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64-UF2-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4 ; RV64-UF2-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 8 ; RV64-UF2-NEXT: [[TMP14:%.*]] = sub i64 [[B1]], [[A2]] -; RV64-UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] +; RV64-UF2-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], 1 +; RV64-UF2-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], 1 +; RV64-UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP16]], [[TMP15]] ; RV64-UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; RV64-UF2: [[VECTOR_PH]]: ; RV64-UF2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 642e28a4823e3..6b635797cdb6a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -615,7 +615,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NOSTRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NOSTRIDED-NEXT: [[TMP6:%.*]] = sub i64 [[P21]], [[P3]] -; NOSTRIDED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NOSTRIDED-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1 +; NOSTRIDED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 +; NOSTRIDED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], [[TMP8]] ; NOSTRIDED-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; NOSTRIDED: vector.ph: ; NOSTRIDED-NEXT: br label [[VECTOR_BODY:%.*]] @@ -658,7 +660,7 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-UF2-NEXT: [[P21:%.*]] = ptrtoaddr ptr [[P2:%.*]] to i64 ; NOSTRIDED-UF2-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; NOSTRIDED-UF2-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 -; NOSTRIDED-UF2-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 12) +; NOSTRIDED-UF2-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16) ; NOSTRIDED-UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[UMAX]] ; NOSTRIDED-UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; NOSTRIDED-UF2: vector.scevcheck: @@ -669,7 +671,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED-UF2-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NOSTRIDED-UF2-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8 ; NOSTRIDED-UF2-NEXT: [[TMP5:%.*]] = sub i64 [[P21]], [[P3]] -; NOSTRIDED-UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]] +; NOSTRIDED-UF2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP5]], 1 +; NOSTRIDED-UF2-NEXT: [[TMP15:%.*]] = sub i64 [[TMP4]], 1 +; NOSTRIDED-UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP15]] ; NOSTRIDED-UF2-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; NOSTRIDED-UF2: vector.ph: ; NOSTRIDED-UF2-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll index 06d4c24459945..1ca4b962637be 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll @@ -19,7 +19,9 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -68,7 +70,9 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -134,7 +138,9 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -183,7 +189,9 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -249,7 +257,9 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -298,7 +308,9 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -364,7 +376,9 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -413,7 +427,9 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -479,7 +495,9 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -528,7 +546,9 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -594,7 +614,9 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -643,7 +665,9 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -709,7 +733,9 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -758,7 +784,9 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -824,7 +852,9 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -873,7 +903,9 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -939,7 +971,9 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -988,7 +1022,9 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -1054,7 +1090,9 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1103,7 +1141,9 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -1169,7 +1209,9 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1218,7 +1260,9 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -1284,7 +1328,9 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1333,7 +1379,9 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -1399,7 +1447,9 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16 ; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]] +; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1448,7 +1498,9 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 ; NO-VP-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[TMP4]], 1 +; NO-VP-NEXT: [[TMP13:%.*]] = sub i64 [[TMP3]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP13]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() @@ -1517,7 +1569,9 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 ; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1567,7 +1621,9 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP15:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP15]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -1634,7 +1690,9 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 ; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1684,7 +1742,9 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP15:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP15]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -1751,7 +1811,9 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 ; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1801,7 +1863,9 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP15:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP15]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -1868,7 +1932,9 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 ; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1918,7 +1984,9 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP15:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP15]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -2038,7 +2106,9 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; IF-EVL-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 ; IF-EVL-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[A2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -2088,7 +2158,9 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP15:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP15]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll index 62a200d17c8a2..0a7b933d72114 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll @@ -20,10 +20,14 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP24]], 1 +; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP23]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] ; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]] -; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]] +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP26]], 1 +; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 [[TMP25]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP10]] ; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: @@ -71,7 +75,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 24) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -79,10 +83,14 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP18:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP19:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP18]], [[TMP19]] ; NO-VP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[A1]], [[C3]] -; NO-VP-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; NO-VP-NEXT: [[TMP20:%.*]] = sub i64 [[TMP8]], 1 +; NO-VP-NEXT: [[TMP21:%.*]] = sub i64 [[TMP7]], 1 +; NO-VP-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP20]], [[TMP21]] ; NO-VP-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -158,10 +166,14 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP24]], 1 +; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP23]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] ; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]] -; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]] +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP26]], 1 +; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 [[TMP25]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP10]] ; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: @@ -209,7 +221,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 24) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -217,10 +229,14 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP18:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP19:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP18]], [[TMP19]] ; NO-VP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[A1]], [[C3]] -; NO-VP-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; NO-VP-NEXT: [[TMP20:%.*]] = sub i64 [[TMP8]], 1 +; NO-VP-NEXT: [[TMP21:%.*]] = sub i64 [[TMP7]], 1 +; NO-VP-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP20]], [[TMP21]] ; NO-VP-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -296,10 +312,14 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP24]], 1 +; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP23]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] ; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]] -; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]] +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP26]], 1 +; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 [[TMP25]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP10]] ; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: @@ -347,7 +367,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 24) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -355,10 +375,14 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP18:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP19:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP18]], [[TMP19]] ; NO-VP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[A1]], [[C3]] -; NO-VP-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; NO-VP-NEXT: [[TMP20:%.*]] = sub i64 [[TMP8]], 1 +; NO-VP-NEXT: [[TMP21:%.*]] = sub i64 [[TMP7]], 1 +; NO-VP-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP20]], [[TMP21]] ; NO-VP-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -434,10 +458,14 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP24]], 1 +; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP23]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] ; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP26:%.*]] = sub i64 [[A1]], [[C3]] -; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]] +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP26]], 1 +; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 [[TMP25]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP10]] ; IF-EVL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; IF-EVL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: @@ -485,7 +513,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP12]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 16) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP11]], i64 24) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -493,10 +521,14 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP18:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP19:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP18]], [[TMP19]] ; NO-VP-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP8:%.*]] = sub i64 [[A1]], [[C3]] -; NO-VP-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; NO-VP-NEXT: [[TMP20:%.*]] = sub i64 [[TMP8]], 1 +; NO-VP-NEXT: [[TMP21:%.*]] = sub i64 [[TMP7]], 1 +; NO-VP-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP20]], [[TMP21]] ; NO-VP-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; NO-VP-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: @@ -572,7 +604,9 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP21]], 1 +; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP20]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -622,7 +656,9 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP15:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP11]], [[TMP15]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -690,7 +726,9 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], [[TMP6]] +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP6]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP9]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -740,7 +778,9 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP15:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP11]], [[TMP15]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -808,7 +848,9 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP24]], 1 +; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP23]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -862,7 +904,9 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP17:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP11]], [[TMP17]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -936,7 +980,9 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; IF-EVL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP24:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP24]], 1 +; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP23]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -990,7 +1036,9 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP17:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP11]], [[TMP17]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -1064,7 +1112,9 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 ; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]] +; IF-EVL-NEXT: [[TMP6:%.*]] = sub i64 [[TMP21]], 1 +; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP20]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1106,7 +1156,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP9]], 2 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 16) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1114,7 +1164,9 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP11]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -1194,11 +1246,12 @@ define void @log10(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[ENTRY:.*]]: ; NO-VP-NEXT: [[B2:%.*]] = ptrtoaddr ptr [[B]] to i64 ; NO-VP-NEXT: [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64 -; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 12 ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: ; NO-VP-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16 +; NO-VP-NEXT: [[TMP5:%.*]] = sub i64 [[TMP1]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 15 ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll index b891aea634f1c..d748472cb8ce6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll @@ -604,7 +604,9 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP8]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP9]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -654,7 +656,9 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP11]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -721,7 +725,9 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP8]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP9]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -771,7 +777,9 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP11]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -838,7 +846,9 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP8]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP9]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -888,7 +898,9 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP11]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -955,7 +967,9 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP8]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP9]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1005,7 +1019,9 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP10:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], [[TMP11]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() @@ -1072,7 +1088,9 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; IF-EVL-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 ; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8 ; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[A1]], [[B2]] -; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP8]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP9]] ; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; IF-EVL: [[VECTOR_PH]]: ; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]] @@ -1114,7 +1132,7 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64 ; NO-VP-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; NO-VP-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP10]], 1 -; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 16) +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP9]], i64 20) ; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] ; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; NO-VP: [[VECTOR_MEMCHECK]]: @@ -1122,7 +1140,9 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) { ; NO-VP-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 ; NO-VP-NEXT: [[TMP6:%.*]] = sub i64 [[A1]], [[B2]] -; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; NO-VP-NEXT: [[TMP11:%.*]] = sub i64 [[TMP6]], 1 +; NO-VP-NEXT: [[TMP15:%.*]] = sub i64 [[TMP5]], 1 +; NO-VP-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP11]], [[TMP15]] ; NO-VP-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; NO-VP: [[VECTOR_PH]]: ; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/X86/clmul.ll b/llvm/test/Transforms/LoopVectorize/X86/clmul.ll index 3299dc06ff462..f6e002b9ef620 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/clmul.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/clmul.ll @@ -11,13 +11,15 @@ define void @clmul_loop(ptr %a, ptr %b, ptr %c, i64 %n){ ; WITH-PCLMUL-NEXT: [[B3:%.*]] = ptrtoaddr ptr [[B]] to i64 ; WITH-PCLMUL-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64 ; WITH-PCLMUL-NEXT: [[C1:%.*]] = ptrtoaddr ptr [[C]] to i64 -; WITH-PCLMUL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; WITH-PCLMUL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 10 ; WITH-PCLMUL-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; WITH-PCLMUL: [[VECTOR_MEMCHECK]]: ; WITH-PCLMUL-NEXT: [[TMP0:%.*]] = sub i64 [[C1]], [[A2]] -; WITH-PCLMUL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; WITH-PCLMUL-NEXT: [[TMP11:%.*]] = sub i64 [[TMP0]], 1 +; WITH-PCLMUL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP11]], 31 ; WITH-PCLMUL-NEXT: [[TMP1:%.*]] = sub i64 [[C1]], [[B3]] -; WITH-PCLMUL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; WITH-PCLMUL-NEXT: [[TMP12:%.*]] = sub i64 [[TMP1]], 1 +; WITH-PCLMUL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP12]], 31 ; WITH-PCLMUL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; WITH-PCLMUL-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; WITH-PCLMUL: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll index 7ca84b7f0c6fa..9297ba56d77e4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll @@ -12,9 +12,11 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -89,9 +91,11 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -166,9 +170,11 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -243,9 +249,11 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -320,9 +328,11 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 15 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 15 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: @@ -390,9 +400,11 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 15 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 15 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index e2b713e868fa7..a4cec278487c2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -27,9 +27,11 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; AVX1: [[VECTOR_MEMCHECK]]: ; AVX1-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] -; AVX1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; AVX1-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; AVX1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 31 ; AVX1-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] -; AVX1-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; AVX1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; AVX1-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], 31 ; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; AVX1: [[VECTOR_PH]]: @@ -60,9 +62,11 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; AVX2: [[VECTOR_MEMCHECK]]: ; AVX2-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] -; AVX2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 +; AVX2-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; AVX2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 127 ; AVX2-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] -; AVX2-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 128 +; AVX2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; AVX2-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], 127 ; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; AVX2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: @@ -139,9 +143,11 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; AVX512: [[VECTOR_MEMCHECK]]: ; AVX512-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] -; AVX512-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 256 +; AVX512-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; AVX512-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 255 ; AVX512-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] -; AVX512-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 256 +; AVX512-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; AVX512-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], 255 ; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; AVX512: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: @@ -248,9 +254,11 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX1-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; AVX1: [[VECTOR_MEMCHECK]]: ; AVX1-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] -; AVX1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; AVX1-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; AVX1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 31 ; AVX1-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] -; AVX1-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; AVX1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; AVX1-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], 31 ; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; AVX1: [[VECTOR_PH]]: @@ -281,9 +289,11 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; AVX2: [[VECTOR_MEMCHECK]]: ; AVX2-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] -; AVX2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 +; AVX2-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; AVX2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 127 ; AVX2-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] -; AVX2-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 128 +; AVX2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; AVX2-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], 127 ; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; AVX2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: @@ -360,9 +370,11 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; AVX512: [[VECTOR_MEMCHECK]]: ; AVX512-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] -; AVX512-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 256 +; AVX512-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; AVX512-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 255 ; AVX512-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] -; AVX512-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 256 +; AVX512-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; AVX512-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], 255 ; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; AVX512: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: @@ -478,9 +490,11 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; AVX1: [[VECTOR_MEMCHECK]]: ; AVX1-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] -; AVX1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; AVX1-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; AVX1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 31 ; AVX1-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] -; AVX1-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; AVX1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; AVX1-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], 31 ; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; AVX1: [[VECTOR_PH]]: @@ -512,9 +526,11 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; AVX2: [[VECTOR_MEMCHECK]]: ; AVX2-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] -; AVX2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 +; AVX2-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; AVX2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 127 ; AVX2-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] -; AVX2-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 128 +; AVX2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; AVX2-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], 127 ; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; AVX2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: @@ -596,9 +612,11 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; AVX512: [[VECTOR_MEMCHECK]]: ; AVX512-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[TRIGGER2]] -; AVX512-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 256 +; AVX512-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; AVX512-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 255 ; AVX512-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B3]] -; AVX512-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 256 +; AVX512-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; AVX512-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], 255 ; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; AVX512: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: diff --git a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll index 60a35c8813e0d..d3f41aec3cdc9 100644 --- a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll @@ -23,15 +23,15 @@ define void @forked_ptrs_different_base_same_offset(ptr nocapture readonly %Base ; CHECK-NEXT: [[PREDS2:%.*]] = ptrtoaddr ptr [[PREDS:%.*]] to i64 ; CHECK-NEXT: [[BASE23:%.*]] = ptrtoaddr ptr [[BASE2:%.*]] to i64 ; CHECK-NEXT: [[BASE15:%.*]] = ptrtoaddr ptr [[BASE1:%.*]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DEST1]], [[PREDS2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DEST1]], [[BASE23]] +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[PREDS2]], [[DEST1]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i64 [[TMP0]], -16 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[BASE23]], [[DEST1]] ; CHECK-NEXT: [[DOTFR:%.*]] = freeze i64 [[TMP1]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[DOTFR]], 16 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ugt i64 [[DOTFR]], -16 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DEST1]], [[BASE15]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[BASE15]], [[DEST1]] ; CHECK-NEXT: [[DOTFR10:%.*]] = freeze i64 [[TMP2]] -; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[DOTFR10]], 16 +; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ugt i64 [[DOTFR10]], -16 ; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK6]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX7]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/fpsat.ll b/llvm/test/Transforms/LoopVectorize/fpsat.ll index 5c103f70c2121..678076adc2c24 100644 --- a/llvm/test/Transforms/LoopVectorize/fpsat.ll +++ b/llvm/test/Transforms/LoopVectorize/fpsat.ll @@ -14,7 +14,8 @@ define void @signed(ptr %x, ptr %y, i32 %n) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[Y1]], [[X2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 @@ -87,7 +88,8 @@ define void @unsigned(ptr %x, ptr %y, i32 %n) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[Y1]], [[X2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll index c420157218496..309dded1ff1b9 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll @@ -76,7 +76,8 @@ define void @dont_hoist_variant_address(ptr %dst, ptr %src, i32 %n) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[SRC2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll index 5ecd7e5feb7aa..76a0b927bcb2e 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads.ll @@ -87,12 +87,15 @@ define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) { ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[COND2]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = icmp ult i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = icmp ult i64 [[TMP3]], 7 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[SRC23]] -; CHECK-NEXT: [[FOUND_CONFLICT6:%.*]] = icmp ult i64 [[TMP1]], 8 +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[FOUND_CONFLICT6:%.*]] = icmp ult i64 [[TMP7]], 7 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]] ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST1]], [[SRC15]] -; CHECK-NEXT: [[FOUND_CONFLICT9:%.*]] = icmp ult i64 [[TMP2]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP2]], 1 +; CHECK-NEXT: [[FOUND_CONFLICT9:%.*]] = icmp ult i64 [[TMP8]], 7 ; CHECK-NEXT: [[CONFLICT_RDX10:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT9]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX10]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll index eb3a5e2a12f3d..3750d0afc87c4 100644 --- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll @@ -653,9 +653,11 @@ define i32 @sum_arrays_outside_use(ptr %B, ptr %A, ptr %C, i32 %N) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[_LR_PH_I:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[C1]], [[B2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP2]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP8]], 7 ; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[C1]], [[A3]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP3]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP3]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP5]], 7 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[_LR_PH_I]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll index 944a987bd04d0..ae3739d010695 100644 --- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll +++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll @@ -70,7 +70,8 @@ define void @test2(ptr nocapture readonly %d) { ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[PTRINT2]], [[PTRINT]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP11]], 31 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 736d8356d2219..d06f83ec0e139 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -719,7 +719,8 @@ define void @strided_ptr_iv_runtime_stride(ptr %pIn, ptr %pOut, i32 %nCols, i32 ; STRIDED-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; STRIDED: vector.memcheck: ; STRIDED-NEXT: [[TMP3:%.*]] = sub i64 [[POUT1]], [[PIN2]] -; STRIDED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 16 +; STRIDED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP3]], 1 +; STRIDED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], 15 ; STRIDED-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; STRIDED: vector.ph: ; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll index 66ab7939c3cac..89dbb46911fdd 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-select-runtime-checks.ll @@ -84,10 +84,12 @@ define void @test_loop_dependent_select1(ptr %src.1, ptr %src.2, ptr %dst, i1 %c ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[DST1]], [[SRC_12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP16]], 1 ; CHECK-NEXT: [[DIFF_CHECK_FR:%.*]] = freeze i1 [[DIFF_CHECK]] ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[DST1]], [[SRC_23]] -; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = sub i64 [[TMP4]], 1 +; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP17]], 1 ; CHECK-NEXT: [[DIFF_CHECK5_FR:%.*]] = freeze i1 [[DIFF_CHECK5]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK_FR]], [[DIFF_CHECK5_FR]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll index 40d036d8e1ca7..c72149cfd70b5 100644 --- a/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/preserve-dbg-loc-and-loop-metadata.ll @@ -479,7 +479,8 @@ define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X1]], [[Y2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 @@ -523,7 +524,8 @@ define void @widen_intrinsic_dbg(i64 %n, ptr %y, ptr %x) { ; DEBUGLOC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]], !dbg [[DBG94]] ; DEBUGLOC: [[VECTOR_MEMCHECK]]: ; DEBUGLOC-NEXT: [[TMP0:%.*]] = sub i64 [[X1]], [[Y2]], !dbg [[DBG94]] -; DEBUGLOC-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16, !dbg [[DBG94]] +; DEBUGLOC-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1, !dbg [[DBG94]] +; DEBUGLOC-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 15, !dbg [[DBG94]] ; DEBUGLOC-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]], !dbg [[DBG95:![0-9]+]] ; DEBUGLOC: [[VECTOR_PH]]: ; DEBUGLOC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll index 55c73cb0928ff..c92257bbb30ff 100644 --- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll @@ -119,7 +119,8 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) { ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoaddr ptr [[PTR_IV_1_LCSSA]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SEL_DST3]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 1 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR_IV_1_LCSSA]], i64 1022 @@ -309,7 +310,8 @@ define void @expand_diff_neg_ptrtoint_expr(ptr %src, ptr %start) { ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[SRC2]] ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoaddr ptr [[TMP1]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP5]], [[TMP0]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -16 @@ -404,7 +406,8 @@ define void @scev_exp_reuse_const_add(ptr %dst, ptr %src) { ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 -2, [[SRC2]] ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoaddr ptr [[PTR_IV_1_NEXT_LCSSA]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 3 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR_IV_1_NEXT_LCSSA]], i64 80 diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll index e4156e96a2138..3f8ededeb25cc 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll @@ -12,10 +12,10 @@ define void @add_ints(ptr nocapture %A, ptr nocapture %B, ptr nocapture %C) { ; CHECK-NEXT: [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64 ; CHECK-NEXT: [[B2:%.*]] = ptrtoaddr ptr [[B]] to i64 ; CHECK-NEXT: [[C3:%.*]] = ptrtoaddr ptr [[C]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[B2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[C3]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 16 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B2]], [[A1]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i64 [[TMP0]], -16 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[C3]], [[A1]] +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ugt i64 [[TMP1]], -16 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll index b230303576b7e..e903fca8a5f9a 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll @@ -23,7 +23,8 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 @@ -93,7 +94,8 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[A1]], [[B2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 @@ -169,7 +171,8 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) { ; CHECK-NEXT: br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], 16 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index ea5f431603c6f..3764f2edf95e9 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -22,8 +22,8 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ss ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4, !dbg [[DBG9]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]], !dbg [[DBG9]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B2]], !dbg [[DBG9]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16, !dbg [[DBG9]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[B2]], [[A1]], !dbg [[DBG9]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i64 [[TMP1]], -16, !dbg [[DBG9]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], !dbg [[DBG9]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll index c64aee7f35fb1..4767096b3f0b0 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll @@ -20,36 +20,47 @@ define void @test_large_number_of_group(ptr %dst, i64 %off, i64 %N) { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32 +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[OFF_MUL_8]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP10]], 31 ; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[OFF]], 4 -; CHECK-NEXT: [[DIFF_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK1:%.*]] = icmp ult i64 [[TMP13]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK1]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[OFF]], 24 -; CHECK-NEXT: [[DIFF_CHECK2:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK2:%.*]] = icmp ult i64 [[TMP16]], 31 ; CHECK-NEXT: [[CONFLICT_RDX3:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK2]] ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[OFF]], 5 -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP19]], 31 ; CHECK-NEXT: [[CONFLICT_RDX5:%.*]] = or i1 [[CONFLICT_RDX3]], [[DIFF_CHECK4]] ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[OFF]], 40 -; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP3]], 32 +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP22]], 31 ; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX5]], [[DIFF_CHECK6]] ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[OFF]], 48 -; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP4]], 32 +; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP4]], 1 +; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP25]], 31 ; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[OFF]], 56 -; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP5]], 32 +; CHECK-NEXT: [[TMP28:%.*]] = sub i64 [[TMP5]], 1 +; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP28]], 31 ; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX9]], [[DIFF_CHECK10]] ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[OFF]], 6 -; CHECK-NEXT: [[DIFF_CHECK12:%.*]] = icmp ult i64 [[TMP6]], 32 +; CHECK-NEXT: [[TMP31:%.*]] = sub i64 [[TMP6]], 1 +; CHECK-NEXT: [[DIFF_CHECK12:%.*]] = icmp ult i64 [[TMP31]], 31 ; CHECK-NEXT: [[CONFLICT_RDX13:%.*]] = or i1 [[CONFLICT_RDX11]], [[DIFF_CHECK12]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[OFF]], 72 -; CHECK-NEXT: [[DIFF_CHECK14:%.*]] = icmp ult i64 [[TMP7]], 32 +; CHECK-NEXT: [[TMP34:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-NEXT: [[DIFF_CHECK14:%.*]] = icmp ult i64 [[TMP34]], 31 ; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX13]], [[DIFF_CHECK14]] ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[OFF]], 80 -; CHECK-NEXT: [[DIFF_CHECK16:%.*]] = icmp ult i64 [[TMP8]], 32 +; CHECK-NEXT: [[TMP37:%.*]] = sub i64 [[TMP8]], 1 +; CHECK-NEXT: [[DIFF_CHECK16:%.*]] = icmp ult i64 [[TMP37]], 31 ; CHECK-NEXT: [[CONFLICT_RDX17:%.*]] = or i1 [[CONFLICT_RDX15]], [[DIFF_CHECK16]] ; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[OFF]], 88 -; CHECK-NEXT: [[DIFF_CHECK18:%.*]] = icmp ult i64 [[TMP9]], 32 +; CHECK-NEXT: [[TMP40:%.*]] = sub i64 [[TMP9]], 1 +; CHECK-NEXT: [[DIFF_CHECK18:%.*]] = icmp ult i64 [[TMP40]], 31 ; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX17]], [[DIFF_CHECK18]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -220,9 +231,11 @@ define void @check_creation_order(ptr %a, ptr %b, i32 %m) { ; CHECK-NEXT: br label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[M_EXT]], -8 -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[A1]], [[B2]] -; CHECK-NEXT: [[DIFF_CHECK3:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK3:%.*]] = icmp ult i64 [[TMP4]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK3]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll index 16afd3044d180..35cd701b2303b 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll @@ -13,7 +13,8 @@ define void @same_step_and_size(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]] ; entry: @@ -44,7 +45,8 @@ define void @same_step_and_size_no_dominance_between_accesses(ptr %a, ptr %b, i6 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[A1]], [[B2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]] ; entry: @@ -120,7 +122,8 @@ define void @steps_match_but_different_access_sizes_1(ptr %a, ptr %b, i64 %n) { ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[B1]], -2 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]] ; entry: @@ -155,7 +158,8 @@ define void @steps_match_but_different_access_sizes_2(ptr %a, ptr %b, i64 %n) { ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[A1]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[B2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]] ; entry: @@ -190,18 +194,23 @@ define void @steps_match_two_loadstores_different_access_sizes(ptr %src.1, ptr % ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DST_21]], [[DST_12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 31 ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DST_12]], [[SRC_13]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP6]], 31 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST_12]], [[SRC_25]] -; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP7]], 31 ; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK6]] ; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[DST_21]], [[SRC_13]] -; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP3]], 32 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP8]], 31 ; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]] ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[DST_21]], [[SRC_25]] -; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP4]], 32 +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP4]], 1 +; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP9]], 31 ; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX9]], [[DIFF_CHECK10]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX11]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]] ; @@ -352,7 +361,8 @@ define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr noca ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[SUB]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[SUB]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]] ; entry: @@ -465,7 +475,8 @@ define void @remove_diff_checks_via_guards(i32 %x, i32 %y, ptr %A) { ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP16:%.*]] = sext i32 [[OFFSET]] to i64 ; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i64 [[TMP16]], 2 -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP17]], 16 +; CHECK-NEXT: [[TMP18:%.*]] = sub i64 [[TMP17]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP18]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH1:label %.*]] ; entry: @@ -522,7 +533,8 @@ define void @diff_check_via_i32_ptrarith(ptr %origin, ptr %dst, ptr %base, i32 % ; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[BASE1]], [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[LHS]], [[TMP12]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP13]], 4 +; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP13]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], 3 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]] ; entry: @@ -583,7 +595,8 @@ define void @phi_of_ptrtoint_diff_check(ptr %base, ptr %end, i64 %n, i1 %cond) { ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[N]], [[DST_PTR1]] ; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[DST_INT]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 3 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll index 1ac7087ae9143..2721ca00afa42 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll @@ -420,7 +420,8 @@ define void @diff_checks_src_start_invariant(ptr nocapture noundef writeonly %ds ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_N]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP4]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP12]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_N]], 4 @@ -1416,7 +1417,8 @@ define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr noca ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 15 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 853a19cc87747..c676a53729ca5 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -29,7 +29,9 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 ; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = add i64 [[B1]], -4 ; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[A2]] -; CHECK-VF4UF1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP9]], [[TMP7]] +; CHECK-VF4UF1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP9]], 1 +; CHECK-VF4UF1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-VF4UF1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP12]], [[TMP13]] ; CHECK-VF4UF1-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: ; CHECK-VF4UF1-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() @@ -92,7 +94,9 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8 ; CHECK-VF4UF2-NEXT: [[TMP8:%.*]] = add i64 [[B1]], -4 ; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[A2]] -; CHECK-VF4UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP9]], [[TMP7]] +; CHECK-VF4UF2-NEXT: [[TMP13:%.*]] = sub i64 [[TMP9]], 1 +; CHECK-VF4UF2-NEXT: [[TMP19:%.*]] = sub i64 [[TMP7]], 1 +; CHECK-VF4UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP13]], [[TMP19]] ; CHECK-VF4UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]: ; CHECK-VF4UF2-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll index 6a6f4dcc85923..12b6e7f05d897 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll @@ -147,7 +147,8 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: br i1 [[TMP13]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[C2]], [[A1]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], 32 +; CHECK-NEXT: [[TMP20:%.*]] = sub i64 [[TMP14]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP20]], 31 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll index dd1117039512b..d82bcf4bd8e1e 100644 --- a/llvm/test/Transforms/LoopVectorize/struct-return.ll +++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll @@ -115,12 +115,15 @@ define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, pt ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[OUT_B1]], [[OUT_A2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8 +; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP0]], 1 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP10]], 7 ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[OUT_A2]], [[IN3]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP1]], 8 +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP1]], 1 +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i32 [[TMP11]], 7 ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[OUT_B1]], [[IN3]] -; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i32 [[TMP2]], 8 +; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP2]], 1 +; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i32 [[TMP12]], 7 ; CHECK-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX6]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll b/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll index 0f03d39f76c01..38d220b8682a0 100644 --- a/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll +++ b/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll @@ -17,7 +17,7 @@ define i32 @test1(ptr nocapture %a, ptr nocapture readonly %b) { ; CHECK-NOTBAA-LABEL: @test1 ; CHECK-NOTBAA: entry: -; CHECK-NOTBAA: icmp ult i64 +; CHECK-NOTBAA: icmp ugt i64 ; CHECK-NOTBAA-NOT: icmp ; CHECK-NOTBAA: br i1 {{.+}}, label %for.body, label %vector.body @@ -49,7 +49,7 @@ for.end: ; preds = %for.body define i32 @test2(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c) { ; CHECK-LABEL: @test2 ; CHECK: entry: -; CHECK: icmp ult i64 +; CHECK: icmp ugt i64 ; CHECK-NOT: icmp ; CHECK: br i1 {{.+}}, label %for.body, label %vector.body @@ -60,8 +60,8 @@ define i32 @test2(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr noca ; CHECK-NOTBAA-LABEL: @test2 ; CHECK-NOTBAA: entry: -; CHECK-NOTBAA: icmp ult i64 -; CHECK-NOTBAA: icmp ult i64 +; CHECK-NOTBAA: icmp ugt i64 +; CHECK-NOTBAA: icmp ugt i64 ; CHECK-NOTBAA-NOT: icmp ; CHECK-NOTBAA: br i1 {{.+}}, label %for.body, label %vector.body diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll index 4ea558a3a51da..9369e2c15725d 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll @@ -43,8 +43,8 @@ define void @loop(ptr %X, ptr %Y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X6:%.*]] = ptrtoaddr ptr [[X:%.*]] to i64 ; CHECK-NEXT: [[Y7:%.*]] = ptrtoaddr ptr [[Y:%.*]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X6]], [[Y7]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[Y7]], [[X6]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i64 [[TMP0]], -32 ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll index 820a7a801379e..f5b063033153f 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -22,8 +22,8 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { ; CHECK-NEXT: [[Y5:%.*]] = ptrtoaddr ptr [[Y]] to i64 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X4]], [[Y5]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[Y5]], [[X4]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ugt i64 [[TMP0]], -128 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] ; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_BODY_PREHEADER9:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: