Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2199,6 +2199,8 @@ Value *llvm::addDiffRuntimeChecks(
// the compare, to allow detecting and re-using redundant compares.
DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
assert(IC * AccessSize > 0 &&
"Threshold must be non-zero to use diff-check");
Type *Ty = SinkStart->getType();
// Compute VF * IC * AccessSize.
auto *VFTimesICTimesSize =
Expand All @@ -2215,9 +2217,14 @@ Value *llvm::addDiffRuntimeChecks(
if (IsConflict)
continue;

IsConflict =
ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
// Use (Diff - 1) <u (Threshold - 1), equivalent to 0 < Diff <u Threshold,
// to exclude Diff == 0 (equal pointers are safe).
auto *One = ConstantInt::get(Ty, 1);
IsConflict = ChkBuilder.CreateICmpULT(
ChkBuilder.CreateSub(Diff, One),
ChkBuilder.CreateSub(VFTimesICTimesSize, One), "diff.check");
SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});

if (NeedsFreeze)
IsConflict =
ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
Expand Down
8 changes: 5 additions & 3 deletions llvm/test/Transforms/LoopVectorize/AArch64/clmul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ define void @clmul_loop(ptr %a, ptr %b, ptr %c, i64 %n) {
; CHECK-NEXT: [[B3:%.*]] = ptrtoaddr ptr [[B]] to i64
; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64
; CHECK-NEXT: [[C1:%.*]] = ptrtoaddr ptr [[C]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 6
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[C1]], [[A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP11]], 31
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[C1]], [[B3]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP12]], 31
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1253,9 +1253,13 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; DEFAULT-NEXT: [[TMP6:%.*]] = sub i64 [[C1]], [[A2]]
; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
; DEFAULT-NEXT: [[TMP26:%.*]] = sub i64 [[TMP6]], 1
; DEFAULT-NEXT: [[TMP27:%.*]] = sub i64 [[TMP5]], 1
; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP26]], [[TMP27]]
; DEFAULT-NEXT: [[TMP7:%.*]] = sub i64 [[C1]], [[B3]]
; DEFAULT-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP7]], [[TMP5]]
; DEFAULT-NEXT: [[TMP28:%.*]] = sub i64 [[TMP7]], 1
; DEFAULT-NEXT: [[TMP29:%.*]] = sub i64 [[TMP5]], 1
; DEFAULT-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP28]], [[TMP29]]
; DEFAULT-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; DEFAULT-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; DEFAULT: [[VECTOR_PH]]:
Expand Down Expand Up @@ -1307,9 +1311,13 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16
; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[C1]], [[A2]]
; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; PRED-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
; PRED-NEXT: [[TMP5:%.*]] = sub i64 [[TMP2]], 1
; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[C1]], [[B3]]
; PRED-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], [[TMP2]]
; PRED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP4]], 1
; PRED-NEXT: [[TMP10:%.*]] = sub i64 [[TMP2]], 1
; PRED-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP9]], [[TMP10]]
; PRED-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,8 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[ARG21]], [[ARG3]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 15
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
Expand Down
36 changes: 24 additions & 12 deletions llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
Expand Down Expand Up @@ -87,9 +89,11 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
Expand Down Expand Up @@ -164,9 +168,11 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
Expand Down Expand Up @@ -241,9 +247,11 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], 31
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
Expand Down Expand Up @@ -318,9 +326,11 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 31
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
Expand Down Expand Up @@ -395,9 +405,11 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], 31
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], 1
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP3]], 31
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; DEFAULT-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; DEFAULT-NEXT: [[TMP6:%.*]] = sub i64 [[DST1]], [[SRC2]]
; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
; DEFAULT-NEXT: [[TMP14:%.*]] = sub i64 [[TMP6]], 1
; DEFAULT-NEXT: [[TMP27:%.*]] = sub i64 [[TMP5]], 1
; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP27]]
; DEFAULT-NEXT: br i1 [[DIFF_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
; DEFAULT: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
; DEFAULT-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
Expand Down Expand Up @@ -126,7 +128,9 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16
; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[DST1]], [[SRC2]]
; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; PRED-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], 1
; PRED-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], 1
; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]]
; PRED-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8
; INTERLEAVE-2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; INTERLEAVE-2: vector.memcheck:
; INTERLEAVE-2-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]]
; INTERLEAVE-2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; INTERLEAVE-2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
; INTERLEAVE-2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 31
; INTERLEAVE-2-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; INTERLEAVE-2: vector.main.loop.iter.check:
; INTERLEAVE-2-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[N]], 32
Expand Down Expand Up @@ -119,7 +120,8 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8
; INTERLEAVE-4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; INTERLEAVE-4: vector.memcheck:
; INTERLEAVE-4-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[SRC2]]
; INTERLEAVE-4-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
; INTERLEAVE-4-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
; INTERLEAVE-4-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 63
; INTERLEAVE-4-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; INTERLEAVE-4: vector.main.loop.iter.check:
; INTERLEAVE-4-NEXT: [[MIN_ITERS_CHECK3:%.*]] = icmp ult i64 [[N]], 64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,11 @@ for.end: ; preds = %for.body
ret i32 0
}

; If trip-count is equal to 10, the function is vectorised when predicated tail folding is chosen
; If trip-count is equal to 10, the function is not vectorised as the runtime
; check cost is too high relative to the trip count.
define i32 @foo_mid_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
; CHECK-LABEL: @foo_mid_trip_count(
; PREDICATED: vector.body
; PREDICATED-NOT: vector.body
; SCALAR-NOT: vector.body
entry:
br label %for.body
Expand All @@ -78,6 +79,32 @@ for.end: ; preds = %for.body
ret i32 0
}

; If trip-count is equal to 13, the function is vectorised when predicated tail
; folding is chosen.
; Element-wise i8 loop: every iteration loads a[idx] and b[idx], adds them, and
; stores the sum to c[idx]. The function always returns 0.
define i32 @foo_mid_trip_count_13(ptr %a, ptr %b, ptr %c, i32 %bound) {
; CHECK-LABEL: @foo_mid_trip_count_13(
; PREDICATED: vector.body
; SCALAR-NOT: vector.body
entry:
br label %for.body

for.body: ; preds = %for.body, %entry
; Induction variable: starts at 0 on entry and advances by 1 via %inc.
%idx = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Each pointer is indexed as a [32 x i8] array at element %idx.
%a.index = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %idx
%0 = load i8, ptr %a.index, align 1
%b.index = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %idx
%1 = load i8, ptr %b.index, align 1
%2 = add i8 %0, %1
%c.index = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %idx
store i8 %2, ptr %c.index, align 1
%inc = add nsw i32 %idx, 1
; The exit test uses the pre-increment %idx (not %inc), so the body has already
; run for idx == %bound by the time the loop is left.
%exitcond = icmp eq i32 %idx, %bound
; The !prof !3 attachment supplies branch weights for this exit branch.
; NOTE(review): presumably these weights are what yield the estimated trip
; count of 13 that this test relies on - confirm against the !3 definition.
br i1 %exitcond, label %for.end, label %for.body, !prof !3

for.end: ; preds = %for.body
ret i32 0
}

; If trip-count is equal to 40, the function is always vectorised
define i32 @foo_high_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
; CHECK-LABEL: @foo_high_trip_count(
Expand Down Expand Up @@ -106,3 +133,4 @@ for.end: ; preds = %for.body
!0 = !{!"branch_weights", i32 10, i32 30}
!1 = !{!"branch_weights", i32 10, i32 90}
!2 = !{!"branch_weights", i32 10, i32 390}
!3 = !{!"branch_weights", i32 10, i32 120}
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,20 @@ define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, pt
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[OUT_B1]], [[OUT_A2]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[TMP2]], 1
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP23]]
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 4
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[OUT_A2]], [[IN3]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP4]], 1
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP1]], 4
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[OUT_B1]], [[IN3]]
; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP7]], 1
; CHECK-NEXT: [[TMP13:%.*]] = sub i64 [[TMP6]], 1
; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP12]], [[TMP13]]
; CHECK-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX6]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) {
; CHECK-CA510-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK-CA510: [[VECTOR_MEMCHECK]]:
; CHECK-CA510-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[A2]]
; CHECK-CA510-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-CA510-NEXT: [[TMP4:%.*]] = sub i64 [[TMP0]], 1
; CHECK-CA510-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 31
; CHECK-CA510-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[B3]]
; CHECK-CA510-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-CA510-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1
; CHECK-CA510-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP7]], 31
; CHECK-CA510-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-CA510-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-CA510: [[VECTOR_PH]]:
Expand Down Expand Up @@ -81,9 +83,11 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) {
; CHECK-CA520-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK-CA520: [[VECTOR_MEMCHECK]]:
; CHECK-CA520-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[A2]]
; CHECK-CA520-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-CA520-NEXT: [[TMP4:%.*]] = sub i64 [[TMP0]], 1
; CHECK-CA520-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 31
; CHECK-CA520-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[B3]]
; CHECK-CA520-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-CA520-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1
; CHECK-CA520-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP7]], 31
; CHECK-CA520-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-CA520-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-CA520: [[VECTOR_PH]]:
Expand Down Expand Up @@ -145,9 +149,11 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) {
; CHECK-CA320-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK-CA320: [[VECTOR_MEMCHECK]]:
; CHECK-CA320-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[A2]]
; CHECK-CA320-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-CA320-NEXT: [[TMP13:%.*]] = sub i64 [[TMP0]], 1
; CHECK-CA320-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP13]], 31
; CHECK-CA320-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[B3]]
; CHECK-CA320-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 32
; CHECK-CA320-NEXT: [[TMP14:%.*]] = sub i64 [[TMP1]], 1
; CHECK-CA320-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP14]], 31
; CHECK-CA320-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; CHECK-CA320-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK-CA320: [[VECTOR_PH]]:
Expand Down
Loading
Loading