Skip to content
Merged
32 changes: 7 additions & 25 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8186,31 +8186,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(

VPlanTransforms::createLoopRegions(*Plan);

// Don't use getDecisionAndClampRange here, because we don't know the UF
// so this function is better to be conservative, rather than to split
// it up into different VPlans.
// TODO: Consider using getDecisionAndClampRange here to split up VPlans.
bool IVUpdateMayOverflow = false;
for (ElementCount VF : Range)
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);

TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
// Use NUW for the induction increment if we proved that it won't overflow in
// the vector loop or when not folding the tail. In the later case, we know
// that the canonical induction increment will not overflow as the vector trip
// count is >= increment and a multiple of the increment.
VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
bool HasNUW = !IVUpdateMayOverflow || Style == TailFoldingStyle::None;
if (!HasNUW) {
auto *IVInc =
LoopRegion->getExitingBasicBlock()->getTerminator()->getOperand(0);
assert(match(IVInc,
m_VPInstruction<Instruction::Add>(
m_Specific(LoopRegion->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
}

// ---------------------------------------------------------------------------
// Pre-construction: record ingredients whose recipes we'll need to further
// process after constructing the initial VPlan.
Expand Down Expand Up @@ -8250,6 +8225,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(

// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
HeaderVPBB);
Expand Down Expand Up @@ -8402,6 +8378,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
Builder))
return nullptr;

// TODO: Remove as IV can no longer overflow.
bool IVUpdateMayOverflow = false;
for (ElementCount VF : Range)
IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);

TailFoldingStyle Style = CM.getTailFoldingStyle(IVUpdateMayOverflow);
if (useActiveLaneMask(Style)) {
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once
// TailFoldingStyle is visible there.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
; PRED-NEXT: store i32 0, ptr [[TMP12]], align 8
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE5]]
; PRED: [[PRED_STORE_CONTINUE5]]:
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; PRED-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; PRED-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; PRED: [[MIDDLE_BLOCK]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
; CHECK-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
; CHECK-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1227,7 +1227,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
; CHECK-INTERLEAVE1-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
; CHECK-INTERLEAVE1-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVE1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-INTERLEAVE1-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
Expand Down Expand Up @@ -1483,7 +1483,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
; CHECK-INTERLEAVED-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
; CHECK-INTERLEAVED-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-INTERLEAVED-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-INTERLEAVED-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-INTERLEAVED-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
Expand Down Expand Up @@ -1739,7 +1739,7 @@ define i32 @dotp_predicated(i64 %N, ptr %a, ptr %b) {
; CHECK-MAXBW-NEXT: [[TMP179:%.*]] = mul nsw <16 x i32> [[TMP178]], [[TMP97]]
; CHECK-MAXBW-NEXT: [[TMP180:%.*]] = select <16 x i1> [[TMP16]], <16 x i32> [[TMP179]], <16 x i32> zeroinitializer
; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP180]])
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-MAXBW-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
; CHECK-MAXBW-NEXT: [[TMP181:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[TMP181]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[UMAX]])
; DATA-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]]
; DATA-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP10]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
; DATA-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP5]]
; DATA-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP5]]
; DATA-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
; DATA-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; DATA: middle.block:
Expand Down Expand Up @@ -102,7 +102,7 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features
; DATA_NO_LANEMASK-NEXT: [[TMP12:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
; DATA_NO_LANEMASK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]]
; DATA_NO_LANEMASK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[BROADCAST_SPLAT5]], ptr align 4 [[TMP13]], <vscale x 4 x i1> [[TMP12]])
; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add i64 [[INDEX1]], [[TMP5]]
; DATA_NO_LANEMASK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX1]], [[TMP5]]
; DATA_NO_LANEMASK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC]]
; DATA_NO_LANEMASK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; DATA_NO_LANEMASK: middle.block:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/ARM/active-lane-mask.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ define void @f0(ptr noalias %dst, ptr readonly %src, i64 %n) #0 {
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16
; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP4]], ptr align 1 [[TMP6]], <16 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[TMP5]], ptr align 1 [[TMP8]], <16 x i1> [[ACTIVE_LANE_MASK1]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
Expand Down Expand Up @@ -192,7 +192,7 @@ define void @test_stride3_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
Expand Down Expand Up @@ -239,7 +239,7 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
Expand Down Expand Up @@ -287,7 +287,7 @@ define void @test_stride_loopinvar_4i32(ptr readonly %data, ptr noalias nocaptur
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr align 4 [[TMP5]], <4 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ define void @diff_checks(ptr nocapture noundef writeonly %dst, ptr nocapture nou
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP11]], <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison), !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP8]], i32 [[INDEX]]
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[WIDE_MASKED_LOAD]], ptr align 4 [[TMP12]], <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1128,7 +1128,7 @@ define i32 @red_mla_ext_s8_s16_s32(ptr noalias nocapture readonly %A, ptr noalia
; CHECK-NEXT: [[TMP5:%.*]] = select <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i32> [[TMP4]], <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP7]] = add i32 [[VEC_PHI]], [[TMP6]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: for.cond.cleanup:
Expand Down Expand Up @@ -1276,7 +1276,7 @@ define i32 @red_mla_u8_s8_u32(ptr noalias nocapture readonly %A, ptr noalias noc
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP7]] = add i32 [[VEC_PHI]], [[TMP6]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_COND_CLEANUP]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK: for.cond.cleanup:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ define void @fn(i32 noundef %n, ptr %in, ptr %out) #0 {
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, <4 x ptr> [[VECTOR_GEP]], i32 2
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP21]], <4 x ptr> align 1 [[TMP30]], <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP29]], <4 x ptr> align 1 [[TMP31]], <4 x i1> [[ACTIVE_LANE_MASK]]), !alias.scope [[META3]], !noalias [[META0]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[PTR_IND3]] = getelementptr i8, ptr [[POINTER_PHI2]], i32 12
; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/ARM/mve-saddsatcost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ define void @arm_offset_q15(ptr nocapture readonly %pSrc, i16 signext %offset, p
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr align 2 [[NEXT_GEP]], <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_MASKED_LOAD]], <8 x i16> [[BROADCAST_SPLAT8]])
; CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[TMP0]], ptr align 2 [[NEXT_GEP6]], <8 x i1> [[ACTIVE_LANE_MASK]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP1]], label [[WHILE_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: while.end:
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
; ZVFBFMIN: [[VECTOR_PH]]:
; ZVFBFMIN-NEXT: br label %[[VECTOR_BODY:.*]]
; ZVFBFMIN: [[VECTOR_BODY]]:
; ZVFBFMIN-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[CURRENT_ITERATION_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; ZVFBFMIN-NEXT: [[TMP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[TMP0]]
Expand All @@ -40,7 +40,7 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) {
; ZVFBFMIN-NEXT: [[TMP11:%.*]] = fadd <vscale x 8 x bfloat> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; ZVFBFMIN-NEXT: call void @llvm.vp.store.nxv8bf16.p0(<vscale x 8 x bfloat> [[TMP11]], ptr align 2 [[TMP1]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; ZVFBFMIN-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64
; ZVFBFMIN-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[TMP0]]
; ZVFBFMIN-NEXT: [[CURRENT_ITERATION_NEXT]] = add nuw i64 [[TMP13]], [[TMP0]]
; ZVFBFMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]]
; ZVFBFMIN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; ZVFBFMIN-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
Expand Down Expand Up @@ -122,7 +122,7 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
; ZVFBFMIN: [[VECTOR_PH]]:
; ZVFBFMIN-NEXT: br label %[[VECTOR_BODY:.*]]
; ZVFBFMIN: [[VECTOR_BODY]]:
; ZVFBFMIN-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[CURRENT_ITERATION_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; ZVFBFMIN-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; ZVFBFMIN-NEXT: [[TMP7:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[TMP6]]
Expand All @@ -136,10 +136,10 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64
; ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[TMP13]], <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[WIDE_LOAD2]])
; ZVFBFMIN-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP15]], ptr align 4 [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
; ZVFBFMIN-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64
; ZVFBFMIN-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[TMP6]]
; ZVFBFMIN-NEXT: [[CURRENT_ITERATION_NEXT]] = add nuw i64 [[TMP12]], [[TMP6]]
; ZVFBFMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]]
; ZVFBFMIN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; ZVFBFMIN-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; ZVFBFMIN-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; ZVFBFMIN: [[MIDDLE_BLOCK]]:
; ZVFBFMIN-NEXT: br label %[[EXIT:.*]]
; ZVFBFMIN: [[EXIT]]:
Expand Down
Loading
Loading