diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index c85ef3e131068..92ea3b53d59b4 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -491,11 +491,12 @@ struct PointerDiffInfo {
   const SCEV *SinkStart;
   unsigned AccessSize;
   bool NeedsFreeze;
+  Align AccessAlign;
 
   PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart,
-                  unsigned AccessSize, bool NeedsFreeze)
+                  unsigned AccessSize, bool NeedsFreeze, Align AccessAlign)
       : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize),
-        NeedsFreeze(NeedsFreeze) {}
+        NeedsFreeze(NeedsFreeze), AccessAlign(AccessAlign) {}
 };
 
 /// Holds information about the memory runtime legality checks to verify
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 5ef18fecabd99..0983850341a8c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -905,6 +905,9 @@ class TargetTransformInfoImplBase {
     switch (ICA.getID()) {
     default:
       break;
+    case Intrinsic::loop_dependence_raw_mask:
+    case Intrinsic::loop_dependence_war_mask:
+      return 10;
     case Intrinsic::allow_runtime_check:
     case Intrinsic::allow_ubsan_check:
     case Intrinsic::annotation:
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 1e48eeca72952..2f3317e533e12 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -634,7 +634,8 @@ addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
 LLVM_ABI Value *addDiffRuntimeChecks(
     Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
-    function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC);
+    ElementCount VF, unsigned IC,
+    function_ref<bool(unsigned)> UsesLoopDependenceMaskForAccessSize);
 
 /// Struct to hold information about a partially invariant condition.
 struct IVConditionInfo {
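The new addDiffRuntimeChecks contract replaces the GetVF callback with an explicit VF/IC pair plus a per-access-size predicate that says whether the loop-dependence mask is worth emitting. A minimal caller sketch, assuming a cheapness oracle IsMaskCheap that stands in for a target cost query (the real in-tree caller is the LoopVectorize.cpp hunk further down):

  // Sketch only: IsMaskCheap is an assumed helper, not part of this patch.
  auto UsesMask = [&](unsigned AccessSize) { return IsMaskCheap(AccessSize); };
  Value *MemCheckCond =
      addDiffRuntimeChecks(Loc, Checks, Expander, VF, IC, UsesMask);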
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index eae645ab84fff..f7535c44cf315 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -508,11 +508,16 @@ bool RuntimePointerChecking::tryToCreateDiffCheck(
     }
   }
 
+  // Find the minimum common alignment of all accesses.
+  Align AccessAlign = getLoadStoreAlignment(SrcInsts[0]);
+  for (Instruction *Inst : concat<Instruction *>(SrcInsts, SinkInsts))
+    AccessAlign = std::min(AccessAlign, getLoadStoreAlignment(Inst));
+
   LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
                     << "SrcStart: " << *SrcStartInt << '\n'
                     << "SinkStartInt: " << *SinkStartInt << '\n');
   DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
-                          Src->NeedsFreeze || Sink->NeedsFreeze);
+                          Src->NeedsFreeze || Sink->NeedsFreeze, AccessAlign);
   return true;
 }
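The minimum alignment recorded here feeds the commonAlignment(AccessAlign, AccessSize) < AccessSize gate in addDiffRuntimeChecks below; as the negative misaligned_alias_mask test later in this patch notes, the mask intrinsic cannot be used when accesses may be misaligned relative to their size. A worked sketch of the gate, with illustrative values:

  // Illustrative only: i32 accesses, AccessSize == 4.
  // commonAlignment(Align(4), 4) == Align(4); 4 < 4 is false -> mask usable.
  // commonAlignment(Align(2), 4) == Align(2); 2 < 4 is true  -> keep the
  // plain VF * IC * AccessSize ult-compare instead.
  bool MustFallBack = commonAlignment(AccessAlign, AccessSize) < AccessSize;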
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 2d7cb2a035957..5844e0fc78b8b 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -2193,40 +2193,70 @@ struct SCEVPtrToAddrRewriter : SCEVRewriteVisitor<SCEVPtrToAddrRewriter> {
 Value *llvm::addDiffRuntimeChecks(
     Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
-    function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {
-
+    ElementCount VF, unsigned IC,
+    function_ref<bool(unsigned)> UsesLoopDependenceMaskForAccessSize) {
   LLVMContext &Ctx = Loc->getContext();
   IRBuilder<InstSimplifyFolder> ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
   ChkBuilder.SetInsertPoint(Loc);
+  Value *RuntimeVF = nullptr;
   // Our instructions might fold to a constant.
   Value *MemoryRuntimeCheck = nullptr;
-
   auto &SE = *Expander.getSE();
   const DataLayout &DL = Loc->getDataLayout();
   SCEVPtrToAddrRewriter Rewriter(SE, DL);
   // Map to keep track of created compares. The key is the pair of operands for
   // the compare, to allow detecting and re-using redundant compares.
   DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
-  for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
+  for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze, AccessAlign] :
+       Checks) {
+    Value *IsConflict;
     Type *Ty = SinkStart->getType();
-    // Compute VF * IC * AccessSize.
-    auto *VFTimesICTimesSize =
-        ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
-                             ConstantInt::get(Ty, IC * AccessSize));
+    Type *CheckTy = ChkBuilder.getIntNTy(Ty->getScalarSizeInBits());
     const SCEV *SinkStartRewritten = Rewriter.visit(SinkStart);
     const SCEV *SrcStartRewritten = Rewriter.visit(SrcStart);
     Value *Diff = Expander.expandCodeFor(
        SE.getMinusSCEV(SinkStartRewritten, SrcStartRewritten), Ty, Loc);
-    // Check if the same compare has already been created earlier. In that case,
-    // there is no need to check it again.
-    Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
-    if (IsConflict)
-      continue;
+    VectorType *MaskTy = VectorType::get(ChkBuilder.getInt1Ty(), VF * IC);
+    if (!UsesLoopDependenceMaskForAccessSize(AccessSize) ||
+        commonAlignment(AccessAlign, AccessSize) < AccessSize) {
+      // Compute VF * IC * AccessSize.
+      if (!RuntimeVF)
+        RuntimeVF = ChkBuilder.CreateElementCount(CheckTy, VF);
+
+      auto *VFTimesICTimesSize = ChkBuilder.CreateMul(
+          RuntimeVF, ConstantInt::get(Ty, IC * AccessSize));
+      // Check if the same compare has already been created earlier. In that
+      // case, there is no need to check it again.
+      if (SeenCompares.contains({Diff, VFTimesICTimesSize}))
+        continue;
 
-    IsConflict =
-        ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
-    SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
+      IsConflict =
+          ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
+      SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
+    } else {
+      Value *LoopAccessSize = ChkBuilder.getInt64(AccessSize);
+      if (SeenCompares.contains({Diff, LoopAccessSize}))
+        continue;
+
+      // Note: This creates loop.dependence.war.mask(ptr null, ptr %diff). This
+      // allows SCEV to remove common offsets and avoids creating duplicate
+      // checks. If %diff is a sub, it can be folded into the mask.
+      Value *SrcPtr = ConstantPointerNull::get(PointerType::getUnqual(Ctx));
+      Value *SinkPtr = ChkBuilder.CreateIntToPtr(Diff, ChkBuilder.getPtrTy());
+      Value *Mask = ChkBuilder.CreateIntrinsic(
+          MaskTy, Intrinsic::loop_dependence_war_mask,
+          {SrcPtr, SinkPtr, LoopAccessSize}, {}, "loop.dep.mask");
+
+      Value *LastLaneIdx = ChkBuilder.CreateSub(
+          ChkBuilder.CreateElementCount(CheckTy, MaskTy->getElementCount()),
+          ChkBuilder.getIntN(Ty->getScalarSizeInBits(), 1));
+      Value *NoConflict =
+          ChkBuilder.CreateExtractElement(Mask, LastLaneIdx, "no.conflict");
+
+      IsConflict = ChkBuilder.CreateNot(NoConflict, "is.conflict");
+      SeenCompares.insert({{Diff, LoopAccessSize}, IsConflict});
+    }
     if (NeedsFreeze)
       IsConflict =
           ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
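For a single i8-element check at VF x IC = vscale x 16 lanes, the code above emits roughly the following sequence (condensed from the alias_mask.ll test added below; value names are illustrative):

  %diff = sub i64 %sink.addr, %src.addr
  %diff.ptr = inttoptr i64 %diff to ptr
  %mask = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr null, ptr %diff.ptr, i64 1)
  %vscale = call i64 @llvm.vscale.i64()
  %vf.x.ic = mul nuw i64 %vscale, 16
  %last.lane = sub i64 %vf.x.ic, 1
  %no.conflict = extractelement <vscale x 16 x i1> %mask, i64 %last.lane
  %is.conflict = xor i1 %no.conflict, true ; branch to the scalar loop if set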
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 164d74f1a440d..36d979babf649 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1887,15 +1887,13 @@ class GeneratedRTChecks {
     auto DiffChecks = RtPtrChecking.getDiffChecks();
     if (DiffChecks) {
-      Value *RuntimeVF = nullptr;
-      MemRuntimeCheckCond = addDiffRuntimeChecks(
-          MemCheckBlock->getTerminator(), *DiffChecks, MemCheckExp,
-          [VF, &RuntimeVF](IRBuilderBase &B, unsigned Bits) {
-            if (!RuntimeVF)
-              RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF);
-            return RuntimeVF;
-          },
-          IC);
+      LLVMContext &Ctx = MemCheckBlock->getContext();
+      auto UseLoopDependenceMask = [&](unsigned AccessSize) {
+        return isLoopDependenceMaskCheap(Ctx, VF, IC, AccessSize);
+      };
+      MemRuntimeCheckCond =
+          addDiffRuntimeChecks(MemCheckBlock->getTerminator(), *DiffChecks,
+                               MemCheckExp, VF, IC, UseLoopDependenceMask);
     } else {
       MemRuntimeCheckCond = addRuntimeChecks(
           MemCheckBlock->getTerminator(), L, RtPtrChecking.getChecks(),
@@ -1947,6 +1945,21 @@ class GeneratedRTChecks {
     OuterLoop = L->getParentLoop();
   }
 
+  bool isLoopDependenceMaskCheap(LLVMContext &Ctx, ElementCount VF, unsigned IC,
+                                 unsigned AccessSize) {
+    if (ForceTargetInstructionCost.getNumOccurrences() > 0)
+      return ForceTargetInstructionCost <= 1;
+    VectorType *MaskTy = VectorType::get(Type::getInt1Ty(Ctx), VF * IC);
+    Value *AccessSizeVal = ConstantInt::get(Type::getInt64Ty(Ctx), AccessSize);
+    Value *NullPtr = ConstantPointerNull::get(PointerType::getUnqual(Ctx));
+    // The pointer values should not change the cost. The access size (constant)
+    // is needed by targets to cost the mask.
+    IntrinsicCostAttributes ICA(Intrinsic::loop_dependence_war_mask, MaskTy,
+                                {NullPtr, NullPtr, AccessSizeVal});
+    InstructionCost Cost = TTI->getIntrinsicInstrCost(ICA, CostKind);
+    return Cost.isValid() && Cost <= 1;
+  }
+
   InstructionCost getCost() {
     if (SCEVCheckBlock || MemCheckBlock)
       LLVM_DEBUG(dbgs() << "Calculating cost of runtime checks:\n");
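The base TTI cost of 10 added above keeps this path off for targets with no native support, which the generic cost-model test below pins down. A target that lowers these masks to a single instruction (e.g. SVE2 WHILEWR/WHILERW) could report them cheap along these lines; a hypothetical override, not part of this patch:

  InstructionCost MyTTIImpl::getIntrinsicInstrCost(
      const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    case Intrinsic::loop_dependence_raw_mask:
    case Intrinsic::loop_dependence_war_mask:
      return 1; // assumed single whilerw/whilewr-style instruction
    default:
      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
    }
  }

With such a hook, isLoopDependenceMaskCheap() returns true and the intrinsic-based check replaces the ult-compare, as in the AArch64 alias_mask.ll test that follows.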
diff --git a/llvm/test/Analysis/CostModel/loop-dep-mask-no_info.ll b/llvm/test/Analysis/CostModel/loop-dep-mask-no_info.ll
new file mode 100644
index 0000000000000..7b326c7b4a805
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/loop-dep-mask-no_info.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size %s -S -o - | FileCheck %s --check-prefix=CHECK-SIZE
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput %s -S -o - | FileCheck %s --check-prefix=CHECK-THROUGHPUT
+
+define void @loop_dependence_war_mask(ptr %a, ptr %b) {
+; CHECK-SIZE-LABEL: 'loop_dependence_war_mask'
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-THROUGHPUT-LABEL: 'loop_dependence_war_mask'
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
+  %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
+  %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
+  %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
+  ret void
+}
+
+define void @loop_dependence_raw_mask(ptr %a, ptr %b) {
+; CHECK-SIZE-LABEL: 'loop_dependence_raw_mask'
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
+; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; CHECK-THROUGHPUT-LABEL: 'loop_dependence_raw_mask'
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
+; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+  %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
+  %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
+  %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
+  %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/alias_mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/alias_mask.ll
new file mode 100644
index 0000000000000..f3b47a6121d61
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/alias_mask.ll
@@ -0,0 +1,347 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^middle.block:" --filter-out-after "^scalar.ph:" --version 4
+; RUN: opt -S -mtriple=aarch64-unknown-linux-gnu -mattr=+sve2 -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -force-vector-interleave=1 %s | FileCheck %s
+
+define void @alias_mask(ptr noalias %a, ptr %b, ptr %c, i64 %n) {
+; CHECK-LABEL: define void @alias_mask(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B2:%.*]] = ptrtoaddr ptr [[B]] to i64
+; CHECK-NEXT: [[C1:%.*]] = ptrtoaddr ptr [[C]] to i64
+; CHECK-NEXT: br label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[C1]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr
+; CHECK-NEXT: [[LOOP_DEP_MASK:%.*]] = call <vscale x 16 x i1> @llvm.loop.dependence.war.mask.nxv16i1(ptr null, ptr [[TMP1]], i64 1)
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP5]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP19]], 1
+; CHECK-NEXT: [[NO_CONFLICT:%.*]] = extractelement <vscale x 16 x i1> [[LOOP_DEP_MASK]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor i1 [[NO_CONFLICT]], true
+; CHECK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP8]], 4
+; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP24]], 4
+; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[N]], [[TMP14]]
+; CHECK-NEXT: [[TMP17:%.*]] = icmp ugt i64 [[N]], [[TMP14]]
+; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP17]], i64 [[TMP15]], i64 0
+; CHECK-NEXT: 
[[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[N]]) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP10]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP11]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = add [[WIDE_MASKED_LOAD3]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP12]], ptr align 1 [[TMP16]], [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP13]]) +; CHECK-NEXT: [[TMP23:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = xor i1 [[TMP23]], true +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; +entry: + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %gep.a = getelementptr inbounds i8, ptr %a, i64 %iv + %load.a = load i8, ptr %gep.a, align 1 + %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv + %load.b = load i8, ptr %gep.b, align 1 + %add = add i8 %load.b, %load.a + %gep.c = getelementptr inbounds i8, ptr %c, i64 %iv + store i8 %add, ptr %gep.c, align 1 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: ; preds = %for.body, %entry + ret void +} + +define i32 @alias_mask_read_after_write(ptr noalias %a, ptr %b, ptr %c, i64 %n) { +; CHECK-LABEL: define i32 @alias_mask_read_after_write( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C2:%.*]] = ptrtoaddr ptr [[C]] to i64 +; CHECK-NEXT: [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64 +; CHECK-NEXT: br label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[C2]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP13]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP20]], 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP3]], [[TMP4]] +; CHECK-NEXT: br i1 [[TMP10]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP9]], 2 +; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP18:%.*]] = shl nuw i64 [[TMP27]], 2 +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[N]], [[TMP18]] +; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i64 [[N]], [[TMP18]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP16]], i64 [[TMP15]], i64 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) +; 
CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 2 [[TMP17]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr align 2 [[TMP19]], [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 2 [[TMP21]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP23:%.*]] = add [[WIDE_MASKED_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP24:%.*]] = add [[TMP23]], [[WIDE_MASKED_LOAD5]] +; CHECK-NEXT: [[TMP25]] = select [[ACTIVE_LANE_MASK]], [[TMP24]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP2]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP14]]) +; CHECK-NEXT: [[TMP28:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP26:%.*]] = xor i1 [[TMP28]], true +; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %accum = phi i32 [ 0, %entry ], [ %add2, %for.body ] + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv + %load.a = load i32, ptr %gep.a, align 2 + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %load.a, ptr %gep.c, align 2 + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %load.b = load i32, ptr %gep.b, align 2 + %add = add i32 %load.a, %accum + %add2 = add i32 %add, %load.b + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: ; preds = %entry, %for.body + ret i32 %add2 +} + +define void @alias_mask_multiple(ptr %a, ptr %b, ptr %c, i64 %n) { +; CHECK-LABEL: define void @alias_mask_multiple( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B3:%.*]] = ptrtoaddr ptr [[B]] to i64 +; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64 +; CHECK-NEXT: [[C1:%.*]] = ptrtoaddr ptr [[C]] to i64 +; CHECK-NEXT: br label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[C1]], [[A2]] +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr +; CHECK-NEXT: [[LOOP_DEP_MASK:%.*]] = call @llvm.loop.dependence.war.mask.nxv16i1(ptr null, ptr [[TMP1]], i64 1) +; CHECK-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP31]], 16 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP32]], 1 +; CHECK-NEXT: [[NO_CONFLICT:%.*]] = extractelement [[LOOP_DEP_MASK]], i64 [[TMP2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = xor i1 [[NO_CONFLICT]], true +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[C1]], [[B3]] +; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] 
to ptr +; CHECK-NEXT: [[LOOP_DEP_MASK4:%.*]] = call @llvm.loop.dependence.war.mask.nxv16i1(ptr null, ptr [[TMP8]], i64 1) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP6]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], 1 +; CHECK-NEXT: [[NO_CONFLICT5:%.*]] = extractelement [[LOOP_DEP_MASK4]], i64 [[TMP5]] +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = xor i1 [[NO_CONFLICT5]], true +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP19]], 4 +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP18]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = sub i64 [[N]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp ugt i64 [[N]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i64 [[TMP21]], i64 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[N]]) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP25]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP26]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP27:%.*]] = add [[WIDE_MASKED_LOAD14]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP27]], ptr align 1 [[TMP28]], [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP29:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP30:%.*]] = xor i1 [[TMP29]], true +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: +; +entry: + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %gep.a = getelementptr inbounds i8, ptr %a, i64 %iv + %load.a = load i8, ptr %gep.a, align 1 + %gep.b = getelementptr inbounds i8, ptr %b, i64 %iv + %load.b = load i8, ptr %gep.b, align 1 + %add = add i8 %load.b, %load.a + %gep.c = getelementptr inbounds i8, ptr %c, i64 %iv + store i8 %add, ptr %gep.c, align 1 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: ; preds = %for.body, %entry + ret void +} + +define i32 @alias_mask_multiple_read_after_write(ptr %a, ptr %b, ptr %c, i64 %n) { +; CHECK-LABEL: define i32 @alias_mask_multiple_read_after_write( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B3:%.*]] = ptrtoaddr ptr 
[[B]] to i64 +; CHECK-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A]] to i64 +; CHECK-NEXT: [[C1:%.*]] = ptrtoaddr ptr [[C]] to i64 +; CHECK-NEXT: br label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[C1]], [[A2]] +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr +; CHECK-NEXT: [[LOOP_DEP_MASK:%.*]] = call @llvm.loop.dependence.war.mask.nxv4i1(ptr null, ptr [[TMP1]], i64 4) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP6]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP4]], 1 +; CHECK-NEXT: [[NO_CONFLICT:%.*]] = extractelement [[LOOP_DEP_MASK]], i64 [[TMP2]] +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = xor i1 [[NO_CONFLICT]], true +; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[B3]], [[C1]] +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[LOOP_DEP_MASK4:%.*]] = call @llvm.loop.dependence.war.mask.nxv4i1(ptr null, ptr [[TMP10]], i64 4) +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP8]], 1 +; CHECK-NEXT: [[NO_CONFLICT5:%.*]] = extractelement [[LOOP_DEP_MASK4]], i64 [[TMP5]] +; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = xor i1 [[NO_CONFLICT5]], true +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP19]], 2 +; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP20:%.*]] = shl nuw i64 [[TMP24]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = sub i64 [[N]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = icmp ugt i64 [[N]], [[TMP20]] +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i64 [[TMP21]], i64 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP25]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[WIDE_MASKED_LOAD]], ptr align 4 [[TMP26]], [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD14:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 4 [[TMP27]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP28:%.*]] = add [[WIDE_MASKED_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP29:%.*]] = add [[TMP28]], [[WIDE_MASKED_LOAD14]] +; CHECK-NEXT: [[TMP30]] = select [[ACTIVE_LANE_MASK]], [[TMP29]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP31:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP32:%.*]] = xor i1 
[[TMP31]], true +; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %accum = phi i32 [ 0, %entry ], [ %add2, %for.body ] + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv + %load.a = load i32, ptr %gep.a, align 4 + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %load.a, ptr %gep.c, align 4 + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %load.b = load i32, ptr %gep.b, align 4 + %add = add i32 %load.a, %accum + %add2 = add i32 %add, %load.b + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: ; preds = %entry, %for.body + ret i32 %add2 +} + +; Negative test: We can't use `loop.dependence.war.mask` with unaligned pointers. +define void @misaligned_alias_mask(ptr noalias %a, ptr %b, ptr %c, i64 %n) { +; CHECK-LABEL: define void @misaligned_alias_mask( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B2:%.*]] = ptrtoaddr ptr [[B]] to i64 +; CHECK-NEXT: [[C1:%.*]] = ptrtoaddr ptr [[C]] to i64 +; CHECK-NEXT: br label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[C1]], [[B2]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 +; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] +; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[N]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 2 [[TMP11]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr align 2 [[TMP12]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP13:%.*]] = add [[WIDE_MASKED_LOAD3]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP13]], ptr align 2 [[TMP14]], [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP10]]) +; CHECK-NEXT: [[TMP15:%.*]] = extractelement 
[[ACTIVE_LANE_MASK_NEXT]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: middle.block: +; +entry: + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv + %load.a = load i32, ptr %gep.a, align 2 + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv + %load.b = load i32, ptr %gep.b, align 2 + %add = add i32 %load.b, %load.a + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv + store i32 %add, ptr %gep.c, align 2 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: ; preds = %for.body, %entry + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 1ef9b60f63bef..1bcece0e53e35 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -1263,9 +1263,9 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]] ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; DEFAULT: [[VECTOR_MEMCHECK]]: +; DEFAULT-NEXT: [[TMP6:%.*]] = sub i64 [[C1]], [[A2]] ; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; DEFAULT-NEXT: [[TMP6:%.*]] = sub i64 [[C1]], [[A2]] ; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] ; DEFAULT-NEXT: [[TMP7:%.*]] = sub i64 [[C1]], [[B3]] ; DEFAULT-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP7]], [[TMP5]] @@ -1317,9 +1317,9 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; PRED-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; PRED: [[VECTOR_MEMCHECK]]: +; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[C1]], [[A2]] ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16 -; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[C1]], [[A2]] ; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] ; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[C1]], [[B3]] ; PRED-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP4]], [[TMP2]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 8bbd3d6942cc7..9de29f6ace613 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -17,10 +17,10 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; DEFAULT: [[VECTOR_MEMCHECK]]: +; DEFAULT-NEXT: [[TMP6:%.*]] = sub i64 [[DST1]], [[SRC2]] ; DEFAULT-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; DEFAULT-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; DEFAULT-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 -; DEFAULT-NEXT: [[TMP6:%.*]] = sub i64 [[DST1]], [[SRC2]] ; DEFAULT-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] ; 
DEFAULT-NEXT: br i1 [[DIFF_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; DEFAULT: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: @@ -123,9 +123,9 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 ; PRED-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; PRED: [[VECTOR_MEMCHECK]]: +; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[DST1]], [[SRC2]] ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16 -; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[DST1]], [[SRC2]] ; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] ; PRED-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll index 6cd94d9907597..d3b42e73caa78 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-struct-return.ll @@ -132,17 +132,17 @@ define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, pt ; CHECK-NEXT: [[OUT_B1:%.*]] = ptrtoaddr ptr [[OUT_B]] to i64 ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[OUT_B1]], [[OUT_A2]] ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[OUT_B1]], [[OUT_A2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[OUT_A2]], [[IN3]] +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP1]], 4 ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP1]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[OUT_B1]], [[IN3]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP1]], 4 ; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX6]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll index 7d807277d9853..db17b80761e31 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll @@ -20,10 +20,10 @@ define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef read ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[D1]], [[S2]] ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[D1]], [[S2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll index 
ea2c092c49960..8a19d15a6af04 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-multiexit.ll @@ -20,10 +20,10 @@ define void @multiple_exits_unique_exit_block(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -88,10 +88,10 @@ define i32 @multiple_exits_multiple_exit_blocks(ptr %A, ptr %B, i32 %N) #0 { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[B1]], [[A2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll index bd4ea480c80c7..17794c5bbedb5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll @@ -19,25 +19,25 @@ define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], [[TMP2]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[DST_21]], [[DST_12]] ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 16 -; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[DST_21]], [[DST_12]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[DST_12]], [[SRC_13]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[DST_12]], [[SRC_25]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP10]], [[TMP9]] ; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK6]] -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[DST_21]], [[SRC_13]] +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP12]], [[TMP11]] ; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = 
or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[DST_21]], [[SRC_25]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP4]], 16 ; CHECK-NEXT: [[DIFF_CHECK10:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]] ; CHECK-NEXT: [[CONFLICT_RDX11:%.*]] = or i1 [[CONFLICT_RDX9]], [[DIFF_CHECK10]] ; CHECK-NEXT: br i1 [[CONFLICT_RDX11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index f7ca39b8c920e..4a71bbc8fc8c4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -90,9 +90,9 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP2]], 7 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[A2]] ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]] ; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll index ee3144549bcbb..800b2db31dd88 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll @@ -307,7 +307,7 @@ define void @histogram_float(ptr noalias %buckets, ptr readonly %indices, i64 %N ; CHECK-NEXT: store float [[INC]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -350,7 +350,7 @@ define void @histogram_varying_increment(ptr noalias %buckets, ptr readonly %ind ; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14]] ; CHECK: for.exit: ; CHECK-NEXT: ret void ; @@ -406,7 +406,7 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( [[TMP21]], i32 1, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -459,7 +459,7 @@ define void 
@histogram_array_3op_gep(i64 noundef %N) #0 { ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( [[TMP11]], i32 1, splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -513,7 +513,7 @@ define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr reado ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32( [[TMP7]], i32 1, splat (i1 true)) ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] @@ -564,7 +564,7 @@ define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indic ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]]) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 -; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_EXIT:%.*]] ; CHECK: for.exit: @@ -604,11 +604,14 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK: vector.memcheck: ; CHECK-NEXT: [[ARRAY1:%.*]] = ptrtoaddr ptr [[ARRAY]] to i64 ; CHECK-NEXT: [[INDICES2:%.*]] = ptrtoaddr ptr [[INDICES]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[ARRAY1]], [[INDICES2]] +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[LOOP_DEP_MASK:%.*]] = call @llvm.loop.dependence.war.mask.nxv4i1(ptr null, ptr [[TMP10]], i64 4) ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[ARRAY1]], [[INDICES2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]] -; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP3]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP5]], -1 +; CHECK-NEXT: [[NO_CONFLICT:%.*]] = extractelement [[LOOP_DEP_MASK]], i64 [[TMP4]] +; CHECK-NEXT: br i1 [[NO_CONFLICT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2 @@ -632,7 +635,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr % ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
@@ -725,7 +728,7 @@ define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i
 ; CHECK-NEXT:    call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> [[TMP6]], i64 1, <vscale x 2 x i1> splat (i1 true))
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
index 723d4f16e289e..1526c23dc1a55 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll
@@ -16,13 +16,13 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP15]], 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP16]]
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP7]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -77,13 +77,13 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
 ; ZVFHMIN-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; ZVFHMIN:       [[VECTOR_MEMCHECK]]:
+; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFHMIN-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; ZVFHMIN-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; ZVFHMIN-NEXT:    [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; ZVFHMIN-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; ZVFHMIN-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -161,13 +161,13 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP15]], 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP4]], 4
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP16]]
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; CHECK-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP7]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -222,13 +222,13 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
 ; ZVFHMIN-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; ZVFHMIN:       [[VECTOR_MEMCHECK]]:
+; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFHMIN-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; ZVFHMIN-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; ZVFHMIN-NEXT:    [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; ZVFHMIN-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; ZVFHMIN-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -306,13 +306,13 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP15]], 2
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP4]], 8
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP16]]
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 8
 ; CHECK-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP7]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -367,13 +367,13 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
 ; ZVFHMIN-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; ZVFHMIN:       [[VECTOR_MEMCHECK]]:
+; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFHMIN-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; ZVFHMIN-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 8
-; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 8
 ; ZVFHMIN-NEXT:    [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 8
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; ZVFHMIN-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; ZVFHMIN-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -451,13 +451,13 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP14]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP15]], 2
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP4]], 8
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP16]]
-; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 8
 ; CHECK-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP7]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -512,13 +512,13 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 4096, [[TMP2]]
 ; ZVFHMIN-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; ZVFHMIN:       [[VECTOR_MEMCHECK]]:
+; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFHMIN-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; ZVFHMIN-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 8
-; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 8
 ; ZVFHMIN-NEXT:    [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 8
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; ZVFHMIN-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; ZVFHMIN-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -592,13 +592,13 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT:    [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64
 ; CHECK-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
 ; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP16]], 2
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP18]]
-; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP16]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP16]], 2
 ; CHECK-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP19]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -647,13 +647,13 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT:    [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64
 ; ZVFHMIN-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; ZVFHMIN:       [[VECTOR_MEMCHECK]]:
+; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFHMIN-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
 ; ZVFHMIN-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 2
 ; ZVFHMIN-NEXT:    [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 2
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; ZVFHMIN-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; ZVFHMIN-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -725,13 +725,13 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK-NEXT:    [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64
 ; CHECK-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; CHECK:       [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
 ; CHECK-NEXT:    [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8
 ; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP16]], 2
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; CHECK-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP18]]
-; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP16]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP16]], 2
 ; CHECK-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], [[TMP19]]
 ; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -780,13 +780,13 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; ZVFHMIN-NEXT:    [[OUTPUT1:%.*]] = ptrtoaddr ptr [[OUTPUT]] to i64
 ; ZVFHMIN-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; ZVFHMIN:       [[VECTOR_MEMCHECK]]:
+; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; ZVFHMIN-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
 ; ZVFHMIN-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 2
-; ZVFHMIN-NEXT:    [[TMP6:%.*]] = sub i64 [[OUTPUT1]], [[INPUT12]]
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 2
 ; ZVFHMIN-NEXT:    [[TMP8:%.*]] = sub i64 [[OUTPUT1]], [[INPUT23]]
+; ZVFHMIN-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 2
 ; ZVFHMIN-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; ZVFHMIN-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; ZVFHMIN-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 33c96c5eb21b2..b9f1373d31c96 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -197,10 +197,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP9]]
 ; RV64-NEXT:    br i1 [[TMP8]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; RV64:       [[VECTOR_MEMCHECK]]:
+; RV64-NEXT:    [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
 ; RV64-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
 ; RV64-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 4
-; RV64-NEXT:    [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
 ; RV64-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
 ; RV64-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV64:       [[VECTOR_PH]]:
@@ -260,10 +260,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV32-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; RV32-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; RV32:       [[VECTOR_MEMCHECK]]:
+; RV32-NEXT:    [[TMP6:%.*]] = sub i32 [[B1]], [[A2]]
 ; RV32-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
 ; RV32-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 4
 ; RV32-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], 4
-; RV32-NEXT:    [[TMP6:%.*]] = sub i32 [[B1]], [[A2]]
 ; RV32-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP6]], [[TMP5]]
 ; RV32-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV32:       [[VECTOR_PH]]:
@@ -331,10 +331,10 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-UF2-NEXT:    [[TMP10:%.*]] = or i1 [[TMP7]], [[TMP9]]
 ; RV64-UF2-NEXT:    br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
 ; RV64-UF2:       [[VECTOR_MEMCHECK]]:
+; RV64-UF2-NEXT:    [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
 ; RV64-UF2-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-UF2-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
 ; RV64-UF2-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 8
-; RV64-UF2-NEXT:    [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
 ; RV64-UF2-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
 ; RV64-UF2-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV64-UF2:       [[VECTOR_PH]]:
@@ -448,10 +448,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-NEXT:    [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP9]]
 ; RV64-NEXT:    br i1 [[TMP8]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; RV64:       [[VECTOR_MEMCHECK]]:
+; RV64-NEXT:    [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
 ; RV64-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
 ; RV64-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 4
-; RV64-NEXT:    [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
 ; RV64-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
 ; RV64-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV64:       [[VECTOR_PH]]:
@@ -511,10 +511,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV32-NEXT:    [[TMP0:%.*]] = zext i32 [[N]] to i64
 ; RV32-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; RV32:       [[VECTOR_MEMCHECK]]:
+; RV32-NEXT:    [[TMP6:%.*]] = sub i32 [[B1]], [[A2]]
 ; RV32-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
 ; RV32-NEXT:    [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 4
 ; RV32-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], 4
-; RV32-NEXT:    [[TMP6:%.*]] = sub i32 [[B1]], [[A2]]
 ; RV32-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i32 [[TMP6]], [[TMP5]]
 ; RV32-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; RV32:       [[VECTOR_PH]]:
@@ -582,10 +582,10 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-UF2-NEXT:    [[TMP10:%.*]] = or i1 [[TMP7]], [[TMP9]]
 ; RV64-UF2-NEXT:    br i1 [[TMP10]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]]
 ; RV64-UF2:       [[VECTOR_MEMCHECK]]:
+; RV64-UF2-NEXT:    [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
 ; RV64-UF2-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
 ; RV64-UF2-NEXT:    [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
 ; RV64-UF2-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 8
-; RV64-UF2-NEXT:    [[TMP14:%.*]] = sub i64 [[B1]], [[A2]]
 ; RV64-UF2-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP14]], [[TMP13]]
 ; RV64-UF2-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; RV64-UF2:       [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 85d9a11cb65e0..1aa4371664ca8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -611,10 +611,10 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
 ; NOSTRIDED-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; NOSTRIDED:       vector.memcheck:
+; NOSTRIDED-NEXT:    [[TMP6:%.*]] = sub i64 [[P21]], [[P3]]
 ; NOSTRIDED-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NOSTRIDED-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NOSTRIDED-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NOSTRIDED-NEXT:    [[TMP6:%.*]] = sub i64 [[P21]], [[P3]]
 ; NOSTRIDED-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NOSTRIDED-NEXT:    br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED:       vector.ph:
@@ -665,10 +665,10 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-UF2-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
 ; NOSTRIDED-UF2-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
 ; NOSTRIDED-UF2:       vector.memcheck:
+; NOSTRIDED-UF2-NEXT:    [[TMP5:%.*]] = sub i64 [[P21]], [[P3]]
 ; NOSTRIDED-UF2-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NOSTRIDED-UF2-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
 ; NOSTRIDED-UF2-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 8
-; NOSTRIDED-UF2-NEXT:    [[TMP5:%.*]] = sub i64 [[P21]], [[P3]]
 ; NOSTRIDED-UF2-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP5]], [[TMP4]]
 ; NOSTRIDED-UF2-NEXT:    br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; NOSTRIDED-UF2:       vector.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
index 06d4c24459945..5177a34186494 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
@@ -16,9 +16,9 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -65,9 +65,9 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -131,9 +131,9 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -180,9 +180,9 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -246,9 +246,9 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -295,9 +295,9 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -361,9 +361,9 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -410,9 +410,9 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -476,9 +476,9 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -525,9 +525,9 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -591,9 +591,9 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -640,9 +640,9 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -706,9 +706,9 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -755,9 +755,9 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -821,9 +821,9 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -870,9 +870,9 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -936,9 +936,9 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -985,9 +985,9 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1051,9 +1051,9 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1100,9 +1100,9 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1166,9 +1166,9 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1215,9 +1215,9 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1281,9 +1281,9 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1330,9 +1330,9 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1396,9 +1396,9 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 16
-; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1445,9 +1445,9 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP14]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16
-; NO-VP-NEXT:    [[TMP4:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], [[TMP3]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1513,10 +1513,10 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
 ; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1563,10 +1563,10 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1630,10 +1630,10 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
 ; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1680,10 +1680,10 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1747,10 +1747,10 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
 ; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1797,10 +1797,10 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1864,10 +1864,10 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
 ; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1914,10 +1914,10 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -2034,10 +2034,10 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoaddr ptr [[B]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
 ; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
-; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -2084,10 +2084,10 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 100, [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[B1]], [[A2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
index 62a200d17c8a2..f4b6bff6f706b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
@@ -16,13 +16,13 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
-; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
-; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
+; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
 ; IF-EVL-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; IF-EVL-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -75,13 +75,13 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; NO-VP-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; NO-VP-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[C3]]
+; NO-VP-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; NO-VP-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; NO-VP-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; NO-VP-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -154,13 +154,13 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
-; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
-; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
+; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
 ; IF-EVL-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; IF-EVL-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -213,13 +213,13 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; NO-VP-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; NO-VP-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[C3]]
+; NO-VP-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; NO-VP-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; NO-VP-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; NO-VP-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -292,13 +292,13 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
-; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
-; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
+; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
 ; IF-EVL-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; IF-EVL-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -351,13 +351,13 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; NO-VP-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; NO-VP-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[C3]]
+; NO-VP-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; NO-VP-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; NO-VP-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; NO-VP-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -430,13 +430,13 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
-; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
-; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
+; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
 ; IF-EVL-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; IF-EVL-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
@@ -489,13 +489,13 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
-; NO-VP-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; NO-VP-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[C3]]
+; NO-VP-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP4]], 4
 ; NO-VP-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; NO-VP-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
 ; NO-VP-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
@@ -568,10 +568,10 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP21:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; IF-EVL-NEXT:    [[TMP20:%.*]] = mul i64 [[TMP5]], 4
-; IF-EVL-NEXT:    [[TMP21:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -618,10 +618,10 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -686,10 +686,10 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
-; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], [[TMP6]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -736,10 +736,10 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -804,10 +804,10 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
-; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -858,10 +858,10 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -932,10 +932,10 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
-; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -986,10 +986,10 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1060,10 +1060,10 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP21:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
 ; IF-EVL-NEXT:    [[TMP20:%.*]] = mul i64 [[TMP5]], 4
-; IF-EVL-NEXT:    [[TMP21:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1110,10 +1110,10 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
index b891aea634f1c..a7d798ecc115f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
@@ -600,10 +600,10 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
-; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -650,10 +650,10 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -717,10 +717,10 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
-; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -767,10 +767,10 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -834,10 +834,10 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
-; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -884,10 +884,10 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -951,10 +951,10 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
 ; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
-; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1001,10 +1001,10 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
@@ -1068,10 +1068,10 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoaddr ptr [[A]] to i64
 ; IF-EVL-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
 ; IF-EVL:       [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
 ; IF-EVL-NEXT:    [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2
 ; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 8
-; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[A1]], [[B2]]
 ; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP8]], [[TMP7]]
 ; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; IF-EVL:       [[VECTOR_PH]]:
@@ -1118,10 +1118,10 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
 ; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
 ; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
 ; NO-VP:       [[VECTOR_MEMCHECK]]:
+; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
 ; NO-VP-NEXT:    [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 2
 ; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 8
-; NO-VP-NEXT:    [[TMP6:%.*]] = sub i64 [[A1]], [[B2]]
 ; NO-VP-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
 ; NO-VP-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
 ; NO-VP:       [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll
index c64aee7f35fb1..6a558e75eeee2 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -o - -S %s | FileCheck %s
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-target-instruction-cost=1 -force-vector-interleave=1 -o - -S %s | FileCheck %s --check-prefix=CHECK-LOOP-DEP
 
 ; Test case with a large number of pointer groups to check for memory
 ; conflicts, but with many redundant checks that can be simplified.
@@ -149,6 +150,184 @@ define void @test_large_number_of_group(ptr %dst, i64 %off, i64 %N) {
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
+; CHECK-LOOP-DEP-LABEL: @test_large_number_of_group(
+; CHECK-LOOP-DEP-NEXT:  entry:
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_2:%.*]] = shl i64 [[OFF:%.*]], 1
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_3:%.*]] = mul i64 [[OFF]], 3
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_4:%.*]] = shl i64 [[OFF]], 2
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_5:%.*]] = mul i64 [[OFF]], 5
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_6:%.*]] = mul i64 [[OFF]], 6
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_7:%.*]] = mul i64 [[OFF]], 7
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_8:%.*]] = shl i64 [[OFF]], 3
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_9:%.*]] = mul i64 [[OFF]], 9
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_10:%.*]] = mul i64 [[OFF]], 10
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_11:%.*]] = mul i64 [[OFF]], 11
+; CHECK-LOOP-DEP-NEXT:    [[OFF_MUL_12:%.*]] = mul i64 [[OFF]], 12
+; CHECK-LOOP-DEP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
+; CHECK-LOOP-DEP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-LOOP-DEP:       vector.memcheck:
+; CHECK-LOOP-DEP-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[OFF_MUL_8]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK1:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP0]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT1:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK1]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT1:%.*]] = xor i1 [[NO_CONFLICT1]], true
+; CHECK-LOOP-DEP-NEXT:    [[TMP2:%.*]] = shl i64 [[OFF]], 4
+; CHECK-LOOP-DEP-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP3]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT:%.*]] = xor i1 [[NO_CONFLICT]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX1:%.*]] = or i1 [[IS_CONFLICT1]], [[IS_CONFLICT]]
+; CHECK-LOOP-DEP-NEXT:    [[TMP7:%.*]] = mul i64 [[OFF]], 24
+; CHECK-LOOP-DEP-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK2:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP4]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT3:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK2]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT4:%.*]] = xor i1 [[NO_CONFLICT3]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[CONFLICT_RDX1]], [[IS_CONFLICT4]]
+; CHECK-LOOP-DEP-NEXT:    [[TMP12:%.*]] = shl i64 [[OFF]], 5
+; CHECK-LOOP-DEP-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK5:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP6]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT6:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK5]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT7:%.*]] = xor i1 [[NO_CONFLICT6]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX8:%.*]] = or i1 [[CONFLICT_RDX]], [[IS_CONFLICT7]]
+; CHECK-LOOP-DEP-NEXT:    [[TMP17:%.*]] = mul i64 [[OFF]], 40
+; CHECK-LOOP-DEP-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP17]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK9:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP8]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT10:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK9]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT11:%.*]] = xor i1 [[NO_CONFLICT10]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX12:%.*]] = or i1 [[CONFLICT_RDX8]], [[IS_CONFLICT11]]
+; CHECK-LOOP-DEP-NEXT:    [[TMP22:%.*]] = mul i64 [[OFF]], 48
+; CHECK-LOOP-DEP-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP22]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK13:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP10]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT14:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK13]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT15:%.*]] = xor i1 [[NO_CONFLICT14]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX12]], [[IS_CONFLICT15]]
+; CHECK-LOOP-DEP-NEXT:    [[TMP27:%.*]] = mul i64 [[OFF]], 56
+; CHECK-LOOP-DEP-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP27]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK17:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP13]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT18:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK17]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT19:%.*]] = xor i1 [[NO_CONFLICT18]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX20:%.*]] = or i1 [[CONFLICT_RDX16]], [[IS_CONFLICT19]]
+; CHECK-LOOP-DEP-NEXT:    [[TMP32:%.*]] = shl i64 [[OFF]], 6
+; CHECK-LOOP-DEP-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP32]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK21:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP14]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT22:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK21]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT23:%.*]] = xor i1 [[NO_CONFLICT22]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX24:%.*]] = or i1 [[CONFLICT_RDX20]], [[IS_CONFLICT23]]
+; CHECK-LOOP-DEP-NEXT:    [[TMP37:%.*]] = mul i64 [[OFF]], 72
+; CHECK-LOOP-DEP-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP37]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK25:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP16]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT26:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK25]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT27:%.*]] = xor i1 [[NO_CONFLICT26]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX28:%.*]] = or i1 [[CONFLICT_RDX24]], [[IS_CONFLICT27]]
+; CHECK-LOOP-DEP-NEXT:    [[TMP42:%.*]] = mul i64 [[OFF]], 80
+; CHECK-LOOP-DEP-NEXT:    [[TMP18:%.*]] = inttoptr i64 [[TMP42]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK29:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP18]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT30:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK29]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT31:%.*]] = xor i1 [[NO_CONFLICT30]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX32:%.*]] = or i1 [[CONFLICT_RDX28]], [[IS_CONFLICT31]]
+; CHECK-LOOP-DEP-NEXT:    [[TMP47:%.*]] = mul i64 [[OFF]], 88
+; CHECK-LOOP-DEP-NEXT:    [[TMP20:%.*]] = inttoptr i64 [[TMP47]] to ptr
+; CHECK-LOOP-DEP-NEXT:    [[LOOP_DEP_MASK257:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP20]], i64 8)
+; CHECK-LOOP-DEP-NEXT:    [[NO_CONFLICT258:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK257]], i64 3
+; CHECK-LOOP-DEP-NEXT:    [[IS_CONFLICT259:%.*]] = xor i1 [[NO_CONFLICT258]], true
+; CHECK-LOOP-DEP-NEXT:    [[CONFLICT_RDX260:%.*]] = or i1 [[CONFLICT_RDX32]], [[IS_CONFLICT259]]
+; CHECK-LOOP-DEP-NEXT:    br i1 [[CONFLICT_RDX260]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-LOOP-DEP:       vector.ph:
+; CHECK-LOOP-DEP-NEXT:    [[N_MOD_VF:%.*]] = urem i64
[[N]], 4 +; CHECK-LOOP-DEP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-LOOP-DEP-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-LOOP-DEP: vector.body: +; CHECK-LOOP-DEP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-LOOP-DEP-NEXT: [[TMP167:%.*]] = add nsw i64 [[INDEX]], -5 +; CHECK-LOOP-DEP-NEXT: [[TMP168:%.*]] = add i64 [[TMP167]], [[OFF]] +; CHECK-LOOP-DEP-NEXT: [[TMP169:%.*]] = getelementptr i64, ptr [[DST:%.*]], i64 [[TMP168]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP169]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP170:%.*]] = add i64 [[TMP167]], [[OFF_MUL_2]] +; CHECK-LOOP-DEP-NEXT: [[TMP171:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP170]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP171]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP172:%.*]] = add i64 [[TMP167]], [[OFF_MUL_3]] +; CHECK-LOOP-DEP-NEXT: [[TMP173:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP172]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP173]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP174:%.*]] = add i64 [[TMP167]], [[OFF_MUL_4]] +; CHECK-LOOP-DEP-NEXT: [[TMP175:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP174]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP175]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP176:%.*]] = add i64 [[TMP167]], [[OFF_MUL_5]] +; CHECK-LOOP-DEP-NEXT: [[TMP177:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP176]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP177]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP178:%.*]] = add i64 [[TMP167]], [[OFF_MUL_6]] +; CHECK-LOOP-DEP-NEXT: [[TMP179:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP178]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP179]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP180:%.*]] = add i64 [[TMP167]], [[OFF_MUL_7]] +; CHECK-LOOP-DEP-NEXT: [[TMP181:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP180]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP181]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP182:%.*]] = add i64 [[TMP167]], [[OFF_MUL_8]] +; CHECK-LOOP-DEP-NEXT: [[TMP183:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP182]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP183]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP184:%.*]] = add i64 [[TMP167]], [[OFF_MUL_9]] +; CHECK-LOOP-DEP-NEXT: [[TMP185:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP184]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP185]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP186:%.*]] = add i64 [[TMP167]], [[OFF_MUL_10]] +; CHECK-LOOP-DEP-NEXT: [[TMP187:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP186]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP187]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP188:%.*]] = add i64 [[TMP167]], [[OFF_MUL_11]] +; CHECK-LOOP-DEP-NEXT: [[TMP189:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP188]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP189]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP190:%.*]] = add i64 [[TMP167]], [[OFF_MUL_12]] +; CHECK-LOOP-DEP-NEXT: [[TMP191:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP190]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> zeroinitializer, ptr [[TMP191]], align 8 +; CHECK-LOOP-DEP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-LOOP-DEP-NEXT: [[TMP192:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-LOOP-DEP-NEXT: br i1 [[TMP192]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP0:![0-9]+]] +; CHECK-LOOP-DEP: middle.block: +; CHECK-LOOP-DEP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-LOOP-DEP-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-LOOP-DEP: scalar.ph: +; CHECK-LOOP-DEP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-LOOP-DEP-NEXT: br label [[LOOP:%.*]] +; CHECK-LOOP-DEP: loop: +; CHECK-LOOP-DEP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-LOOP-DEP-NEXT: [[IV_SUB_5:%.*]] = add nsw i64 [[IV]], -5 +; CHECK-LOOP-DEP-NEXT: [[IDX_1:%.*]] = add i64 [[IV_SUB_5]], [[OFF]] +; CHECK-LOOP-DEP-NEXT: [[GEP_1:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_1]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_1]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_2:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_2]] +; CHECK-LOOP-DEP-NEXT: [[GEP_2:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_2]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_2]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_3:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_3]] +; CHECK-LOOP-DEP-NEXT: [[GEP_3:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_3]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_3]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_4:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_4]] +; CHECK-LOOP-DEP-NEXT: [[GEP_4:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_4]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_4]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_5:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_5]] +; CHECK-LOOP-DEP-NEXT: [[GEP_5:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_5]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_5]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_6:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_6]] +; CHECK-LOOP-DEP-NEXT: [[GEP_6:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_6]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_6]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_7:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_7]] +; CHECK-LOOP-DEP-NEXT: [[GEP_7:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_7]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_7]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_8:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_8]] +; CHECK-LOOP-DEP-NEXT: [[GEP_8:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_8]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_8]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_9:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_9]] +; CHECK-LOOP-DEP-NEXT: [[GEP_9:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_9]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_9]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_10:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_10]] +; CHECK-LOOP-DEP-NEXT: [[GEP_10:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_10]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_10]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_11:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_11]] +; CHECK-LOOP-DEP-NEXT: [[GEP_11:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_11]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_11]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IDX_12:%.*]] = add i64 [[IV_SUB_5]], [[OFF_MUL_12]] +; CHECK-LOOP-DEP-NEXT: [[GEP_12:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IDX_12]] +; CHECK-LOOP-DEP-NEXT: store double 0.000000e+00, ptr [[GEP_12]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-LOOP-DEP-NEXT: 
[[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-LOOP-DEP-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-LOOP-DEP: exit: +; CHECK-LOOP-DEP-NEXT: ret void +; entry: %off.mul.2 = shl i64 %off, 1 %off.mul.3 = mul i64 %off, 3 @@ -259,6 +438,60 @@ define void @check_creation_order(ptr %a, ptr %b, i32 %m) { ; CHECK: exit: ; CHECK-NEXT: ret void ; +; CHECK-LOOP-DEP-LABEL: @check_creation_order( +; CHECK-LOOP-DEP-NEXT: entry: +; CHECK-LOOP-DEP-NEXT: [[A1:%.*]] = ptrtoaddr ptr [[A:%.*]] to i64 +; CHECK-LOOP-DEP-NEXT: [[A2:%.*]] = ptrtoaddr ptr [[A3:%.*]] to i64 +; CHECK-LOOP-DEP-NEXT: [[M_EXT:%.*]] = sext i32 [[M:%.*]] to i64 +; CHECK-LOOP-DEP-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr double, ptr [[A3]], i64 [[M_EXT]] +; CHECK-LOOP-DEP-NEXT: br label [[VECTOR_MEMCHECK:%.*]] +; CHECK-LOOP-DEP: vector.memcheck: +; CHECK-LOOP-DEP-NEXT: [[TMP1:%.*]] = mul nsw i64 [[M_EXT]], -8 +; CHECK-LOOP-DEP-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr +; CHECK-LOOP-DEP-NEXT: [[LOOP_DEP_MASK:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP2]], i64 8) +; CHECK-LOOP-DEP-NEXT: [[NO_CONFLICT:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK]], i64 3 +; CHECK-LOOP-DEP-NEXT: [[IS_CONFLICT:%.*]] = xor i1 [[NO_CONFLICT]], true +; CHECK-LOOP-DEP-NEXT: [[TMP8:%.*]] = sub i64 [[A2]], [[A1]] +; CHECK-LOOP-DEP-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-LOOP-DEP-NEXT: [[LOOP_DEP_MASK3:%.*]] = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr [[TMP9]], i64 8) +; CHECK-LOOP-DEP-NEXT: [[NO_CONFLICT4:%.*]] = extractelement <4 x i1> [[LOOP_DEP_MASK3]], i64 3 +; CHECK-LOOP-DEP-NEXT: [[IS_CONFLICT5:%.*]] = xor i1 [[NO_CONFLICT4]], true +; CHECK-LOOP-DEP-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[IS_CONFLICT]], [[IS_CONFLICT5]] +; CHECK-LOOP-DEP-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-LOOP-DEP: vector.ph: +; CHECK-LOOP-DEP-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-LOOP-DEP: vector.body: +; CHECK-LOOP-DEP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-LOOP-DEP-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[INVARIANT_GEP]], i64 [[INDEX]] +; CHECK-LOOP-DEP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] +; CHECK-LOOP-DEP-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP4]], align 8 +; CHECK-LOOP-DEP-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[WIDE_LOAD]], [[WIDE_LOAD6]] +; CHECK-LOOP-DEP-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[A3]], i64 [[INDEX]] +; CHECK-LOOP-DEP-NEXT: store <4 x double> [[TMP5]], ptr [[TMP6]], align 8 +; CHECK-LOOP-DEP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-LOOP-DEP-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 31996 +; CHECK-LOOP-DEP-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-LOOP-DEP: middle.block: +; CHECK-LOOP-DEP-NEXT: br label [[SCALAR_PH]] +; CHECK-LOOP-DEP: scalar.ph: +; CHECK-LOOP-DEP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 31996, [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-LOOP-DEP-NEXT: br label [[LOOP:%.*]] +; CHECK-LOOP-DEP: loop: +; CHECK-LOOP-DEP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-LOOP-DEP-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[INVARIANT_GEP]], i64 [[IV]] +; CHECK-LOOP-DEP-NEXT: [[L_0:%.*]] = 
load double, ptr [[GEP]], align 8 +; CHECK-LOOP-DEP-NEXT: [[GEP_B:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[IV]] +; CHECK-LOOP-DEP-NEXT: [[L_1:%.*]] = load double, ptr [[GEP_B]], align 8 +; CHECK-LOOP-DEP-NEXT: [[ADD3:%.*]] = fadd double [[L_0]], [[L_1]] +; CHECK-LOOP-DEP-NEXT: [[GEP_A:%.*]] = getelementptr inbounds double, ptr [[A3]], i64 [[IV]] +; CHECK-LOOP-DEP-NEXT: store double [[ADD3]], ptr [[GEP_A]], align 8 +; CHECK-LOOP-DEP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-LOOP-DEP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 31999 +; CHECK-LOOP-DEP-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-LOOP-DEP: exit: +; CHECK-LOOP-DEP-NEXT: ret void +; entry: %m.ext = sext i32 %m to i64 %invariant.gep = getelementptr double, ptr %a, i64 %m.ext diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index 645c2742095bb..b25f3767b4a82 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -24,11 +24,11 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-VF4UF1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-VF4UF1: [[VECTOR_MEMCHECK]]: +; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = add i64 [[B1]], -4 +; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[A2]] ; CHECK-VF4UF1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF4UF1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; CHECK-VF4UF1-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 -; CHECK-VF4UF1-NEXT: [[TMP8:%.*]] = add i64 [[B1]], -4 -; CHECK-VF4UF1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[A2]] ; CHECK-VF4UF1-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP9]], [[TMP7]] ; CHECK-VF4UF1-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF1: [[VECTOR_PH]]: @@ -87,11 +87,11 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-VF4UF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK-VF4UF2: [[VECTOR_MEMCHECK]]: +; CHECK-VF4UF2-NEXT: [[TMP8:%.*]] = add i64 [[B1]], -4 +; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[A2]] ; CHECK-VF4UF2-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-VF4UF2-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4 ; CHECK-VF4UF2-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8 -; CHECK-VF4UF2-NEXT: [[TMP8:%.*]] = add i64 [[B1]], -4 -; CHECK-VF4UF2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[A2]] ; CHECK-VF4UF2-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP9]], [[TMP7]] ; CHECK-VF4UF2-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK-VF4UF2: [[VECTOR_PH]]:
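Note on the shape of the new runtime check (editorial summary; the IR below is an illustrative sketch distilled from the CHECK-LOOP-DEP lines above, not an excerpt from any single test, and the value names are invented): when the mask-based path is taken, the pointer-difference check no longer compares the address difference against VF * IC * AccessSize. Instead, the difference is reinterpreted as a pointer (with the source pinned at null) and queried through the intrinsic; the tests then read lane VF * IC - 1 of the resulting mask as the all-lanes-safe bit, so its negation is the conflict branch condition. For VF = 4, IC = 1, and an 8-byte access:

  ; Previous form: conflict if the byte distance between the accesses is
  ; smaller than the bytes touched per vector iteration (VF * IC * AccessSize).
  %diff        = sub i64 %sink.addr, %src.addr
  %diff.check  = icmp ult i64 %diff, 32                 ; 4 * 1 * 8 bytes

  ; New form: query the WAR dependence mask on the (null, diff) pointer pair
  ; and treat a cleared last lane as a conflict.
  %sink        = inttoptr i64 %diff to ptr
  %mask        = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr null, ptr %sink, i64 8)
  %no.conflict = extractelement <4 x i1> %mask, i64 3   ; lane VF * IC - 1
  %is.conflict = xor i1 %no.conflict, true

Per-check conflict bits are then combined with or-reductions (the CONFLICT_RDX values above) to select between the scalar and vector loops, exactly as the old DIFF_CHECK compares were.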