diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ec7d6b3472745..029e39b1c1ac0 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2423,20 +2423,6 @@ BasicBlock *InnerLoopVectorizer::createScalarPreheader(StringRef Prefix) { Twine(Prefix) + "scalar.ph"); } -/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV -/// expansion results. -static Value *getExpandedStep(const InductionDescriptor &ID, - const SCEV2ValueTy &ExpandedSCEVs) { - const SCEV *Step = ID.getStep(); - if (auto *C = dyn_cast(Step)) - return C->getValue(); - if (auto *U = dyn_cast(Step)) - return U->getValue(); - Value *V = ExpandedSCEVs.lookup(Step); - assert(V && "SCEV must be expanded at this point"); - return V; -} - /// Knowing that loop \p L executes a single vector iteration, add instructions /// that will get simplified and thus should not have any cost to \p /// InstsToIgnore. @@ -7340,73 +7326,6 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { return BestFactor; } -// If \p EpiResumePhiR is resume VPPhi for a reduction when vectorizing the -// epilog loop, fix the reduction's scalar PHI node by adding the incoming value -// from the main vector loop. -static void fixReductionScalarResumeWhenVectorizingEpilog( - VPPhi *EpiResumePhiR, PHINode &EpiResumePhi, BasicBlock *BypassBlock) { - using namespace VPlanPatternMatch; - // Get the VPInstruction computing the reduction result in the middle block. - // The first operand may not be from the middle block if it is not connected - // to the scalar preheader. In that case, there's nothing to fix. - VPValue *Incoming = EpiResumePhiR->getOperand(0); - match(Incoming, VPlanPatternMatch::m_ZExtOrSExt( - VPlanPatternMatch::m_VPValue(Incoming))); - auto *EpiRedResult = dyn_cast(Incoming); - if (!EpiRedResult) - return; - - VPValue *BackedgeVal; - bool IsFindIV = false; - if (EpiRedResult->getOpcode() == VPInstruction::ComputeAnyOfResult || - EpiRedResult->getOpcode() == VPInstruction::ComputeReductionResult) - BackedgeVal = EpiRedResult->getOperand(EpiRedResult->getNumOperands() - 1); - else if (matchFindIVResult(EpiRedResult, m_VPValue(BackedgeVal), m_VPValue())) - IsFindIV = true; - else - return; - - auto *EpiRedHeaderPhi = cast_if_present( - vputils::findRecipe(BackedgeVal, IsaPred)); - if (!EpiRedHeaderPhi) { - match(BackedgeVal, - VPlanPatternMatch::m_Select(VPlanPatternMatch::m_VPValue(), - VPlanPatternMatch::m_VPValue(BackedgeVal), - VPlanPatternMatch::m_VPValue())); - EpiRedHeaderPhi = cast( - vputils::findRecipe(BackedgeVal, IsaPred)); - } - - Value *MainResumeValue; - if (auto *VPI = dyn_cast(EpiRedHeaderPhi->getStartValue())) { - assert((VPI->getOpcode() == VPInstruction::Broadcast || - VPI->getOpcode() == VPInstruction::ReductionStartVector) && - "unexpected start recipe"); - MainResumeValue = VPI->getOperand(0)->getUnderlyingValue(); - } else - MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue(); - if (EpiRedResult->getOpcode() == VPInstruction::ComputeAnyOfResult) { - [[maybe_unused]] Value *StartV = - EpiRedResult->getOperand(0)->getLiveInIRValue(); - auto *Cmp = cast(MainResumeValue); - assert(Cmp->getPredicate() == CmpInst::ICMP_NE && - "AnyOf expected to start with ICMP_NE"); - assert(Cmp->getOperand(1) == StartV && - "AnyOf expected to start by comparing main resume value to original " - "start value"); - MainResumeValue = Cmp->getOperand(0); - } else if (IsFindIV) { - MainResumeValue = cast(MainResumeValue)->getFalseValue(); - } - PHINode *MainResumePhi = cast(MainResumeValue); - - // When fixing reductions in the epilogue loop we should already have - // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry - // over the incoming values correctly. - EpiResumePhi.setIncomingValueForBlock( - BypassBlock, MainResumePhi->getIncomingValueForBlock(BypassBlock)); -} - DenseMap LoopVectorizationPlanner::executePlan( ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan, InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue) { @@ -8991,35 +8910,9 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts) !EnableLoopVectorization) {} /// Prepare \p MainPlan for vectorizing the main vector loop during epilogue -/// vectorization. Remove ResumePhis from \p MainPlan for inductions that -/// don't have a corresponding wide induction in \p EpiPlan. -static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { - // Collect PHI nodes of widened phis in the VPlan for the epilogue. Those - // will need their resume-values computed in the main vector loop. Others - // can be removed from the main VPlan. - SmallPtrSet EpiWidenedPhis; - for (VPRecipeBase &R : - EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { - if (isa(&R)) - continue; - EpiWidenedPhis.insert( - cast(R.getVPSingleValue()->getUnderlyingValue())); - } - for (VPRecipeBase &R : - make_early_inc_range(MainPlan.getScalarHeader()->phis())) { - auto *VPIRInst = cast(&R); - if (EpiWidenedPhis.contains(&VPIRInst->getIRPhi())) - continue; - // There is no corresponding wide induction in the epilogue plan that would - // need a resume value. Remove the VPIRInst wrapping the scalar header phi - // together with the corresponding ResumePhi. The resume values for the - // scalar loop will be created during execution of EpiPlan. - VPRecipeBase *ResumePhi = VPIRInst->getOperand(0)->getDefiningRecipe(); - VPIRInst->eraseFromParent(); - ResumePhi->eraseFromParent(); - } - RUN_VPLAN_PASS(VPlanTransforms::removeDeadRecipes, MainPlan); - +/// vectorization. +static SmallVector +preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { using namespace VPlanPatternMatch; // When vectorizing the epilogue, FindFirstIV & FindLastIV reductions can // introduce multiple uses of undef/poison. If the reduction start value may @@ -9080,14 +8973,26 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { {VectorTC, MainPlan.getZero(Ty)}, {}, "vec.epilog.resume.val"); } else { ResumePhi = cast(&*ResumePhiIter); - if (MainScalarPH->begin() == MainScalarPH->end()) - ResumePhi->moveBefore(*MainScalarPH, MainScalarPH->end()); - else if (&*MainScalarPH->begin() != ResumePhi) + ResumePhi->setName("vec.epilog.resume.val"); + if (&MainScalarPH->front() != ResumePhi) ResumePhi->moveBefore(*MainScalarPH, MainScalarPH->begin()); } - // Add a user to to make sure the resume phi won't get removed. - VPBuilder(MainScalarPH) - .createNaryOp(VPInstruction::ResumeForEpilogue, ResumePhi); + + // Create a ResumeForEpilogue for the canonical IV resume as the + // first non-phi, to keep it alive for the epilogue. + VPBuilder ResumeBuilder(MainScalarPH); + ResumeBuilder.createNaryOp(VPInstruction::ResumeForEpilogue, ResumePhi); + + // Create ResumeForEpilogue instructions for the resume phis of the + // VPIRPhis in the scalar header of the main plan and return them so they can + // be used as resume values when vectorizing the epilogue. + return to_vector( + map_range(MainPlan.getScalarHeader()->phis(), [&](VPRecipeBase &R) { + assert(isa(R) && + "only VPIRPhis expected in the scalar header"); + return ResumeBuilder.createNaryOp(VPInstruction::ResumeForEpilogue, + R.getOperand(0)); + })); } /// Prepare \p Plan for vectorizing the epilogue loop. That is, re-use expanded @@ -9295,39 +9200,11 @@ static SmallVector preparePlanForEpilogueVectorLoop( return InstsToMove; } -// Generate bypass values from the additional bypass block. Note that when the -// vectorized epilogue is skipped due to iteration count check, then the -// resume value for the induction variable comes from the trip count of the -// main vector loop, passed as the second argument. -static Value *createInductionAdditionalBypassValues( - PHINode *OrigPhi, const InductionDescriptor &II, IRBuilder<> &BypassBuilder, - const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount, - Instruction *OldInduction) { - Value *Step = getExpandedStep(II, ExpandedSCEVs); - // For the primary induction the additional bypass end value is known. - // Otherwise it is computed. - Value *EndValueFromAdditionalBypass = MainVectorTripCount; - if (OrigPhi != OldInduction) { - auto *BinOp = II.getInductionBinOp(); - // Fast-math-flags propagate from the original induction instruction. - if (isa_and_nonnull(BinOp)) - BypassBuilder.setFastMathFlags(BinOp->getFastMathFlags()); - - // Compute the end value for the additional bypass. - EndValueFromAdditionalBypass = - emitTransformedIndex(BypassBuilder, MainVectorTripCount, - II.getStartValue(), Step, II.getKind(), BinOp); - EndValueFromAdditionalBypass->setName("ind.end"); - } - return EndValueFromAdditionalBypass; -} - -static void fixScalarResumeValuesFromBypass(BasicBlock *BypassBlock, Loop *L, - VPlan &BestEpiPlan, - LoopVectorizationLegality &LVL, - const SCEV2ValueTy &ExpandedSCEVs, - Value *MainVectorTripCount) { - // Fix reduction resume values from the additional bypass block. +static void +fixScalarResumeValuesFromBypass(BasicBlock *BypassBlock, Loop *L, + VPlan &BestEpiPlan, + ArrayRef ResumeValues) { + // Fix resume values from the additional bypass block. BasicBlock *PH = L->getLoopPreheader(); for (auto *Pred : predecessors(PH)) { for (PHINode &Phi : PH->phis()) { @@ -9338,54 +9215,36 @@ static void fixScalarResumeValuesFromBypass(BasicBlock *BypassBlock, Loop *L, } auto *ScalarPH = cast(BestEpiPlan.getScalarPreheader()); if (ScalarPH->hasPredecessors()) { - // If ScalarPH has predecessors, we may need to update its reduction - // resume values. - for (const auto &[R, IRPhi] : - zip(ScalarPH->phis(), ScalarPH->getIRBasicBlock()->phis())) { - fixReductionScalarResumeWhenVectorizingEpilog(cast(&R), IRPhi, - BypassBlock); - } - } - - // Fix induction resume values from the additional bypass block. - IRBuilder<> BypassBuilder(BypassBlock, BypassBlock->getFirstInsertionPt()); - for (const auto &[IVPhi, II] : LVL.getInductionVars()) { - Value *V = createInductionAdditionalBypassValues( - IVPhi, II, BypassBuilder, ExpandedSCEVs, MainVectorTripCount, - LVL.getPrimaryInduction()); - // TODO: Directly add as extra operand to the VPResumePHI recipe. - if (auto *Inc = dyn_cast(IVPhi->getIncomingValueForBlock(PH))) { - if (Inc->getBasicBlockIndex(BypassBlock) != -1) - Inc->setIncomingValueForBlock(BypassBlock, V); - } else { - // If the resume value in the scalar preheader was simplified (e.g., when - // narrowInterleaveGroups optimized away the resume PHIs), create a new - // PHI to merge the bypass value with the original value. - Value *OrigVal = IVPhi->getIncomingValueForBlock(PH); - PHINode *NewPhi = - PHINode::Create(IVPhi->getType(), pred_size(PH), "bc.resume.val", - PH->getFirstNonPHIIt()); - for (auto *Pred : predecessors(PH)) { - if (Pred == BypassBlock) - NewPhi->addIncoming(V, Pred); - else - NewPhi->addIncoming(OrigVal, Pred); - } - IVPhi->setIncomingValueForBlock(PH, NewPhi); + // Fix resume values for inductions and reductions from the additional + // bypass block using the incoming values from the main loop's resume phis. + // ResumeValues correspond 1:1 with the scalar loop header phis. + for (auto [ResumeV, HeaderPhi] : + zip(ResumeValues, BestEpiPlan.getScalarHeader()->phis())) { + auto *HeaderPhiR = cast(&HeaderPhi); + if (isa(HeaderPhiR->getIncomingValueForBlock(ScalarPH))) + continue; + auto *EpiResumePhi = + cast(HeaderPhiR->getIRPhi().getIncomingValueForBlock(PH)); + if (EpiResumePhi->getBasicBlockIndex(BypassBlock) == -1) + continue; + auto *MainResumePhi = cast(ResumeV->getUnderlyingValue()); + EpiResumePhi->setIncomingValueForBlock( + BypassBlock, MainResumePhi->getIncomingValueForBlock(BypassBlock)); } } } /// Connect the epilogue vector loop generated for \p EpiPlan to the main vector -// loop, after both plans have executed, updating branches from the iteration -// and runtime checks of the main loop, as well as updating various phis. \p -// InstsToMove contains instructions that need to be moved to the preheader of -// the epilogue vector loop. -static void connectEpilogueVectorLoop( - VPlan &EpiPlan, Loop *L, EpilogueLoopVectorizationInfo &EPI, - DominatorTree *DT, LoopVectorizationLegality &LVL, - DenseMap &ExpandedSCEVs, GeneratedRTChecks &Checks, - ArrayRef InstsToMove) { +/// loop, after both plans have executed, updating branches from the iteration +/// and runtime checks of the main loop, as well as updating various phis. \p +/// InstsToMove contains instructions that need to be moved to the preheader of +/// the epilogue vector loop. +static void connectEpilogueVectorLoop(VPlan &EpiPlan, Loop *L, + EpilogueLoopVectorizationInfo &EPI, + DominatorTree *DT, + GeneratedRTChecks &Checks, + ArrayRef InstsToMove, + ArrayRef ResumeValues) { BasicBlock *VecEpilogueIterationCountCheck = cast(EpiPlan.getEntry())->getIRBasicBlock(); @@ -9468,7 +9327,13 @@ static void connectEpilogueVectorLoop( // after executing the main loop. We need to update the resume values of // inductions and reductions during epilogue vectorization. fixScalarResumeValuesFromBypass(VecEpilogueIterationCountCheck, L, EpiPlan, - LVL, ExpandedSCEVs, EPI.VectorTripCount); + ResumeValues); + + // Remove dead phis that were moved to the epilogue preheader but are unused + // (e.g., resume phis for inductions not widened in the epilogue vector loop). + for (PHINode &Phi : make_early_inc_range(VecEpiloguePreHeader->phis())) + if (Phi.use_empty()) + Phi.eraseFromParent(); } bool LoopVectorizePass::processLoop(Loop *L) { @@ -9855,7 +9720,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { VPlan &BestEpiPlan = LVP.getPlanFor(EpilogueVF.Width); BestEpiPlan.getMiddleBlock()->setName("vec.epilog.middle.block"); BestEpiPlan.getVectorPreheader()->setName("vec.epilog.ph"); - preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan); + SmallVector ResumeValues = + preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan); EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1, BestEpiPlan); EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM, @@ -9872,8 +9738,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { BestEpiPlan, L, ExpandedSCEVs, EPI, CM, *PSE.getSE()); LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT, true); - connectEpilogueVectorLoop(BestEpiPlan, L, EPI, DT, LVL, ExpandedSCEVs, - Checks, InstsToMove); + connectEpilogueVectorLoop(BestEpiPlan, L, EPI, DT, Checks, InstsToMove, + ResumeValues); ++LoopsEpilogueVectorized; } else { InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, Checks, diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f8fa7d3c44ce1..561429523f264 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1321,6 +1321,12 @@ void VPInstruction::execute(VPTransformState &State) { "scalar value but not only first lane defined"); State.set(this, GeneratedValue, /*IsScalar*/ GeneratesPerFirstLaneOnly); + if (getOpcode() == VPInstruction::ResumeForEpilogue) { + // FIXME: This is a workaround to enable reliable updates of the scalar loop + // resume phis, when vectorizing the epilogue. Must be removed once epilogue + // vectorization explicitly connects VPlans. + setUnderlyingValue(GeneratedValue); + } } bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index 4eced1640bd14..ae1f7829449da 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -46,7 +46,6 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_MOD_VF]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: @@ -84,7 +83,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll index 44636222a8648..efa64c661ebc0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll @@ -446,6 +446,8 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP12]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -465,8 +467,6 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 4 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP12]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]] ; CHECK: vec.epilog.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index eea496303a206..680dfb83271cf 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -39,7 +39,6 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 ; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -65,7 +64,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -212,7 +211,6 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END5:%.*]] = add i64 [[START]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: @@ -239,7 +237,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ] @@ -457,3 +455,103 @@ loop: exit: ret void } + +; Test that epilogue vectorization correctly handles loops where identical +; induction variables are CSE'd into a single resume phi, avoiding a count +; mismatch between ResumeValues and the epilogue's scalar preheader phis. +define i32 @cse_multiple_ivs_with_scalar_resume(ptr %src, i64 %N) { +; CHECK-LABEL: @cse_multiple_ivs_with_scalar_resume( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[SRC:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i64 2 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP1]], align 2 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i16>, ptr [[TMP2]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[WIDE_LOAD]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i16> [[WIDE_LOAD3]] to <2 x i32> +; CHECK-NEXT: [[TMP5]] = or <2 x i32> [[TMP3]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP6]] = or <2 x i32> [[TMP4]], [[VEC_PHI2]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; CHECK: middle.block: +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i32> [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <2 x i32> [ [[TMP9]], [[VEC_EPILOG_PH]] ], [ [[TMP12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[INDEX6]] +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x i16>, ptr [[TMP10]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i16> [[WIDE_LOAD8]] to <2 x i32> +; CHECK-NEXT: [[TMP12]] = or <2 x i32> [[TMP11]], [[VEC_PHI7]] +; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP12]]) +; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]] +; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i32 [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV1_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV2:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[IV1]] +; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[GEP]], align 2 +; CHECK-NEXT: [[EXT:%.*]] = sext i16 [[VAL]] to i32 +; CHECK-NEXT: [[RDX_NEXT]] = or i32 [[EXT]], [[RDX]] +; CHECK-NEXT: [[IV1_NEXT]] = add i64 [[IV1]], 1 +; CHECK-NEXT: [[IV2_NEXT]] = add i64 [[IV2]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV1]], [[N]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], {{!llvm.loop ![0-9]+}} +; CHECK: exit: +; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], [[LOOP]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RDX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv1 = phi i64 [ 0, %entry ], [ %iv1.next, %loop ] + %iv2 = phi i64 [ 0, %entry ], [ %iv2.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %gep = getelementptr i16, ptr %src, i64 %iv1 + %val = load i16, ptr %gep, align 2 + %ext = sext i16 %val to i32 + %rdx.next = or i32 %ext, %rdx + %iv1.next = add i64 %iv1, 1 + %iv2.next = add i64 %iv2, 1 + %cond = icmp eq i64 %iv1, %N + br i1 %cond, label %exit, label %loop + +exit: + ret i32 %rdx.next +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll index feb0175e75542..dfa64041cc05b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll @@ -13,6 +13,10 @@ define double @fp128_fmuladd_reduction(ptr %start0, ptr %start1, ptr %end0, ptr ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START0]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 8 +; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START1]], i64 [[TMP7]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -54,10 +58,6 @@ define double @fp128_fmuladd_reduction(ptr %start0, ptr %start1, ptr %end0, ptr ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 16 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START0]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 8 -; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[START1]], i64 [[TMP7]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 0a62ac9804524..38047541db5d9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -59,6 +59,8 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; COST1: [[VECTOR_PH]]: ; COST1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[START]], 32 ; COST1-NEXT: [[N_VEC:%.*]] = sub i64 [[START]], [[N_MOD_VF]] +; COST1-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]] +; COST1-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] ; COST1-NEXT: br label %[[VECTOR_BODY:.*]] ; COST1: [[VECTOR_BODY]]: ; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -73,8 +75,6 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; COST1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[START]], [[N_VEC]] ; COST1-NEXT: br i1 [[CMP_N]], [[EXIT_LOOPEXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; COST1: [[VEC_EPILOG_ITER_CHECK]]: -; COST1-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]] -; COST1-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] ; COST1-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; COST1-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]] ; COST1: [[VEC_EPILOG_PH]]: @@ -112,6 +112,8 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; COST10: [[VECTOR_PH]]: ; COST10-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[START]], 16 ; COST10-NEXT: [[N_VEC:%.*]] = sub i64 [[START]], [[N_MOD_VF]] +; COST10-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]] +; COST10-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] ; COST10-NEXT: br label %[[VECTOR_BODY:.*]] ; COST10: [[VECTOR_BODY]]: ; COST10-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -124,8 +126,6 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; COST10-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[START]], [[N_VEC]] ; COST10-NEXT: br i1 [[CMP_N]], [[EXIT_LOOPEXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; COST10: [[VEC_EPILOG_ITER_CHECK]]: -; COST10-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]] -; COST10-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] ; COST10-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; COST10-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]] ; COST10: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index aba18314acf41..2e640e2d22658 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -582,6 +582,8 @@ define void@sext_sub_nsw_for_address(ptr %base, i64 %n, ptr %src) #0 { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 8 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END32:%.*]] = sub i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[TMP26:%.*]] = mul i32 [[DOTCAST]], 2 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -663,10 +665,6 @@ define void@sext_sub_nsw_for_address(ptr %base, i64 %n, ptr %src) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 -; CHECK-NEXT: [[IND_END32:%.*]] = sub i64 [[N]], [[N_VEC]] -; CHECK-NEXT: [[DOTCAST33:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END34:%.*]] = mul i32 [[DOTCAST33]], 2 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF23:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: @@ -711,7 +709,7 @@ define void@sext_sub_nsw_for_address(ptr %base, i64 %n, ptr %src) #0 { ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL29:%.*]] = phi i64 [ [[TMP84]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL30:%.*]] = phi i64 [ [[TMP85]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END32]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ], [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL31:%.*]] = phi i32 [ [[TMP86]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END34]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL31:%.*]] = phi i32 [ [[TMP86]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP26]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL29]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] @@ -771,6 +769,8 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP87:%.*]] = mul i64 [[N_VEC]], 3 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP87]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -882,8 +882,6 @@ define i64 @live_out_extract_from_ptr_iv_increment(i64 %count, ptr %start, ptr n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[TMP87:%.*]] = mul i64 [[N_VEC]], 3 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP87]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8]] ; CHECK: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-runtime-checks.ll index 67826e5cb9f96..5e6834c093d8b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-runtime-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-runtime-checks.ll @@ -30,6 +30,8 @@ define void @interleave_groups_separated_by_offset(ptr %A, i64 %offset) { ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A]], i64 1984 +; CHECK-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[A_OFFSET]], i64 1984 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -45,9 +47,7 @@ define void @interleave_groups_separated_by_offset(ptr %A, i64 %offset) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A]], i64 1984 -; CHECK-NEXT: [[IND_END17:%.*]] = getelementptr i8, ptr [[A_OFFSET]], i64 1984 -; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[A]], i64 1984 @@ -60,7 +60,7 @@ define void @interleave_groups_separated_by_offset(ptr %A, i64 %offset) { ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[NEXT_GEP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX12]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT14]], 992 -; CHECK-NEXT: br i1 [[TMP6]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index 3eb42845bec4a..e957c1decc838 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -27,15 +27,21 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[DOTCAST1:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END8:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST1]] +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END10:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END13:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[OFFSET:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[OFFSET_IDX2]] +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[OFFSET_IDX2]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 @@ -51,12 +57,6 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[DOTCAST1:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END8:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST1]] -; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[N_VEC]], 2 -; CHECK-NEXT: [[IND_END10:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2 -; CHECK-NEXT: [[IND_END13:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP7]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: @@ -153,13 +153,17 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[DOTCAST6:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END7:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST6]] +; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[OFFSET:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP1]], align 2 @@ -175,10 +179,6 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[DOTCAST6:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END7:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST6]] -; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]] -; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: vec.epilog.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index c340cfc9ad6cc..99db3ee324c27 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -70,6 +70,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 4 ; CHECK-VS1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-VS1-NEXT: [[IND_END4:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-VS1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-VS1-NEXT: br label %[[VECTOR_BODY:.*]] @@ -87,7 +88,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-VS1-NEXT: br i1 [[CMP_N]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-VS1: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-VS1-NEXT: [[IND_END4:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-VS1-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-VS1-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK-VS1: [[VEC_EPILOG_PH]]: @@ -163,6 +163,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 3 ; CHECK-VS2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-VS2-NEXT: [[IND_END4:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[CONV]], i64 0 ; CHECK-VS2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-VS2-NEXT: br label %[[VECTOR_BODY:.*]] @@ -180,7 +181,6 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-VS2-NEXT: br i1 [[CMP_N]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-VS2: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-VS2-NEXT: [[IND_END4:%.*]] = add i64 [[TMP0]], [[N_VEC]] ; CHECK-VS2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-VS2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK-VS2: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll index 22696d0b297d9..0066bb76ea834 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/neon-inloop-reductions.ll @@ -11,6 +11,7 @@ define i32 @mul_used_outside_vpexpression(ptr %src.0, ptr %src.1) { ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 96 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC_1]], i64 1 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: @@ -35,7 +36,6 @@ define i32 @mul_used_outside_vpexpression(ptr %src.0, ptr %src.1) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC_0]], i64 96 ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll index 5c2c67337625a..b1b00bdf84d83 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll @@ -70,6 +70,7 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16 ; CHECK-NEXT: [[IV_NEXT:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IDX_NEG]], [[IV_NEXT]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -91,7 +92,6 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[IV_NEXT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IDX_NEG]], [[IV_NEXT]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]] ; CHECK: vec.epilog.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll index a876b4553bcf7..766ce9ffce2e1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub-epilogue-vec.ll @@ -143,7 +143,7 @@ define i32 @sub_reduction(i32 %startval, ptr %src1, ptr %src2) #0 { ; CHECK-PARTIAL-RED-EPI-NEXT: br i1 false, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-PARTIAL-RED-EPI: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-PARTIAL-RED-EPI-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-PARTIAL-RED-EPI-NEXT: [[BC_MERGE_RDX8:%.*]] = phi i32 [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[STARTVAL]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[STARTVAL]], %[[ITER_CHECK]] ] +; CHECK-PARTIAL-RED-EPI-NEXT: [[BC_MERGE_RDX8:%.*]] = phi i32 [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP10]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[STARTVAL]], %[[ITER_CHECK]] ] ; CHECK-PARTIAL-RED-EPI-NEXT: br label %[[LOOP:.*]] ; CHECK-PARTIAL-RED-EPI: [[LOOP]]: ; CHECK-PARTIAL-RED-EPI-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 83e4ca74e56fe..afd0fe4c9b39a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -349,10 +349,11 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP4]] ; CHECK-NEXT: store zeroinitializer, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP6]], align 1 @@ -363,7 +364,6 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7]] ; CHECK: vec.epilog.ph: @@ -399,10 +399,11 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP4]], 1 ; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]] ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] +; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[N_VEC]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]] +; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] ; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP4]] ; CHECK-VF8-NEXT: store zeroinitializer, ptr [[NEXT_GEP]], align 1 ; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP6]], align 1 @@ -413,7 +414,6 @@ define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-VF8: vec.epilog.iter.check: -; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK-VF8: vec.epilog.ph: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll index 2896618cd853a..061132e2abdbb 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll @@ -387,10 +387,9 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr double, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: [[L:%.*]] = load double, ptr [[GEP_A]], align 8 ; CHECK-NEXT: [[DIV:%.*]] = fmul double [[L]], 5.000000e+00 @@ -543,6 +542,10 @@ define void @test_interleave_group_epilogue_with_preheader_phi(ptr %src, ptr %ds ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST_PRE]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[IND_END16:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP22]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -565,10 +568,6 @@ define void @test_interleave_group_epilogue_with_preheader_phi(ptr %src, ptr %ds ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[N_VEC]], 16 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST_PRE]], i64 [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = mul i64 [[N_VEC]], 16 -; CHECK-NEXT: [[IND_END16:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP22]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7]] ; CHECK: [[VEC_EPILOG_PH]]: @@ -585,11 +584,10 @@ define void @test_interleave_group_epilogue_with_preheader_phi(ptr %src, ptr %ds ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[SRC]], %[[VECTOR_SCEVCHECK]] ], [ [[SRC]], %[[ITER_CHECK]] ], [ [[IND_END16]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[DST_PHI:%.*]] = phi ptr [ [[DST_NEXT:%.*]], %[[LOOP]] ], [ [[DST_PRE]], %[[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[SRC_PHI:%.*]] = phi ptr [ [[SRC_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[SRC_PHI:%.*]] = phi ptr [ [[SRC_NEXT:%.*]], %[[LOOP]] ], [ [[SRC]], %[[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: store double 1.000000e+00, ptr [[DST_PHI]], align 8 ; CHECK-NEXT: [[DST_IM:%.*]] = getelementptr i8, ptr [[DST_PHI]], i64 8 ; CHECK-NEXT: store double 1.000000e+00, ptr [[DST_IM]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll index 131c09801f1c9..3becb967ba109 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-scalable.ll @@ -254,6 +254,12 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst) ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP21]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP9]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[MASK]], i64 [[N_VEC]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -281,12 +287,6 @@ define void @test_masked_interleave_group(i32 %N, ptr %mask, ptr %src, ptr %dst) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[N_VEC]], 16 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 16 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[MASK]], i64 [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[UMAX]] ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF17:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll index 114ed5b0a4e5e..1e417a5588833 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll @@ -149,6 +149,7 @@ define i32 @reverse_store_with_partial_reduction(ptr noalias %dst, ptr noalias % ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP5]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -194,7 +195,6 @@ define i32 @reverse_store_with_partial_reduction(ptr noalias %dst, ptr noalias % ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index 5380658a84653..5fc88479ac42a 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -23,6 +23,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 24 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -142,7 +143,6 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index 5c494af1289ef..cb384eaeb64fb 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -253,6 +253,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK: [[VECTOR_PH]]: ; VF-TWO-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32 ; VF-TWO-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; VF-TWO-CHECK-NEXT: [[IND_END18:%.*]] = trunc i64 [[N_VEC]] to i32 ; VF-TWO-CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; VF-TWO-CHECK: [[VECTOR_BODY]]: ; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -316,7 +317,6 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; VF-TWO-CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; VF-TWO-CHECK-NEXT: [[IND_END18:%.*]] = trunc i64 [[N_VEC]] to i32 ; VF-TWO-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 ; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; VF-TWO-CHECK: [[VEC_EPILOG_PH]]: @@ -367,6 +367,7 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK: [[VECTOR_PH]]: ; VF-FOUR-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32 ; VF-FOUR-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; VF-FOUR-CHECK-NEXT: [[IND_END18:%.*]] = trunc i64 [[N_VEC]] to i32 ; VF-FOUR-CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; VF-FOUR-CHECK: [[VECTOR_BODY]]: ; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -430,7 +431,6 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; VF-FOUR-CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; VF-FOUR-CHECK-NEXT: [[IND_END18:%.*]] = trunc i64 [[N_VEC]] to i32 ; VF-FOUR-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; VF-FOUR-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; VF-FOUR-CHECK: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 94ebf01509ec2..da69c7a06a01d 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -35,7 +35,6 @@ define void @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwi ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END5:%.*]] = add i64 3, [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: @@ -63,7 +62,7 @@ define void @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwi ; CHECK-NEXT: [[CMP_N12:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N12]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 3, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 58b21f9bc816d..99efdf03f2912 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -84,6 +84,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END9:%.*]] = mul i64 [[N_VEC]], 32 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -222,7 +223,6 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], [[LOOPEXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END9:%.*]] = mul i64 [[N_VEC]], 32 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 1b336eb495c91..36680495a59be 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -53,7 +53,6 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END6:%.*]] = add i64 [[IV_START]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: @@ -87,7 +86,7 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) { ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC4]] ; CHECK-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i64 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[IV_START]], [[VECTOR_SCEVCHECK]] ], [ [[IV_START]], [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -175,8 +174,6 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[L]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[DOTCAST9:%.*]] = trunc i64 [[N_VEC]] to i16 -; CHECK-NEXT: [[IND_END10:%.*]] = mul i16 [[DOTCAST9]], [[TMP0]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: vec.epilog.ph: @@ -213,7 +210,7 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: br i1 [[CMP_N25]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i16 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i16 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index f964fc6f67854..a2dc1edf2345c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -56,9 +56,6 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; AUTO_VEC: [[VEC_EPILOG_ITER_CHECK]]: -; AUTO_VEC-NEXT: [[DOTCAST12:%.*]] = sitofp i64 [[N_VEC]] to float -; AUTO_VEC-NEXT: [[TMP11:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST12]] -; AUTO_VEC-NEXT: [[IND_END1:%.*]] = fadd fast float 1.000000e+00, [[TMP11]] ; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; AUTO_VEC: [[VEC_EPILOG_PH]]: @@ -87,7 +84,7 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]: ; AUTO_VEC-NEXT: [[BC_RESUME_VAL12:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; AUTO_VEC-NEXT: [[BC_RESUME_VAL13:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ] +; AUTO_VEC-NEXT: [[BC_RESUME_VAL13:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ] ; AUTO_VEC-NEXT: br label %[[LOOP:.*]] ; AUTO_VEC: [[LOOP]]: ; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL12]], %[[VEC_EPILOG_SCALAR_PH]] ] @@ -349,9 +346,6 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; AUTO_VEC: [[VEC_EPILOG_ITER_CHECK]]: -; AUTO_VEC-NEXT: [[DOTCAST16:%.*]] = sitofp i64 [[N_VEC]] to float -; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST16]] -; AUTO_VEC-NEXT: [[IND_END1:%.*]] = fadd reassoc float 1.000000e+00, [[TMP12]] ; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; AUTO_VEC: [[VEC_EPILOG_PH]]: @@ -382,7 +376,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]: ; AUTO_VEC-NEXT: [[BC_RESUME_VAL16:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; AUTO_VEC-NEXT: [[BC_RESUME_VAL17:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ] +; AUTO_VEC-NEXT: [[BC_RESUME_VAL17:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ] ; AUTO_VEC-NEXT: br label %[[LOOP:.*]] ; AUTO_VEC: [[LOOP]]: ; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index aff665dad85a7..aa0de6ad15d70 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -679,6 +679,8 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512: vector.ph: ; AVX512-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 16 ; AVX512-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; AVX512-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC]], 4 +; AVX512-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP23]] ; AVX512-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 64 ; AVX512-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP13]] ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] @@ -702,10 +704,6 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; AVX512: vec.epilog.iter.check: -; AVX512-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC]], 4 -; AVX512-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP23]] -; AVX512-NEXT: [[TMP38:%.*]] = mul i64 [[N_VEC]], 64 -; AVX512-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP38]] ; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF17:![0-9]+]] ; AVX512: vec.epilog.ph: diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll index ad6dfb054b726..bd1fc6eba4a2e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -23,15 +23,21 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 64 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[DOTCAST1:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END10:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST1]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[TMP13]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFFSET:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[OFFSET_IDX2]] +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[OFFSET_IDX2]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 16 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 32 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 48 @@ -57,12 +63,6 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[DOTCAST1:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END10:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST1]] -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 2 -; CHECK-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC]], 2 -; CHECK-NEXT: [[IND_END15:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[TMP13]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: @@ -153,13 +153,17 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 128 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[DOTCAST8:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END9:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST8]] +; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[N_VEC]] +; CHECK-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[N_VEC]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[OFFSET:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i8> [[BROADCAST_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[INDEX]] +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 64 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 96 @@ -185,10 +189,6 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[DOTCAST8:%.*]] = trunc i64 [[N_VEC]] to i32 -; CHECK-NEXT: [[IND_END9:%.*]] = sub i32 [[BLOCKSIZE]], [[DOTCAST8]] -; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[PSRC]], i64 [[N_VEC]] -; CHECK-NEXT: [[IND_END14:%.*]] = getelementptr i8, ptr [[PDST]], i64 [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: vec.epilog.ph: diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index 9ef4e205a970d..e318efdac5b32 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -67,9 +67,6 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP_LOOPEXIT99:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[TMP64:%.*]] = mul i64 [[N_VEC]], 2 -; CHECK-NEXT: [[IND_END9:%.*]] = add i64 8, [[TMP64]] -; CHECK-NEXT: [[IND_END12:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: @@ -110,8 +107,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[CMP_N23:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC7]] ; CHECK-NEXT: br i1 [[CMP_N23]], label %[[FOR_COND_CLEANUP_LOOPEXIT99]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i64 [ [[IND_END8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 8, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i64 [ [[IND_END11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i64 [ [[IND_END8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 8, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i64 [ [[IND_END11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[ITER_CHECK22]]: ; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP3]], -9 @@ -156,9 +153,6 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] ; CHECK-NEXT: br i1 [[CMP_N40]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK43:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK43]]: -; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 -; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] -; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_MOD_VF31]], 8 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label %[[VEC_EPILOG_SCALAR_PH42]], label %[[VEC_EPILOG_PH45]], !prof [[PROF3]] ; CHECK: [[VEC_EPILOG_PH45]]: @@ -204,8 +198,8 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[CMP_N65:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] ; CHECK-NEXT: br i1 [[CMP_N65]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH42]] ; CHECK: [[VEC_EPILOG_SCALAR_PH42]]: -; CHECK-NEXT: [[BC_RESUME_VAL65:%.*]] = phi i64 [ [[IND_END54]], %[[VEC_EPILOG_MIDDLE_BLOCK63]] ], [ [[IND_END55]], %[[VEC_EPILOG_ITER_CHECK43]] ], [ 8, %[[ITER_CHECK22]] ] -; CHECK-NEXT: [[BC_RESUME_VAL66:%.*]] = phi i64 [ [[IND_END57]], %[[VEC_EPILOG_MIDDLE_BLOCK63]] ], [ [[IND_END58]], %[[VEC_EPILOG_ITER_CHECK43]] ], [ 0, %[[ITER_CHECK22]] ] +; CHECK-NEXT: [[BC_RESUME_VAL65:%.*]] = phi i64 [ [[IND_END54]], %[[VEC_EPILOG_MIDDLE_BLOCK63]] ], [ [[IND_END41]], %[[VEC_EPILOG_ITER_CHECK43]] ], [ 8, %[[ITER_CHECK22]] ] +; CHECK-NEXT: [[BC_RESUME_VAL66:%.*]] = phi i64 [ [[IND_END57]], %[[VEC_EPILOG_MIDDLE_BLOCK63]] ], [ [[IND_END43]], %[[VEC_EPILOG_ITER_CHECK43]] ], [ 0, %[[ITER_CHECK22]] ] ; CHECK-NEXT: br label %[[FOR_BODY_US:.*]] ; CHECK: [[FOR_BODY_US]]: ; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], %[[FOR_COND_CLEANUP4_US_LCSSA_US_US:.*]] ], [ [[BC_RESUME_VAL65]], %[[VEC_EPILOG_SCALAR_PH42]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-epilogue-vec.ll b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-epilogue-vec.ll index 64c174fb3eb2b..014392e0bf3f8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-epilogue-vec.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory-epilogue-vec.ll @@ -53,10 +53,9 @@ define void @test_4xi64_epilogue_vec(ptr noalias %data, i64 noundef %n) { ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ITER_CHECK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds [[S4]], ptr [[DATA]], i64 [[IV]], i32 0 ; CHECK-NEXT: store i64 1, ptr [[P0]], align 8 ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds [[S4]], ptr [[DATA]], i64 [[IV]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory.ll b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory.ll index a8d68692428dc..52526906bdeb7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/transform-narrow-interleave-to-widen-memory.ll @@ -83,10 +83,9 @@ define void @test_4xi64(ptr noalias %data, ptr noalias %factor, i64 noundef %n) ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[N_VEC1]], %[[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[DATA_2:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[IV1]] ; CHECK-NEXT: [[L_2:%.*]] = load i64, ptr [[DATA_2]], align 8 ; CHECK-NEXT: [[DATA_0:%.*]] = getelementptr inbounds { i64, i64, i64, i64 }, ptr [[DATA]], i64 [[IV1]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll index 35ef01379c756..5d7b01943a0c8 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-iv-select-cmp.ll @@ -241,7 +241,6 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: [[VEC_EPILOG_PH]]: @@ -279,7 +278,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) { ; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N14]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i8 [ [[TMP13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i8 [ [[TMP13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP3]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX14:%.*]] = phi i8 [ [[RDX_SELECT13]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: @@ -396,7 +395,7 @@ define i64 @test_vectorize_select_smin_first_idx(ptr %src, i64 %n) { ; CHECK-NEXT: br i1 [[CMP_N21]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL22:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX23:%.*]] = phi i64 [ [[TMP21]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX23:%.*]] = phi i64 [ [[TMP21]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP10]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX24:%.*]] = phi i64 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP5]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index 0607ec38e7e46..e5377cbfc35d3 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -225,7 +225,6 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END6:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: @@ -263,7 +262,7 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ false, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -318,6 +317,8 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[N_VEC]], 16 +; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP24]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -356,8 +357,6 @@ define i1 @any_of_reduction_i1_epilog2(ptr %start, ptr %end, i64 %x) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[N_VEC]], 16 -; CHECK-NEXT: [[IND_END9:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP24]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll index 76f30decf81e7..4a0dbf893ca79 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll @@ -699,7 +699,6 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: [[VEC_EPILOG_PH]]: @@ -727,8 +726,8 @@ define i1 @reduction_with_const_or(ptr %A, i8 %n) { ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ true, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ false, %[[VEC_EPILOG_ITER_CHECK]] ], [ false, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i8 [ [[TMP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ true, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ true, %[[VEC_EPILOG_ITER_CHECK]] ], [ false, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i8 [ [[TMP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[OR_RED:%.*]] = phi i1 [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[OR_NEXT:%.*]], %[[LOOP]] ] @@ -787,7 +786,6 @@ define i16 @test_no_op_or_reduction_single_vector_iteration(i64 %N) { ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: [[VEC_EPILOG_PH]]: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[TMP0]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[CLAMPED]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[CLAMPED]], [[N_MOD_VF2]] @@ -830,3 +828,96 @@ loop: exit: ret i16 %red.next } + +; Test case for https://github.com/llvm/llvm-project/issues/179407. +define i32 @test_foldable_reduction(i64 %N) { +; CHECK-LABEL: define i32 @test_foldable_reduction( +; CHECK-SAME: i64 [[N:%.*]]) { +; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[VEC_PHI]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP3]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP4]], i32 0, i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK]]: +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK: [[VEC_EPILOG_PH]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF3]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0 +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ [[TMP5]], %[[VEC_EPILOG_PH]] ], [ [[VEC_PHI6]], %[[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[VEC_PHI6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]] +; CHECK-NEXT: [[RDX_SELECT8:%.*]] = select i1 [[TMP9]], i32 0, i32 0 +; CHECK-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC4]] +; CHECK-NEXT: br i1 [[CMP_N9]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; CHECK: [[VEC_EPILOG_SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[TMP7]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi i32 [ [[RDX_SELECT8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED_1:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_1_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED_2:%.*]] = phi i32 [ [[BC_MERGE_RDX11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[RED_2_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: [[RED_2_NEXT]] = select i1 [[ICMP]], i32 0, i32 [[RED_2]] +; CHECK-NEXT: [[RED_1_NEXT]] = or i32 [[RED_1]], 0 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[R_1:%.*]] = phi i32 [ [[RED_2_NEXT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[RDX_SELECT8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[R_2:%.*]] = phi i32 [ [[RED_1_NEXT]], %[[LOOP]] ], [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[R:%.*]] = add i32 [[R_1]], [[R_2]] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red.1 = phi i32 [ 0, %entry ], [ %red.1.next, %loop ] + %red.2 = phi i32 [ 0, %entry ], [ %red.2.next, %loop ] + %icmp = icmp eq i32 0, 0 + %red.2.next = select i1 %icmp, i32 0, i32 %red.2 + %red.1.next= or i32 %red.1, 0 + %iv.next = add i64 %iv, 1 + %ec = icmp ne i64 %iv, %N + br i1 %ec, label %loop, label %exit + +exit: + %r.1 = phi i32 [ %red.2.next, %loop ] + %r.2 = phi i32 [ %red.1.next, %loop ] + %r = add i32 %r.1, %r.2 + ret i32 %r +} diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index 143da205f18ba..b20a3b0063834 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -165,6 +165,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END4:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -186,7 +187,6 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END4:%.*]] = trunc i64 [[N_VEC]] to i32 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: [[VEC_EPILOG_PH]]: @@ -469,7 +469,6 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br i1 true, label %[[OUTER_LATCH]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END5:%.*]] = mul i8 84, [[INDUCTION_IV]] ; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -499,7 +498,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-NEXT: br i1 true, label %[[OUTER_LATCH]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ 85, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 85, %[[VEC_EPILOG_ITER_CHECK]] ], [ 1, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i8 [ [[IND_END4]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i8 [ [[IND_END4]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[INNER:.*]] ; CHECK: [[INNER]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ] @@ -555,7 +554,6 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT: [[MIDDLE_BLOCK]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label %[[OUTER_LATCH]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IND_END5:%.*]] = mul i8 84, [[INDUCTION_IV]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_PH]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -585,7 +583,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label %[[OUTER_LATCH]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL13:%.*]] = phi i64 [ 85, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 85, %[[VEC_EPILOG_ITER_CHECK]] ], [ 1, %[[ITER_CHECK]] ] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i8 [ [[IND_END4]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END5]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i8 [ [[IND_END4]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[INNER:.*]] ; CHECK-PROFITABLE-BY-DEFAULT: [[INNER]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL13]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ] diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll index dcfebe32302be..b2e07e77b05c5 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll @@ -48,6 +48,8 @@ define dso_local void @test(ptr %start, ptr %end) #0 { ; AVX2: vector.ph: ; AVX2-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[TMP3]], 24 ; AVX2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 9223372036854775776 +; AVX2-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 2 +; AVX2-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]] ; AVX2-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -83,8 +85,6 @@ define dso_local void @test(ptr %start, ptr %end) #0 { ; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; AVX2-NEXT: br i1 [[CMP_N]], label [[EXIT]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; AVX2: vec.epilog.iter.check: -; AVX2-NEXT: [[TMP26:%.*]] = shl i64 [[N_VEC]], 2 -; AVX2-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP26]] ; AVX2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 ; AVX2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[BB12_PREHEADER1]], label [[BB12_PREHEADER11]], !prof [[PROF3:![0-9]+]] ; AVX2: vec.epilog.ph: