diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index dd7f05465a50b..d2f93bb7de2c8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -541,10 +541,6 @@ class InnerLoopVectorizer { protected: friend class LoopVectorizationPlanner; - /// Iteratively sink the scalarized operands of a predicated instruction into - /// the block that was created for it. - void sinkScalarOperands(Instruction *PredInst); - /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock); @@ -629,9 +625,6 @@ class InnerLoopVectorizer { /// A list of all bypass blocks. The first block is the entry of the loop. SmallVector LoopBypassBlocks; - /// Store instructions that were predicated. - SmallVector PredicatedInstructions; - /// Trip count of the original loop. Value *TripCount = nullptr; @@ -2385,15 +2378,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr, // End if-block. VPRegionBlock *Parent = RepRecipe->getParent()->getParent(); - bool IfPredicateInstr = Parent ? Parent->isReplicator() : false; assert( (Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() || all_of(RepRecipe->operands(), [](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) && "Expected a recipe is either within a region or all of its operands " "are defined outside the vectorized region."); - if (IfPredicateInstr) - PredicatedInstructions.push_back(Cloned); } Value * @@ -2867,8 +2857,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { if (!State.Plan->getVectorLoopRegion()) return; - for (Instruction *PI : PredicatedInstructions) - sinkScalarOperands(&*PI); VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock(); @@ -2895,82 +2883,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { VF.getKnownMinValue() * UF); } -void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { - // The basic block and loop containing the predicated instruction. - auto *PredBB = PredInst->getParent(); - auto *VectorLoop = LI->getLoopFor(PredBB); - - // Initialize a worklist with the operands of the predicated instruction. - SetVector Worklist(PredInst->op_begin(), PredInst->op_end()); - - // Holds instructions that we need to analyze again. An instruction may be - // reanalyzed if we don't yet know if we can sink it or not. - SmallVector InstsToReanalyze; - - // Returns true if a given use occurs in the predicated block. Phi nodes use - // their operands in their corresponding predecessor blocks. - auto IsBlockOfUsePredicated = [&](Use &U) -> bool { - auto *I = cast(U.getUser()); - BasicBlock *BB = I->getParent(); - if (auto *Phi = dyn_cast(I)) - BB = Phi->getIncomingBlock( - PHINode::getIncomingValueNumForOperand(U.getOperandNo())); - return BB == PredBB; - }; - - // Iteratively sink the scalarized operands of the predicated instruction - // into the block we created for it. When an instruction is sunk, it's - // operands are then added to the worklist. The algorithm ends after one pass - // through the worklist doesn't sink a single instruction. - bool Changed; - do { - // Add the instructions that need to be reanalyzed to the worklist, and - // reset the changed indicator. - Worklist.insert_range(InstsToReanalyze); - InstsToReanalyze.clear(); - Changed = false; - - while (!Worklist.empty()) { - auto *I = dyn_cast(Worklist.pop_back_val()); - - // We can't sink an instruction if it is a phi node, is not in the loop, - // may have side effects or may read from memory. - // TODO: Could do more granular checking to allow sinking - // a load past non-store instructions. - if (!I || isa(I) || !VectorLoop->contains(I) || - I->mayHaveSideEffects() || I->mayReadFromMemory()) - continue; - - // If the instruction is already in PredBB, check if we can sink its - // operands. In that case, VPlan's sinkScalarOperands() succeeded in - // sinking the scalar instruction I, hence it appears in PredBB; but it - // may have failed to sink I's operands (recursively), which we try - // (again) here. - if (I->getParent() == PredBB) { - Worklist.insert_range(I->operands()); - continue; - } - - // It's legal to sink the instruction if all its uses occur in the - // predicated block. Otherwise, there's nothing to do yet, and we may - // need to reanalyze the instruction. - if (!llvm::all_of(I->uses(), IsBlockOfUsePredicated)) { - InstsToReanalyze.push_back(I); - continue; - } - - // Move the instruction to the beginning of the predicated block, and add - // it's operands to the worklist. - I->moveBefore(PredBB->getFirstInsertionPt()); - Worklist.insert_range(I->operands()); - - // The sinking may have enabled other instructions to be sunk, so we will - // need to iterate. - Changed = true; - } - } while (Changed); -} - void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { auto Iter = vp_depth_first_deep(Plan.getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll index e302bf195ef8e..acc41d21af1f5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/blend-costs.ll @@ -213,6 +213,21 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE30:.*]] ] ; CHECK-NEXT: [[IV:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 5 +; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 6 +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 7 +; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 9 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 10 +; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 11 +; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 12 +; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 13 +; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 14 +; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[INDEX]], 15 ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[GEP_SRC]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 @@ -232,7 +247,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] ; CHECK: [[PRED_STORE_IF1]]: -; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 1 ; CHECK-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1 @@ -241,7 +255,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[TMP7]], i32 2 ; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] ; CHECK: [[PRED_STORE_IF3]]: -; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 2 ; CHECK-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 @@ -250,7 +263,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i1> [[TMP7]], i32 3 ; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] ; CHECK: [[PRED_STORE_IF5]]: -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[INDEX]], 3 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP20]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 3 ; CHECK-NEXT: store i8 [[TMP22]], ptr [[TMP21]], align 1 @@ -259,7 +271,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[TMP7]], i32 4 ; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] ; CHECK: [[PRED_STORE_IF7]]: -; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP24]] ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 4 ; CHECK-NEXT: store i8 [[TMP26]], ptr [[TMP25]], align 1 @@ -268,7 +279,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[TMP7]], i32 5 ; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] ; CHECK: [[PRED_STORE_IF9]]: -; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[INDEX]], 5 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 5 ; CHECK-NEXT: store i8 [[TMP30]], ptr [[TMP29]], align 1 @@ -277,7 +287,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[TMP7]], i32 6 ; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] ; CHECK: [[PRED_STORE_IF11]]: -; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[INDEX]], 6 ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP32]] ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 6 ; CHECK-NEXT: store i8 [[TMP34]], ptr [[TMP33]], align 1 @@ -286,7 +295,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[TMP7]], i32 7 ; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] ; CHECK: [[PRED_STORE_IF13]]: -; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[INDEX]], 7 ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 7 ; CHECK-NEXT: store i8 [[TMP38]], ptr [[TMP37]], align 1 @@ -295,7 +303,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[TMP7]], i32 8 ; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]] ; CHECK: [[PRED_STORE_IF15]]: -; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[INDEX]], 8 ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP40]] ; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 8 ; CHECK-NEXT: store i8 [[TMP42]], ptr [[TMP41]], align 1 @@ -304,7 +311,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[TMP7]], i32 9 ; CHECK-NEXT: br i1 [[TMP43]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]] ; CHECK: [[PRED_STORE_IF17]]: -; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[INDEX]], 9 ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP44]] ; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 9 ; CHECK-NEXT: store i8 [[TMP46]], ptr [[TMP45]], align 1 @@ -313,7 +319,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[TMP7]], i32 10 ; CHECK-NEXT: br i1 [[TMP47]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] ; CHECK: [[PRED_STORE_IF19]]: -; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[INDEX]], 10 ; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP48]] ; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 10 ; CHECK-NEXT: store i8 [[TMP50]], ptr [[TMP49]], align 1 @@ -322,7 +327,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP51:%.*]] = extractelement <16 x i1> [[TMP7]], i32 11 ; CHECK-NEXT: br i1 [[TMP51]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] ; CHECK: [[PRED_STORE_IF21]]: -; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[INDEX]], 11 ; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP52]] ; CHECK-NEXT: [[TMP54:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 11 ; CHECK-NEXT: store i8 [[TMP54]], ptr [[TMP53]], align 1 @@ -331,7 +335,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> [[TMP7]], i32 12 ; CHECK-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] ; CHECK: [[PRED_STORE_IF23]]: -; CHECK-NEXT: [[TMP56:%.*]] = add i32 [[INDEX]], 12 ; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 12 ; CHECK-NEXT: store i8 [[TMP58]], ptr [[TMP57]], align 1 @@ -340,7 +343,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i1> [[TMP7]], i32 13 ; CHECK-NEXT: br i1 [[TMP59]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] ; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP60:%.*]] = add i32 [[INDEX]], 13 ; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP60]] ; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 13 ; CHECK-NEXT: store i8 [[TMP62]], ptr [[TMP61]], align 1 @@ -349,7 +351,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP63:%.*]] = extractelement <16 x i1> [[TMP7]], i32 14 ; CHECK-NEXT: br i1 [[TMP63]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]] ; CHECK: [[PRED_STORE_IF27]]: -; CHECK-NEXT: [[TMP64:%.*]] = add i32 [[INDEX]], 14 ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP64]] ; CHECK-NEXT: [[TMP66:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 14 ; CHECK-NEXT: store i8 [[TMP66]], ptr [[TMP65]], align 1 @@ -358,7 +359,6 @@ define void @test_blend_feeding_replicated_store_2(ptr noalias %src, ptr %dst, i ; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[TMP7]], i32 15 ; CHECK-NEXT: br i1 [[TMP67]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30]] ; CHECK: [[PRED_STORE_IF29]]: -; CHECK-NEXT: [[TMP68:%.*]] = add i32 [[INDEX]], 15 ; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[TMP68]] ; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i8> [[PREDPHI]], i32 15 ; CHECK-NEXT: store i8 [[TMP70]], ptr [[TMP69]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 585c2df08f7d6..c662cf8478651 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -996,28 +996,31 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_ENTRY1:%.*]] = icmp ult i64 1, [[TMP0]] ; TFA_INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; TFA_INTERLEAVE: [[VECTOR_BODY]]: -; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ] -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE5]] ] -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[PRED_STORE_CONTINUE5]] ] +; TFA_INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP27:%.*]], %[[TMP19:.*]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[TMP19]] ] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi i1 [ [[ACTIVE_LANE_MASK_ENTRY1]], %[[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT6:%.*]], %[[TMP19]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2]], align 8 -; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; TFA_INTERLEAVE: [[PRED_STORE_IF]]: ; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7:[0-9]+]] -; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00 -; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true -; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = select i1 [[TMP7]], double 1.000000e+00, double 0.000000e+00 -; TFA_INTERLEAVE-NEXT: store double [[TMP24]], ptr [[P]], align 8 -; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE]] -; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE]]: -; TFA_INTERLEAVE-NEXT: br i1 [[ACTIVE_LANE_MASK2]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]] -; TFA_INTERLEAVE: [[PRED_STORE_IF4]]: ; TFA_INTERLEAVE-NEXT: [[TMP8:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR7]] +; TFA_INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp ogt double [[TMP5]], 0.000000e+00 ; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = fcmp ogt double [[TMP8]], 0.000000e+00 -; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = xor i1 [[TMP9]], true +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = xor i1 [[TMP6]], true +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = xor i1 [[TMP9]], true +; TFA_INTERLEAVE-NEXT: [[TMP10:%.*]] = select i1 [[ACTIVE_LANE_MASK]], i1 [[TMP18]], i1 false +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], i1 [[TMP20]], i1 false ; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = select i1 [[TMP10]], double 1.000000e+00, double 0.000000e+00 -; TFA_INTERLEAVE-NEXT: store double [[TMP26]], ptr [[P]], align 8 -; TFA_INTERLEAVE-NEXT: br label %[[PRED_STORE_CONTINUE5]] -; TFA_INTERLEAVE: [[PRED_STORE_CONTINUE5]]: +; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP21]], double 1.000000e+00, double 0.000000e+00 +; TFA_INTERLEAVE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[ACTIVE_LANE_MASK2]], double [[PREDPHI3]], double [[TMP26]] +; TFA_INTERLEAVE-NEXT: [[TMP13:%.*]] = xor i1 [[ACTIVE_LANE_MASK]], true +; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = xor i1 [[ACTIVE_LANE_MASK2]], true +; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = xor i1 [[TMP13]], true +; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = xor i1 [[TMP14]], true +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]] +; TFA_INTERLEAVE-NEXT: br i1 [[TMP17]], label %[[BB18:.*]], label %[[TMP19]] +; TFA_INTERLEAVE: [[BB18]]: +; TFA_INTERLEAVE-NEXT: store double [[SPEC_SELECT]], ptr [[P]], align 8 +; TFA_INTERLEAVE-NEXT: br label %[[TMP19]] +; TFA_INTERLEAVE: [[TMP19]]: ; TFA_INTERLEAVE-NEXT: [[TMP27]] = add i64 [[INDEX]], 2 ; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = icmp ult i64 [[INDEX]], [[TMP3]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index d8713bdda689a..d065fdd2d31c9 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -1046,6 +1046,9 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-FIXEDLEN: [[VECTOR_BODY]]: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 +; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 ; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; TF-FIXEDLEN-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] @@ -1056,21 +1059,18 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TF-FIXEDLEN-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] ; TF-FIXEDLEN: [[PRED_STORE_IF1]]: -; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 ; TF-FIXEDLEN-NEXT: store i64 [[TMP3]], ptr [[B]], align 8 ; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE2]] ; TF-FIXEDLEN: [[PRED_STORE_CONTINUE2]]: ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] ; TF-FIXEDLEN: [[PRED_STORE_IF3]]: -; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 ; TF-FIXEDLEN-NEXT: store i64 [[TMP5]], ptr [[B]], align 8 ; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE4]] ; TF-FIXEDLEN: [[PRED_STORE_CONTINUE4]]: ; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3 ; TF-FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6]] ; TF-FIXEDLEN: [[PRED_STORE_IF5]]: -; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 ; TF-FIXEDLEN-NEXT: store i64 [[TMP7]], ptr [[B]], align 8 ; TF-FIXEDLEN-NEXT: br label %[[PRED_STORE_CONTINUE6]] ; TF-FIXEDLEN: [[PRED_STORE_CONTINUE6]]: diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index a1c727f62ba7a..3d77e6d3a94af 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -604,9 +604,21 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE37:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[INDEX]], 5 +; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], 6 +; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], 7 ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 9 +; CHECK-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 10 +; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 11 ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 12 +; CHECK-NEXT: [[TMP72:%.*]] = add i64 [[INDEX]], 13 +; CHECK-NEXT: [[TMP76:%.*]] = add i64 [[INDEX]], 14 +; CHECK-NEXT: [[TMP80:%.*]] = add i64 [[INDEX]], 15 ; CHECK-NEXT: [[TMP7:%.*]] = shl nsw i64 [[TMP3]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = shl nsw i64 [[TMP4]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = shl nsw i64 [[TMP5]], 2 @@ -638,7 +650,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1 ; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] ; CHECK: pred.store.if8: -; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = shl nsw i64 [[TMP27]], 2 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP28]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP29]], align 8 @@ -647,7 +658,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP19]], i32 2 ; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] ; CHECK: pred.store.if10: -; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP32:%.*]] = shl nsw i64 [[TMP31]], 2 ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP32]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP33]], align 8 @@ -656,7 +666,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP19]], i32 3 ; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] ; CHECK: pred.store.if12: -; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP36:%.*]] = shl nsw i64 [[TMP35]], 2 ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP36]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP37]], align 8 @@ -673,7 +682,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP20]], i32 1 ; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] ; CHECK: pred.store.if16: -; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP43:%.*]] = shl nsw i64 [[TMP42]], 2 ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP43]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP44]], align 8 @@ -682,7 +690,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i1> [[TMP20]], i32 2 ; CHECK-NEXT: br i1 [[TMP45]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]] ; CHECK: pred.store.if18: -; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP47:%.*]] = shl nsw i64 [[TMP46]], 2 ; CHECK-NEXT: [[TMP48:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP47]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP48]], align 8 @@ -691,7 +698,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3 ; CHECK-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]] ; CHECK: pred.store.if20: -; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], 7 ; CHECK-NEXT: [[TMP51:%.*]] = shl nsw i64 [[TMP50]], 2 ; CHECK-NEXT: [[TMP52:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP51]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP52]], align 8 @@ -708,7 +714,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i1> [[TMP21]], i32 1 ; CHECK-NEXT: br i1 [[TMP56]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]] ; CHECK: pred.store.if24: -; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX]], 9 ; CHECK-NEXT: [[TMP58:%.*]] = shl nsw i64 [[TMP57]], 2 ; CHECK-NEXT: [[TMP59:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP58]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP59]], align 8 @@ -717,7 +722,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP60:%.*]] = extractelement <4 x i1> [[TMP21]], i32 2 ; CHECK-NEXT: br i1 [[TMP60]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]] ; CHECK: pred.store.if26: -; CHECK-NEXT: [[TMP61:%.*]] = add i64 [[INDEX]], 10 ; CHECK-NEXT: [[TMP62:%.*]] = shl nsw i64 [[TMP61]], 2 ; CHECK-NEXT: [[TMP63:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP62]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP63]], align 8 @@ -726,7 +730,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP21]], i32 3 ; CHECK-NEXT: br i1 [[TMP64]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]] ; CHECK: pred.store.if28: -; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX]], 11 ; CHECK-NEXT: [[TMP66:%.*]] = shl nsw i64 [[TMP65]], 2 ; CHECK-NEXT: [[TMP67:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP66]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP67]], align 8 @@ -743,7 +746,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP71:%.*]] = extractelement <4 x i1> [[TMP22]], i32 1 ; CHECK-NEXT: br i1 [[TMP71]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]] ; CHECK: pred.store.if32: -; CHECK-NEXT: [[TMP72:%.*]] = add i64 [[INDEX]], 13 ; CHECK-NEXT: [[TMP73:%.*]] = shl nsw i64 [[TMP72]], 2 ; CHECK-NEXT: [[TMP74:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP73]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP74]], align 8 @@ -752,7 +754,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP75:%.*]] = extractelement <4 x i1> [[TMP22]], i32 2 ; CHECK-NEXT: br i1 [[TMP75]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35:%.*]] ; CHECK: pred.store.if34: -; CHECK-NEXT: [[TMP76:%.*]] = add i64 [[INDEX]], 14 ; CHECK-NEXT: [[TMP77:%.*]] = shl nsw i64 [[TMP76]], 2 ; CHECK-NEXT: [[TMP78:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP77]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP78]], align 8 @@ -761,7 +762,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP79:%.*]] = extractelement <4 x i1> [[TMP22]], i32 3 ; CHECK-NEXT: br i1 [[TMP79]], label [[PRED_STORE_IF36:%.*]], label [[PRED_STORE_CONTINUE37]] ; CHECK: pred.store.if36: -; CHECK-NEXT: [[TMP80:%.*]] = add i64 [[INDEX]], 15 ; CHECK-NEXT: [[TMP81:%.*]] = shl nsw i64 [[TMP80]], 2 ; CHECK-NEXT: [[TMP82:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP81]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP82]], align 8 @@ -786,6 +786,9 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX40:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT51:%.*]], [[PRED_STORE_CONTINUE50:%.*]] ] ; CHECK-NEXT: [[TMP86:%.*]] = add i64 [[INDEX40]], 0 +; CHECK-NEXT: [[TMP95:%.*]] = add i64 [[INDEX40]], 1 +; CHECK-NEXT: [[TMP99:%.*]] = add i64 [[INDEX40]], 2 +; CHECK-NEXT: [[TMP103:%.*]] = add i64 [[INDEX40]], 3 ; CHECK-NEXT: [[TMP87:%.*]] = shl nsw i64 [[TMP86]], 2 ; CHECK-NEXT: [[TMP89:%.*]] = getelementptr nusw double, ptr [[A]], i64 [[TMP87]] ; CHECK-NEXT: [[WIDE_VEC41:%.*]] = load <16 x double>, ptr [[TMP89]], align 8 @@ -802,7 +805,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP90]], i32 1 ; CHECK-NEXT: br i1 [[TMP94]], label [[PRED_STORE_IF45:%.*]], label [[PRED_STORE_CONTINUE46:%.*]] ; CHECK: pred.store.if45: -; CHECK-NEXT: [[TMP95:%.*]] = add i64 [[INDEX40]], 1 ; CHECK-NEXT: [[TMP96:%.*]] = shl nsw i64 [[TMP95]], 2 ; CHECK-NEXT: [[TMP97:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP96]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP97]], align 8 @@ -811,7 +813,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP98:%.*]] = extractelement <4 x i1> [[TMP90]], i32 2 ; CHECK-NEXT: br i1 [[TMP98]], label [[PRED_STORE_IF47:%.*]], label [[PRED_STORE_CONTINUE48:%.*]] ; CHECK: pred.store.if47: -; CHECK-NEXT: [[TMP99:%.*]] = add i64 [[INDEX40]], 2 ; CHECK-NEXT: [[TMP100:%.*]] = shl nsw i64 [[TMP99]], 2 ; CHECK-NEXT: [[TMP101:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP100]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP101]], align 8 @@ -820,7 +821,6 @@ define void @cost_duplicate_recipe_for_sinking(ptr %A, i64 %N) #2 { ; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i1> [[TMP90]], i32 3 ; CHECK-NEXT: br i1 [[TMP102]], label [[PRED_STORE_IF49:%.*]], label [[PRED_STORE_CONTINUE50]] ; CHECK: pred.store.if49: -; CHECK-NEXT: [[TMP103:%.*]] = add i64 [[INDEX40]], 3 ; CHECK-NEXT: [[TMP104:%.*]] = shl nsw i64 [[TMP103]], 2 ; CHECK-NEXT: [[TMP105:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP104]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP105]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll index c91ead00a950d..644f10b617eb7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll @@ -8,13 +8,13 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %vector.ph ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll index 6424fb5565a63..58ae5022b9f90 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -275,7 +275,19 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[TMP9]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP10]], i64 8 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP11]], i64 12 ; CHECK-NEXT: [[OFFSET_IDX6:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[OFFSET_IDX6]] +; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[TMP6]], i64 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]] +; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]] +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP8]], i64 12 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], @@ -283,43 +295,31 @@ define void @example3(i32 %n, ptr noalias nocapture %p, ptr noalias nocapture %q ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[Q:%.*]], i64 [[OFFSET_IDX6]] +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[NEXT_GEP7]], align 16 ; CHECK-NEXT: store i32 [[TMP5]], ptr [[NEXT_GEP]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 -; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 +; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] ; CHECK: pred.store.if11: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]] -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP8]], i64 4 -; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[NEXT_GEP8]], align 16 -; CHECK-NEXT: store i32 [[TMP9]], ptr [[NEXT_GEP3]], align 16 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[NEXT_GEP6]], align 16 +; CHECK-NEXT: store i32 [[TMP13]], ptr [[NEXT_GEP1]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 -; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] ; CHECK: pred.store.if13: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP11]], i64 8 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]] -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP12]], i64 8 -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[NEXT_GEP9]], align 16 -; CHECK-NEXT: store i32 [[TMP13]], ptr [[NEXT_GEP4]], align 16 +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[NEXT_GEP9]], align 16 +; CHECK-NEXT: store i32 [[TMP15]], ptr [[NEXT_GEP2]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]] ; CHECK: pred.store.continue14: -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 -; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] ; CHECK: pred.store.if15: -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[TMP15]], i64 12 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[Q]], i64 [[OFFSET_IDX6]] -; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[TMP16]], i64 12 -; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[NEXT_GEP10]], align 16 -; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP5]], align 16 +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[NEXT_GEP8]], align 16 +; CHECK-NEXT: store i32 [[TMP17]], ptr [[NEXT_GEP3]], align 16 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]] ; CHECK: pred.store.continue16: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -453,7 +453,19 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP7]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP13]], i64 6 ; CHECK-NEXT: [[OFFSET_IDX5:%.*]] = shl i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX5]] +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[TMP14]], i64 4 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]] +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP19]], i64 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]] +; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP20]], i64 12 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], @@ -461,8 +473,8 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX5]] -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]] +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i32 [[TMP4]], 7 @@ -472,10 +484,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] ; CHECK: pred.store.if9: -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]] -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[TMP7]], i64 4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP8]], i64 2 ; CHECK-NEXT: [[TMP9:%.*]] = load i16, ptr [[NEXT_GEP2]], align 2 ; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP9]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 7 @@ -485,10 +493,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 ; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] ; CHECK: pred.store.if11: -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]] -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[TMP14]], i64 4 ; CHECK-NEXT: [[TMP15:%.*]] = load i16, ptr [[NEXT_GEP3]], align 2 ; CHECK-NEXT: [[TMP16:%.*]] = zext i16 [[TMP15]] to i32 ; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i32 [[TMP16]], 7 @@ -498,10 +502,6 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 ; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE15]] ; CHECK: pred.store.if13: -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX5]] -; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[TMP19]], i64 12 -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[TMP20]], i64 6 ; CHECK-NEXT: [[TMP21:%.*]] = load i16, ptr [[NEXT_GEP4]], align 2 ; CHECK-NEXT: [[TMP22:%.*]] = zext i16 [[TMP21]] to i32 ; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i32 [[TMP22]], 7 diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 736f88e7f2d04..57c35e874a55c 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -200,13 +200,13 @@ for.end: ; INTER-NOT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %pair, ptr %p, i64 %i, i32 0 ; INTER: vector.body ; INTER: %index = phi i64 [ 0, %vector.ph ], [ %index.next, {{.*}} ] +; INTER: %[[I1:.+]] = or disjoint i64 %index, 1 +; INTER: %[[I2:.+]] = or disjoint i64 %index, 2 +; INTER: %[[I3:.+]] = or disjoint i64 %index, 3 ; INTER: %[[G0:.+]] = getelementptr inbounds %pair, ptr %p, i64 %index, i32 0 ; INTER: %wide.vec = load <8 x i32>, ptr %[[G0]], align 8 -; INTER: %[[I1:.+]] = or disjoint i64 %index, 1 ; INTER: getelementptr inbounds %pair, ptr %p, i64 %[[I1]], i32 0 -; INTER: %[[I2:.+]] = or disjoint i64 %index, 2 ; INTER: getelementptr inbounds %pair, ptr %p, i64 %[[I2]], i32 0 -; INTER: %[[I3:.+]] = or disjoint i64 %index, 3 ; INTER: getelementptr inbounds %pair, ptr %p, i64 %[[I3]], i32 0 ; INTER: br i1 {{.*}}, label %middle.block, label %vector.body ; diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll index eb688f86d84ee..af8bcef775e62 100644 --- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll +++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll @@ -969,6 +969,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 @@ -986,7 +987,6 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_1 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 @@ -1219,6 +1219,7 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 @@ -1236,7 +1237,6 @@ define void @deref_assumption_in_preheader_constant_trip_count_align_4_not_known ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 @@ -1314,6 +1314,7 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 @@ -1331,7 +1332,6 @@ define void @deref_assumption_too_small_in_preheader_constant_trip_count_align_4 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] ; CHECK: [[PRED_LOAD_IF1]]: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index 7ac0eb038cb7a..767df2e38d862 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -94,6 +94,7 @@ define i32 @test(ptr nocapture %f) #0 { ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 [[TMP0]] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 @@ -110,7 +111,6 @@ define i32 @test(ptr nocapture %f) #0 { ; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if1: -; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP9]] ; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; VEC-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], 20 @@ -322,6 +322,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE2]] ] ; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]] ; VEC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 +; VEC-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 1 ; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP6]] ; VEC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4 @@ -330,7 +331,6 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP6]] ; VEC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 ; VEC-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 -; VEC-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 1 ; VEC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP13]] ; VEC-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; VEC-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 @@ -482,6 +482,7 @@ define void @minimal_bit_widths(i1 %c) { ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1 @@ -490,7 +491,6 @@ define void @minimal_bit_widths(i1 %c) { ; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] ; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0 ; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1 -; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr undef, i64 [[TMP7]] ; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1 ; VEC-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1 @@ -612,6 +612,7 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) { ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]] ; VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1 @@ -621,7 +622,6 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) { ; VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP0]] ; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 0 ; VEC-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1 -; VEC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP7]] ; VEC-NEXT: [[TMP9:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1 ; VEC-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 9c3ba8fbcf036..7057fded1c3ac 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -1961,6 +1961,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 @@ -1974,7 +1975,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; CHECK-NEXT: br i1 [[C]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.if1: -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1 @@ -2026,6 +2026,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[PRED_UDIV_CONTINUE2]] ] +; IND-NEXT: [[TMP6:%.*]] = or disjoint i32 [[INDEX]], 1 ; IND-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 @@ -2039,7 +2040,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ] ; IND-NEXT: br i1 [[C]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]] ; IND: pred.udiv.if1: -; IND-NEXT: [[TMP6:%.*]] = or disjoint i32 [[INDEX]], 1 ; IND-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1 ; IND-NEXT: [[TMP8:%.*]] = udiv i32 [[TMP7]], [[TMP6]] ; IND-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP8]], i64 1 @@ -2093,6 +2093,9 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ] ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_UDIV_CONTINUE8]] ] +; UNROLL-NEXT: [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1 +; UNROLL-NEXT: [[TMP12:%.*]] = or disjoint i32 [[INDEX]], 2 +; UNROLL-NEXT: [[TMP17:%.*]] = or disjoint i32 [[INDEX]], 3 ; UNROLL-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8 @@ -2108,7 +2111,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ] ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]] ; UNROLL: pred.udiv.if3: -; UNROLL-NEXT: [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1 ; UNROLL-NEXT: [[TMP9:%.*]] = udiv i32 [[TMP8]], [[TMP7]] ; UNROLL-NEXT: [[TMP10:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP9]], i64 1 @@ -2117,7 +2119,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF3]] ] ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] ; UNROLL: pred.udiv.if5: -; UNROLL-NEXT: [[TMP12:%.*]] = or disjoint i32 [[INDEX]], 2 ; UNROLL-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i64 0 ; UNROLL-NEXT: [[TMP14:%.*]] = udiv i32 [[TMP13]], [[TMP12]] ; UNROLL-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i64 0 @@ -2126,7 +2127,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], [[PRED_UDIV_IF5]] ] ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]] ; UNROLL: pred.udiv.if7: -; UNROLL-NEXT: [[TMP17:%.*]] = or disjoint i32 [[INDEX]], 3 ; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i64 1 ; UNROLL-NEXT: [[TMP19:%.*]] = udiv i32 [[TMP18]], [[TMP17]] ; UNROLL-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[TMP19]], i64 1 @@ -2185,6 +2185,9 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 1 +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2 +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i32 [[INDEX]], 3 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 @@ -2200,7 +2203,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ] ; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]] ; UNROLL-NO-IC: pred.udiv.if3: -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i32 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP12]], i32 1 @@ -2209,7 +2211,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF3]] ] ; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] ; UNROLL-NO-IC: pred.udiv.if5: -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add i32 [[INDEX]], 2 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], [[TMP16]] ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0 @@ -2218,7 +2219,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE4]] ], [ [[TMP19]], [[PRED_UDIV_IF5]] ] ; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]] ; UNROLL-NO-IC: pred.udiv.if7: -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = add i32 [[INDEX]], 3 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = udiv i32 [[TMP23]], [[TMP22]] ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP24]], i32 1 @@ -2274,6 +2274,13 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE16:%.*]] ] ; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_UDIV_CONTINUE16]] ] ; INTERLEAVE-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_UDIV_CONTINUE16]] ] +; INTERLEAVE-NEXT: [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1 +; INTERLEAVE-NEXT: [[TMP12:%.*]] = or disjoint i32 [[INDEX]], 2 +; INTERLEAVE-NEXT: [[TMP17:%.*]] = or disjoint i32 [[INDEX]], 3 +; INTERLEAVE-NEXT: [[TMP22:%.*]] = or disjoint i32 [[INDEX]], 4 +; INTERLEAVE-NEXT: [[TMP27:%.*]] = or disjoint i32 [[INDEX]], 5 +; INTERLEAVE-NEXT: [[TMP32:%.*]] = or disjoint i32 [[INDEX]], 6 +; INTERLEAVE-NEXT: [[TMP37:%.*]] = or disjoint i32 [[INDEX]], 7 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 @@ -2289,7 +2296,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]] ; INTERLEAVE: pred.udiv.if3: -; INTERLEAVE-NEXT: [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1 ; INTERLEAVE-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = udiv i32 [[TMP8]], [[TMP7]] ; INTERLEAVE-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i64 1 @@ -2298,7 +2304,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF3]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] ; INTERLEAVE: pred.udiv.if5: -; INTERLEAVE-NEXT: [[TMP12:%.*]] = or disjoint i32 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = udiv i32 [[TMP13]], [[TMP12]] ; INTERLEAVE-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 2 @@ -2307,7 +2312,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], [[PRED_UDIV_IF5]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] ; INTERLEAVE: pred.udiv.if7: -; INTERLEAVE-NEXT: [[TMP17:%.*]] = or disjoint i32 [[INDEX]], 3 ; INTERLEAVE-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3 ; INTERLEAVE-NEXT: [[TMP19:%.*]] = udiv i32 [[TMP18]], [[TMP17]] ; INTERLEAVE-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i64 3 @@ -2316,7 +2320,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP20]], [[PRED_UDIV_IF7]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]] ; INTERLEAVE: pred.udiv.if9: -; INTERLEAVE-NEXT: [[TMP22:%.*]] = or disjoint i32 [[INDEX]], 4 ; INTERLEAVE-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 0 ; INTERLEAVE-NEXT: [[TMP24:%.*]] = udiv i32 [[TMP23]], [[TMP22]] ; INTERLEAVE-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> poison, i32 [[TMP24]], i64 0 @@ -2325,7 +2328,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE8]] ], [ [[TMP25]], [[PRED_UDIV_IF9]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]] ; INTERLEAVE: pred.udiv.if11: -; INTERLEAVE-NEXT: [[TMP27:%.*]] = or disjoint i32 [[INDEX]], 5 ; INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 1 ; INTERLEAVE-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], [[TMP27]] ; INTERLEAVE-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP29]], i64 1 @@ -2334,7 +2336,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP31:%.*]] = phi <4 x i32> [ [[TMP26]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP30]], [[PRED_UDIV_IF11]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]] ; INTERLEAVE: pred.udiv.if13: -; INTERLEAVE-NEXT: [[TMP32:%.*]] = or disjoint i32 [[INDEX]], 6 ; INTERLEAVE-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 2 ; INTERLEAVE-NEXT: [[TMP34:%.*]] = udiv i32 [[TMP33]], [[TMP32]] ; INTERLEAVE-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP34]], i64 2 @@ -2343,7 +2344,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP36:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP35]], [[PRED_UDIV_IF13]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16]] ; INTERLEAVE: pred.udiv.if15: -; INTERLEAVE-NEXT: [[TMP37:%.*]] = or disjoint i32 [[INDEX]], 7 ; INTERLEAVE-NEXT: [[TMP38:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 3 ; INTERLEAVE-NEXT: [[TMP39:%.*]] = udiv i32 [[TMP38]], [[TMP37]] ; INTERLEAVE-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP36]], i32 [[TMP39]], i64 3 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll index fb4545cb715bb..c2f912fb4bc8d 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll @@ -27,6 +27,7 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] +; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> @@ -42,7 +43,6 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP6]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2 ; CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP7]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 402632b51698a..25df4db3c6c99 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -19,6 +19,7 @@ define i16 @test_access_size_not_multiple_of_align(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_LOAD_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1 @@ -35,7 +36,6 @@ define i16 @test_access_size_not_multiple_of_align(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[ALLOCA]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP12]], i32 1 @@ -116,6 +116,7 @@ define i32 @test_access_size_multiple_of_align_but_offset_by_1(i64 %len, ptr %te ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_LOAD_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TEST_BASE:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1 @@ -132,7 +133,6 @@ define i32 @test_access_size_multiple_of_align_but_offset_by_1(i64 %len, ptr %te ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[START]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP12]], i32 1 @@ -230,6 +230,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4 @@ -250,7 +251,6 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 ; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[TMP20]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 1 @@ -260,7 +260,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -287,7 +287,7 @@ define i32 @loop_requires_scev_predicate(ptr %dest, i32 %end) { ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32 ; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret i32 0 ; @@ -344,6 +344,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 @@ -368,7 +369,6 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP14]] ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[REVERSE2]], i32 1 ; CHECK-NEXT: [[TMP17:%.*]] = shl nsw i32 [[TMP16]], 2 @@ -377,7 +377,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK: pred.store.continue4: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -399,7 +399,7 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK: for.inc: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false) ; CHECK-NEXT: ret void @@ -490,7 +490,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -513,7 +513,7 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK: for.inc: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false) ; CHECK-NEXT: ret void @@ -584,7 +584,7 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> [[TMP13]]) ; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] @@ -609,7 +609,7 @@ define i16 @test_strided_access(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[VAL_PHI:%.*]] = phi i16 [ 0, [[LOOP]] ], [ [[VAL]], [[PRED]] ] ; CHECK-NEXT: [[ACCUM_NEXT]] = add i16 [[ACCUM]], [[VAL_PHI]] ; CHECK-NEXT: [[EXIT:%.*]] = icmp eq i64 [[IV]], 4095 -; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: loop_exit: ; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i16 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[ACCUM_NEXT_LCSSA]] @@ -659,6 +659,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 511, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 @@ -684,7 +685,6 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_DEST]], i64 0, i64 [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = shl nsw i32 [[TMP12]], 2 ; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 @@ -693,7 +693,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 -2) ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -716,7 +716,7 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK: for.inc: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[CMP2_NOT:%.*]] = icmp eq i64 [[IV]], 0 -; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[DEST:%.*]], ptr [[LOCAL_DEST]], i64 1024, i1 false) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll index 7e00cb74a69ed..ca15b65ec5c36 100644 --- a/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/load-of-struct-deref-pred.ll @@ -16,21 +16,21 @@ define void @accesses_to_struct_dereferenceable(ptr noalias %dst) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> [[WIDE_LOAD1]] -; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> [[WIDE_LOAD1]] +; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -100,55 +100,55 @@ define void @accesses_to_struct_may_not_be_dereferenceable_due_to_loop_bound(ptr ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 +; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP19]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP2]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i32> [ [[TMP15]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 -; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 +; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP3]] ; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP25]], i32 3 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP25]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP22]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP28]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP29]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP27]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP27]] +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP30]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 @@ -222,56 +222,56 @@ define void @accesses_to_struct_may_not_be_dereferenceable_access_size(ptr noali ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 -; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]] -; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i64> poison, i64 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr @foo, i64 0, i32 1, i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> poison, i64 [[TMP10]], i32 0 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i64> [ poison, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i64> [[TMP9]], i64 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP1]] +; CHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP15]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i64> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] +; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i64> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], [[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 +; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP18]], align 4 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP19]], i32 2 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP2]] +; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP19]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP20]], i32 2 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] ; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i64> [ [[TMP15]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 -; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] +; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i64> [ [[TMP17]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 +; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr @foo, i64 0, i32 1, i64 [[TMP3]] ; CHECK-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP24]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP25]], i32 3 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP25]], i32 3 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i64> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i64> [ [[TMP22]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP28:%.*]] = trunc <4 x i64> [[TMP27]] to <4 x i32> ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_FOO]], ptr @foo, i64 0, i32 0, i64 [[TMP0]] ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP29]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP30]], align 4 -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP28]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> [[TMP28]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP31]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 914b9ad4a9e5a..f1925c39c75ac 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -1077,6 +1077,7 @@ define void @scalar_predication(ptr %addr) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[ADDR:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 @@ -1092,7 +1093,6 @@ define void @scalar_predication(ptr %addr) { ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[TMP8]] ; CHECK-NEXT: store float 1.000000e+01, ptr [[TMP9]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index 06b6a2b29c01e..c911aa2947066 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -25,44 +25,44 @@ define void @a(ptr readnone %b) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], -2 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -3 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP3]] +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP4]] +; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 -3 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP11]], splat (i1 true) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 -; CHECK-NEXT: store i8 95, ptr [[TMP9]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 +; CHECK-NEXT: store i8 95, ptr [[TMP14]], align 1 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1 -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1 -; CHECK-NEXT: store i8 95, ptr [[TMP12]], align 1 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP2]], i64 -1 +; CHECK-NEXT: store i8 95, ptr [[TMP22]], align 1 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]] ; CHECK: pred.store.continue6: -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 -; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2 +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -2 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1 -; CHECK-NEXT: store i8 95, ptr [[TMP15]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP3]], i64 -1 +; CHECK-NEXT: store i8 95, ptr [[TMP16]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]] +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3 +; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.if9: -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], -3 -; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP4]], i64 -1 ; CHECK-NEXT: store i8 95, ptr [[TMP18]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index a85718d1a382f..4e1ba0cd230a7 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -360,6 +360,9 @@ define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 5, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP12:%.*]] = or disjoint i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP22:%.*]] = or disjoint i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP32:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer @@ -379,7 +382,6 @@ define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 ; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP12:%.*]] = or disjoint i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP14]], i64 1 @@ -393,7 +395,6 @@ define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 ; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP22:%.*]] = or disjoint i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]] ; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP24]], i64 2 @@ -407,7 +408,6 @@ define i32 @simple_chained_rdx(ptr noalias %a, ptr noalias %b, ptr noalias %cond ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 ; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP32:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP32]] ; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP34]], i64 3 diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll index 4c9b4a4df0e37..77dfca6f659ae 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll @@ -484,6 +484,9 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_STORE_CONTINUE8]] ] ; CHECK-VF4-IC1-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[PRED_STORE_CONTINUE8]] ] ; CHECK-VF4-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4-IC1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF4-IC1-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF4-IC1-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 3 ; CHECK-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] ; CHECK-VF4-IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 ; CHECK-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]] @@ -503,7 +506,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 ; CHECK-VF4-IC1-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK-VF4-IC1: pred.store.if3: -; CHECK-VF4-IC1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 1 ; CHECK-VF4-IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]] ; CHECK-VF4-IC1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC1-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 1 @@ -513,7 +515,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 ; CHECK-VF4-IC1-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] ; CHECK-VF4-IC1: pred.store.if5: -; CHECK-VF4-IC1-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 2 ; CHECK-VF4-IC1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP20]] ; CHECK-VF4-IC1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP22]], 1 @@ -523,7 +524,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC1-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 ; CHECK-VF4-IC1-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]] ; CHECK-VF4-IC1: pred.store.if7: -; CHECK-VF4-IC1-NEXT: [[TMP26:%.*]] = add i64 [[INDEX]], 3 ; CHECK-VF4-IC1-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP26]] ; CHECK-VF4-IC1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC1-NEXT: [[TMP29:%.*]] = add nsw i32 [[TMP28]], 1 @@ -598,6 +598,13 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ] ; CHECK-VF4-IC2-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ] ; CHECK-VF4-IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4-IC2-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF4-IC2-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF4-IC2-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4 +; CHECK-VF4-IC2-NEXT: [[TMP44:%.*]] = add i64 [[INDEX]], 5 +; CHECK-VF4-IC2-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], 6 +; CHECK-VF4-IC2-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], 7 ; CHECK-VF4-IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] ; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0 ; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4 @@ -623,7 +630,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP20]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] ; CHECK-VF4-IC2: pred.store.if6: -; CHECK-VF4-IC2-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 1 ; CHECK-VF4-IC2-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]] ; CHECK-VF4-IC2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP24:%.*]] = add nsw i32 [[TMP23]], 1 @@ -633,7 +639,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]] ; CHECK-VF4-IC2: pred.store.if8: -; CHECK-VF4-IC2-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 2 ; CHECK-VF4-IC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP27]] ; CHECK-VF4-IC2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP30:%.*]] = add nsw i32 [[TMP29]], 1 @@ -643,7 +648,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP32]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]] ; CHECK-VF4-IC2: pred.store.if10: -; CHECK-VF4-IC2-NEXT: [[TMP33:%.*]] = add i64 [[INDEX]], 3 ; CHECK-VF4-IC2-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP33]] ; CHECK-VF4-IC2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP36:%.*]] = add nsw i32 [[TMP35]], 1 @@ -653,7 +657,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP38]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]] ; CHECK-VF4-IC2: pred.store.if12: -; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4 ; CHECK-VF4-IC2-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]] ; CHECK-VF4-IC2-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP41:%.*]] = add nsw i32 [[TMP40]], 1 @@ -663,7 +666,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP43:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP43]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]] ; CHECK-VF4-IC2: pred.store.if14: -; CHECK-VF4-IC2-NEXT: [[TMP44:%.*]] = add i64 [[INDEX]], 5 ; CHECK-VF4-IC2-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP44]] ; CHECK-VF4-IC2-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP47:%.*]] = add nsw i32 [[TMP46]], 1 @@ -673,7 +675,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP49:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP49]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17:%.*]] ; CHECK-VF4-IC2: pred.store.if16: -; CHECK-VF4-IC2-NEXT: [[TMP50:%.*]] = add i64 [[INDEX]], 6 ; CHECK-VF4-IC2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP50]] ; CHECK-VF4-IC2-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP53:%.*]] = add nsw i32 [[TMP52]], 1 @@ -683,7 +684,6 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) { ; CHECK-VF4-IC2-NEXT: [[TMP55:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3 ; CHECK-VF4-IC2-NEXT: br i1 [[TMP55]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19]] ; CHECK-VF4-IC2: pred.store.if18: -; CHECK-VF4-IC2-NEXT: [[TMP56:%.*]] = add i64 [[INDEX]], 7 ; CHECK-VF4-IC2-NEXT: [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP56]] ; CHECK-VF4-IC2-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4, !alias.scope [[META9]], !noalias [[META6]] ; CHECK-VF4-IC2-NEXT: [[TMP59:%.*]] = add nsw i32 [[TMP58]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll index 11294b8fef3ee..fe8c64f0536a3 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll @@ -16,6 +16,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ] ; CHECK-VF2IC1-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[PRED_LOAD_CONTINUE2]] ] ; CHECK-VF2IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF2IC1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; CHECK-VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP0]] ; CHECK-VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 @@ -32,7 +33,6 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1 ; CHECK-VF2IC1-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; CHECK-VF2IC1-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]] ; CHECK-VF2IC1: [[PRED_LOAD_IF1]]: -; CHECK-VF2IC1-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1 ; CHECK-VF2IC1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP11]] ; CHECK-VF2IC1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 ; CHECK-VF2IC1-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/struct-return.ll b/llvm/test/Transforms/LoopVectorize/struct-return.ll index 1b2a809a552d8..7e0e1410932c9 100644 --- a/llvm/test/Transforms/LoopVectorize/struct-return.ll +++ b/llvm/test/Transforms/LoopVectorize/struct-return.ll @@ -215,7 +215,7 @@ define void @scalarized_predicated_struct_return(ptr %a) optsize { ; CHECK-LABEL: define void @scalarized_predicated_struct_return ; CHECK: vector.body: ; CHECK: pred.store.if: -; CHECK: tail call { i64, i64 } @bar_i64(i64 %5) +; CHECK: tail call { i64, i64 } @bar_i64(i64 {{.+}}) entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index caa5969bbc365..855d401686b7b 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -92,6 +92,13 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP4]] +; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP5]] +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP6]] ; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2 @@ -102,28 +109,21 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 14 ; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; CHECK: pred.store.if7: -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 8 -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP5]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP1]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] ; CHECK: pred.store.continue8: ; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] ; CHECK: pred.store.if9: -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16 -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP6]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP2]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] ; CHECK: pred.store.continue10: ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.if11: -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 24 -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP7]] ; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP3]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]] ; CHECK: pred.store.continue12: diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll index f31885219f53f..27160e6f62470 100644 --- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll +++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll @@ -153,6 +153,9 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi ; CHECK: vector.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[IV]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[IV]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 @@ -167,7 +170,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: store i32 1, ptr [[TMP12]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] @@ -175,7 +177,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; CHECK-NEXT: store i32 1, ptr [[TMP15]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] @@ -183,7 +184,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[LOOP_LATCH]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[IV]], 3 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]] ; CHECK-NEXT: store i32 1, ptr [[TMP18]], align 4 ; CHECK-NEXT: br label [[LOOP_LATCH]] @@ -265,6 +265,9 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco ; CHECK: vector.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[IV]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], 2 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[IV]], 3 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4 @@ -279,7 +282,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] ; CHECK-NEXT: store i32 1, ptr [[TMP12]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] @@ -287,7 +289,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 ; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; CHECK: pred.store.if3: -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]] ; CHECK-NEXT: store i32 1, ptr [[TMP15]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] @@ -295,7 +296,6 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 ; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[LOOP_LATCH]] ; CHECK: pred.store.if5: -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[IV]], 3 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP17]] ; CHECK-NEXT: store i32 1, ptr [[TMP18]], align 4 ; CHECK-NEXT: br label [[LOOP_LATCH]]