diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f233f0dc1b025..b693683c71ed6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -802,6 +802,14 @@ const VPBasicBlock *VPBasicBlock::getCFGPredecessor(unsigned Idx) const {
   return Pred->getExitingBasicBlock();
 }
 
+VPHeaderPHIRecipe *VPRegionBlock::getCurrentIteration() {
+  VPCurrentIterationPHIRecipe *CurrentIteration =
+      vputils::getCurrentIterationPhi(*getPlan());
+  if (CurrentIteration)
+    return CurrentIteration;
+  return getCanonicalIV();
+}
+
 InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) {
   if (!isReplicator()) {
     InstructionCost Cost = 0;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e2ea4cbddba9d..294afa8978821 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -4472,6 +4472,10 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
   const Type *getCanonicalIVType() const {
     return getCanonicalIV()->getScalarType();
   }
+
+  /// Return the VPCurrentIterationPHIRecipe for loops that step by a
+  /// variable length, or the VPCanonicalIVPHIRecipe otherwise.
+  VPHeaderPHIRecipe *getCurrentIteration();
 };
 
 inline VPRegionBlock *VPRecipeBase::getRegion() {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b4c74c8776fc3..147f5d370710e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3090,7 +3090,7 @@ void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
   VPValue *HeaderMask = nullptr, *EVL = nullptr;
   for (VPRecipeBase &R : *Plan.getVectorLoopRegion()->getEntryBasicBlock()) {
     if (match(&R, m_SpecificICmp(CmpInst::ICMP_ULT, m_StepVector(),
-                                 m_VPValue(EVL))) &&
+                                 m_ZExtOrSelf(m_VPValue(EVL)))) &&
         match(EVL, m_EVL(m_VPValue()))) {
       HeaderMask = R.getVPSingleValue();
       break;
@@ -3120,19 +3120,44 @@ void VPlanTransforms::optimizeEVLMasks(VPlan &Plan) {
   }
 }
 
-/// After replacing the canonical IV with a EVL-based IV, fixup recipes that use
-/// VF to use the EVL instead to avoid incorrect updates on the penultimate
-/// iteration.
-static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) {
-  VPTypeAnalysis TypeInfo(Plan);
+/// Add a VPCurrentIterationPHIRecipe to \p Plan with \p NewStep as the step
+/// value, replace all uses of VPCanonicalIVPHIRecipe with
+/// VPCurrentIterationPHIRecipe except for the canonical IV increment, and
+/// fix up recipes that use VF to use \p NewStep instead. After this
+/// transformation, VPCanonicalIVPHIRecipe is used only for counting loop
+/// iterations.
+static void addCurrentIterationPhi(VPlan &Plan, VPValue *NewStep) {
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
-  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
+  auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();
+  auto *CanIVTy = LoopRegion->getCanonicalIVType();
+  auto *CanonicalIVIncrement =
+      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
+  VPValue *StartV = CanonicalIVPHI->getStartValue();
+  // Create the CurrentIteration recipe in the vector loop.
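+  // Resulting loop structure (a sketch; value names are illustrative, not
+  // emitted by this code):
+  //   current.iteration      = phi [ StartV, ph ], [ current.iteration.next ]
+  //   current.iteration.next = add NewStep, current.iteration
+  // The canonical IV phi keeps its original increment and, from here on,
+  // only counts iterations.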
+  auto *CurrentIteration =
+      new VPCurrentIterationPHIRecipe(StartV, DebugLoc::getUnknown());
+  CurrentIteration->insertAfter(CanonicalIVPHI);
+  VPBuilder Builder(CanonicalIVIncrement);
+  VPTypeAnalysis TypeInfo(Plan);
+  auto *NextIter = Builder.createAdd(NewStep, CurrentIteration,
+                                     CanonicalIVIncrement->getDebugLoc(),
+                                     "current.iteration.next",
+                                     {CanonicalIVIncrement->hasNoUnsignedWrap(),
+                                      CanonicalIVIncrement->hasNoSignedWrap()});
+  CurrentIteration->addOperand(NextIter);
+
+  // Replace all uses of VPCanonicalIVPHIRecipe by
+  // VPCurrentIterationPHIRecipe except for the canonical IV increment.
+  CanonicalIVPHI->replaceAllUsesWith(CurrentIteration);
+  CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
+
+  // Fixup recipes that use VF to use NewStep instead.
   assert(all_of(Plan.getVF().users(),
                 IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
                         VPWidenIntOrFpInductionRecipe>) &&
-         "User of VF that we can't transform to EVL.");
-  Plan.getVF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
+         "User of VF that we can't transform to variable-length step.");
+  Plan.getVF().replaceUsesWithIf(NewStep, [](VPUser &U, unsigned Idx) {
     return isa<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
                VPWidenIntOrFpInductionRecipe>(U);
   });
@@ -3145,28 +3170,43 @@ static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) {
          }) &&
          "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
          "increment of the canonical induction.");
-  Plan.getVFxUF().replaceUsesWithIf(&EVL, [](VPUser &U, unsigned Idx) {
+  Plan.getVFxUF().replaceUsesWithIf(NewStep, [](VPUser &U, unsigned Idx) {
     // Only replace uses in VPWidenPointerInductionRecipe; the increment of the
     // canonical induction must not be updated.
     return isa<VPWidenPointerInductionRecipe>(U);
   });
 
-  // Create a scalar phi to track the previous EVL if fixed-order recurrence is
-  // contained.
+  // Create a scalar phi to track the previous variable-length step if a
+  // fixed-order recurrence is contained.
+  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
   bool ContainsFORs =
       any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
   if (ContainsFORs) {
-    // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
-    VPValue *MaxEVL = &Plan.getVF();
-    // Emit VPScalarCastRecipe in preheader if VF is not a 32 bits integer.
+    // Emit a VPScalarCastRecipe in the preheader if VF is not a 32-bit
+    // integer.
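+    // What this branch builds (a sketch; value names are illustrative):
+    //   preheader: vf.i32    = zext/trunc VF to i32
+    //   loop:      prev.step = phi [ vf.i32, preheader ], [ step.i32, loop ]
+    // Each fixed-order recurrence is then spliced against the previous step:
+    //   llvm.experimental.vp.splice(v1, v2, -1, true, prev.step, step.i32)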
     VPBuilder Builder(LoopRegion->getPreheaderVPBB());
-    MaxEVL = Builder.createScalarZExtOrTrunc(
-        MaxEVL, Type::getInt32Ty(Plan.getContext()),
-        TypeInfo.inferScalarType(MaxEVL), DebugLoc::getUnknown());
+    VPValue *VFI32 = Builder.createScalarZExtOrTrunc(
+        &Plan.getVF(), Type::getInt32Ty(Plan.getContext()), CanIVTy,
+        DebugLoc::getUnknown());
+
+    VPValue *StepI32;
+    VPValue *StepSource;
+    if (match(NewStep, m_ZExt(m_VPValue(StepSource))) &&
+        TypeInfo.inferScalarType(StepSource) ==
+            Type::getInt32Ty(Plan.getContext())) {
+      StepI32 = StepSource;
+    } else {
+      VPRecipeBase *StepR = NewStep->getDefiningRecipe();
+      Builder.setInsertPoint(StepR->getParent(),
+                             std::next(StepR->getIterator()));
+      StepI32 = Builder.createScalarZExtOrTrunc(
+          NewStep, Type::getInt32Ty(Plan.getContext()), CanIVTy,
+          DebugLoc::getUnknown());
+    }
     Builder.setInsertPoint(Header, Header->getFirstNonPhi());
-    VPValue *PrevEVL = Builder.createScalarPhi(
-        {MaxEVL, &EVL}, DebugLoc::getUnknown(), "prev.evl");
+    VPValue *PrevStep = Builder.createScalarPhi(
+        {VFI32, StepI32}, DebugLoc::getUnknown(), "prev.step");
 
     for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock *>(
              vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry()))) {
@@ -3180,7 +3220,7 @@ static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) {
             ConstantInt::getSigned(Type::getInt32Ty(Plan.getContext()), -1));
         VPWidenIntrinsicRecipe *VPSplice = new VPWidenIntrinsicRecipe(
             Intrinsic::experimental_vp_splice,
-            {V1, V2, Imm, Plan.getTrue(), PrevEVL, &EVL},
+            {V1, V2, Imm, Plan.getTrue(), PrevStep, StepI32},
             TypeInfo.inferScalarType(R.getVPSingleValue()), {}, {},
             R.getDebugLoc());
         VPSplice->insertBefore(&R);
@@ -3193,30 +3233,24 @@ static void fixupVFUsersForEVL(VPlan &Plan, VPValue &EVL) {
   if (!HeaderMask)
     return;
 
-  // Replace header masks with a mask equivalent to predicating by EVL:
+  // Replace header masks with a mask equivalent to predicating by the
+  // variable-length step:
   //
   //   icmp ule widen-canonical-iv backedge-taken-count
   // ->
-  //   icmp ult step-vector, EVL
-  VPRecipeBase *EVLR = EVL.getDefiningRecipe();
-  VPBuilder Builder(EVLR->getParent(), std::next(EVLR->getIterator()));
-  Type *EVLType = TypeInfo.inferScalarType(&EVL);
-  VPValue *EVLMask = Builder.createICmp(
+  //   icmp ult step-vector, NewStep
+  VPRecipeBase *StepR = NewStep->getDefiningRecipe();
+  Builder.setInsertPoint(StepR->getParent(), std::next(StepR->getIterator()));
+  VPValue *NewMask = Builder.createICmp(
       CmpInst::ICMP_ULT,
-      Builder.createNaryOp(VPInstruction::StepVector, {}, EVLType), &EVL);
-  HeaderMask->replaceAllUsesWith(EVLMask);
+      Builder.createNaryOp(VPInstruction::StepVector, {}, CanIVTy), NewStep);
+  HeaderMask->replaceAllUsesWith(NewMask);
 }
 
 /// Converts a tail folded vector loop region to step by
 /// VPInstruction::ExplicitVectorLength elements instead of VF elements each
 /// iteration.
 ///
-/// - Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
-///   replaces all uses except the canonical IV increment of
-///   VPCanonicalIVPHIRecipe with a VPCurrentIterationPHIRecipe.
-///   VPCanonicalIVPHIRecipe is used only for loop iterations counting after
-///   this transformation.
-///
 /// - The header mask is replaced with a header mask based on the EVL.
 ///
 /// - Plans with FORs have a new phi added to keep track of the EVL of the
@@ -3270,12 +3304,9 @@ void VPlanTransforms::addExplicitVectorLength(
   auto *CanonicalIVPHI = LoopRegion->getCanonicalIV();
   auto *CanIVTy = LoopRegion->getCanonicalIVType();
-  VPValue *StartV = CanonicalIVPHI->getStartValue();
+  auto *CanonicalIVIncrement =
+      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
-
-  // Create the CurrentIteration recipe in the vector loop.
-  auto *CurrentIteration =
-      new VPCurrentIterationPHIRecipe(StartV, DebugLoc::getUnknown());
-  CurrentIteration->insertAfter(CanonicalIVPHI);
   VPBuilder Builder(Header, Header->getFirstNonPhi());
   // Create the AVL (application vector length), starting from TC -> 0 in steps
   // of EVL.
@@ -3292,52 +3323,27 @@ void VPlanTransforms::addExplicitVectorLength(
   }
   auto *VPEVL = Builder.createNaryOp(VPInstruction::ExplicitVectorLength, AVL,
                                      DebugLoc::getUnknown(), "evl");
-
-  auto *CanonicalIVIncrement =
-      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
-  Builder.setInsertPoint(CanonicalIVIncrement);
-  VPValue *OpVPEVL = VPEVL;
-
+  // Cast EVL to the canonical IV type.
   auto *I32Ty = Type::getInt32Ty(Plan.getContext());
-  OpVPEVL = Builder.createScalarZExtOrTrunc(
-      OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
-
-  auto *NextIter = Builder.createAdd(OpVPEVL, CurrentIteration,
-                                     CanonicalIVIncrement->getDebugLoc(),
-                                     "current.iteration.next",
-                                     {CanonicalIVIncrement->hasNoUnsignedWrap(),
-                                      CanonicalIVIncrement->hasNoSignedWrap()});
-  CurrentIteration->addOperand(NextIter);
+  VPValue *OpVPEVL = Builder.createScalarZExtOrTrunc(
+      VPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
+  addCurrentIterationPhi(Plan, OpVPEVL);
 
+  Builder.setInsertPoint(CanonicalIVIncrement);
   VPValue *NextAVL = Builder.createSub(AVLPhi, OpVPEVL,
                                        DebugLoc::getCompilerGenerated(),
                                        "avl.next", {/*NUW=*/true, /*NSW=*/false});
   AVLPhi->addOperand(NextAVL);
 
-  fixupVFUsersForEVL(Plan, *VPEVL);
   removeDeadRecipes(Plan);
 
-  // Replace all uses of VPCanonicalIVPHIRecipe by
-  // VPCurrentIterationPHIRecipe except for the canonical IV increment.
-  CanonicalIVPHI->replaceAllUsesWith(CurrentIteration);
-  CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
   // TODO: support unroll factor > 1.
   Plan.setUF(1);
 }
 
 void VPlanTransforms::convertToVariableLengthStep(VPlan &Plan) {
-  // Find the vector loop entry by locating VPCurrentIterationPHIRecipe.
-  // There should be only one VPCurrentIteration in the entire plan.
-  VPCurrentIterationPHIRecipe *CurrentIteration = nullptr;
-
-  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock *>(
-           vp_depth_first_shallow(Plan.getEntry())))
-    for (VPRecipeBase &R : VPBB->phis())
-      if (auto *PhiR = dyn_cast<VPCurrentIterationPHIRecipe>(&R)) {
-        assert(!CurrentIteration &&
-               "Found multiple CurrentIteration. Only one expected");
-        CurrentIteration = PhiR;
-      }
+  VPCurrentIterationPHIRecipe *CurrentIteration =
+      vputils::getCurrentIterationPhi(Plan);
 
   // Early return if it is not variable-length stepping.
   if (!CurrentIteration)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 972a18ebded63..b997536695a77 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -285,11 +285,8 @@ struct VPlanTransforms {
       VPlan &Plan,
       const std::function<bool(BasicBlock *)> &BlockNeedsPredication);
 
-  /// Add a VPCurrentIterationPHIRecipe and related recipes to \p Plan and
-  /// replaces all uses except the canonical IV increment of
-  /// VPCanonicalIVPHIRecipe with a VPCurrentIterationPHIRecipe.
-  /// VPCanonicalIVPHIRecipe is only used to control the loop after
-  /// this transformation.
+  /// Convert a tail-folded vector loop to step by the explicit vector length
+  /// (EVL) instead of by VF elements each iteration.
   static void addExplicitVectorLength(
       VPlan &Plan, const std::optional<unsigned> &MaxEVLSafeElements);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
index f5318bb1c6515..e9d730fbd80b0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
@@ -570,13 +570,13 @@ vputils::getRecipesForUncountableExit(VPlan &Plan,
 VPSingleDefRecipe *vputils::findHeaderMask(VPlan &Plan) {
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
   SmallVector<VPValue *> WideCanonicalIVs;
-  auto *FoundWidenCanonicalIVUser = find_if(
-      LoopRegion->getCanonicalIV()->users(), IsaPred<VPWidenCanonicalIVRecipe>);
-  assert(count_if(LoopRegion->getCanonicalIV()->users(),
+  VPHeaderPHIRecipe *CurrentIteration = LoopRegion->getCurrentIteration();
+  auto *FoundWidenCanonicalIVUser =
+      find_if(CurrentIteration->users(), IsaPred<VPWidenCanonicalIVRecipe>);
+  assert(count_if(CurrentIteration->users(),
                   IsaPred<VPWidenCanonicalIVRecipe>) <= 1 &&
          "Must have at most one VPWideCanonicalIVRecipe");
-  if (FoundWidenCanonicalIVUser !=
-      LoopRegion->getCanonicalIV()->users().end()) {
+  if (FoundWidenCanonicalIVUser != CurrentIteration->users().end()) {
     auto *WideCanonicalIV =
         cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
     WideCanonicalIVs.push_back(WideCanonicalIV);
@@ -665,3 +665,28 @@ VPInstruction *vputils::findComputeReductionResult(VPReductionPHIRecipe *PhiR) {
   return vputils::findUserOf<VPInstruction::ComputeReductionResult>(
       cast<VPSingleDefRecipe>(SelR));
 }
+
+VPCurrentIterationPHIRecipe *vputils::getCurrentIterationPhi(VPlan &Plan) {
+  VPCurrentIterationPHIRecipe *CurrentIteration = nullptr;
+  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+  if (LoopRegion) {
+    VPCanonicalIVPHIRecipe *CanIV = LoopRegion->getCanonicalIV();
+    if (std::next(CanIV->getIterator()) != CanIV->getParent()->end())
+      CurrentIteration = dyn_cast_or_null<VPCurrentIterationPHIRecipe>(
+          &*std::next(CanIV->getIterator()));
+    return CurrentIteration;
+  }
+
+  // Find the vector loop entry by locating VPCurrentIterationPHIRecipe.
+  // There should be only one VPCurrentIteration in the entire plan.
+  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock *>(
+           vp_depth_first_shallow(Plan.getEntry())))
+    for (VPRecipeBase &R : VPBB->phis())
+      if (auto *PhiR = dyn_cast<VPCurrentIterationPHIRecipe>(&R)) {
+        assert(!CurrentIteration &&
+               "Found multiple CurrentIteration. Only one expected");
+        CurrentIteration = PhiR;
+      }
+
+  return CurrentIteration;
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index a5692699d9d76..fbf990da5bd04 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -151,6 +151,8 @@ VPInstruction *findComputeReductionResult(VPReductionPHIRecipe *PhiR);
 /// the header-mask pattern manually.
 VPSingleDefRecipe *findHeaderMask(VPlan &Plan);
 
+/// Return the plan's VPCurrentIterationPHIRecipe if present, otherwise
+/// nullptr.
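+///
+/// A minimal usage sketch (hypothetical call site; uses only the API
+/// declared here):
+/// \code
+///   if (VPCurrentIterationPHIRecipe *CurIter =
+///           vputils::getCurrentIterationPhi(Plan))
+///     ...; // The plan steps by a variable length each iteration.
+/// \endcode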
+VPCurrentIterationPHIRecipe *getCurrentIterationPhi(VPlan &Plan); } // namespace vputils //===----------------------------------------------------------------------===// diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll index 024194db39332..b627b74578cb6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll @@ -33,13 +33,13 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { ; ZVFBFMIN-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFBFMIN-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFBFMIN-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; ZVFBFMIN-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; ZVFBFMIN-NEXT: [[TMP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[TMP0]] ; ZVFBFMIN-NEXT: [[TMP2:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[TMP0]] ; ZVFBFMIN-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8bf16.p0(ptr align 2 [[TMP1]], splat (i1 true), i32 [[TMP6]]) ; ZVFBFMIN-NEXT: [[WIDE_LOAD1:%.*]] = call @llvm.vp.load.nxv8bf16.p0(ptr align 2 [[TMP2]], splat (i1 true), i32 [[TMP6]]) ; ZVFBFMIN-NEXT: [[TMP11:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] ; ZVFBFMIN-NEXT: call void @llvm.vp.store.nxv8bf16.p0( [[TMP11]], ptr align 2 [[TMP1]], splat (i1 true), i32 [[TMP6]]) -; ZVFBFMIN-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; ZVFBFMIN-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[TMP0]] ; ZVFBFMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; ZVFBFMIN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -125,6 +125,7 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 ; ZVFBFMIN-NEXT: [[TMP6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFBFMIN-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFBFMIN-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; ZVFBFMIN-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 ; ZVFBFMIN-NEXT: [[TMP7:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[TMP6]] ; ZVFBFMIN-NEXT: [[TMP8:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[TMP6]] ; ZVFBFMIN-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[C]], i64 [[TMP6]] @@ -135,11 +136,10 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 ; ZVFBFMIN-NEXT: [[TMP14:%.*]] = fpext [[WIDE_LOAD1]] to ; ZVFBFMIN-NEXT: [[TMP15:%.*]] = call @llvm.fmuladd.nxv4f32( [[TMP13]], [[TMP14]], [[WIDE_LOAD2]]) ; ZVFBFMIN-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[TMP15]], ptr align 4 [[TMP9]], splat (i1 true), i32 [[TMP11]]) -; ZVFBFMIN-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 ; ZVFBFMIN-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[TMP6]] ; ZVFBFMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; ZVFBFMIN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; ZVFBFMIN-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; ZVFBFMIN-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; ZVFBFMIN: [[MIDDLE_BLOCK]]: ; ZVFBFMIN-NEXT: br label %[[EXIT:.*]] ; ZVFBFMIN: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll index 263c200c28801..de6d28e535b8d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll @@ -389,12 +389,12 @@ define void @empty_block_with_phi_1(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP9]] ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP10]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq [[VP_OP_LOAD]], zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP2]], splat (i16 99), [[VP_OP_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv8i16.p0( [[PREDPHI]], ptr align 2 [[TMP10]], splat (i1 true), i32 [[TMP13]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -441,12 +441,12 @@ define void @empty_block_with_phi_2(ptr %src, i64 %N) #0 { ; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP9]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP10]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP12]], [[WIDE_LOAD]], splat (i16 99) ; CHECK-NEXT: call void @llvm.vp.store.nxv8i16.p0( [[PREDPHI]], ptr align 2 [[TMP10]], splat (i1 true), i32 [[TMP13]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[TMP9]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll index 3fd90b2848848..ff8e14dd51716 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll @@ -22,11 +22,11 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) ; CHECK-NEXT: [[TMP9:%.*]] = add 
[[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP9]], ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) -; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -64,15 +64,15 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP8]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP8]]) ; CHECK-NEXT: [[TMP12:%.*]] = add [[VEC_PHI]], [[VP_OP_LOAD]] ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[TMP12]], [[VEC_PHI]], i32 [[TMP8]]) -; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP8]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP9]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 9feaa4edad29c..d519217f33443 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -19,11 +19,11 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) ; CHECK-NEXT: [[TMP9:%.*]] = udiv [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP9]], ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) -; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -83,12 +83,12 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 
true) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[BROADCAST_SPLAT]], splat (i64 1), i32 [[TMP10]]) ; CHECK-NEXT: [[TMP9:%.*]] = sdiv [[WIDE_LOAD]], [[TMP2]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP9]], ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) -; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -148,11 +148,11 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) ; CHECK-NEXT: [[TMP9:%.*]] = urem [[WIDE_LOAD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP9]], ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) -; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -212,12 +212,12 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) ; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[BROADCAST_SPLAT]], splat (i64 1), i32 [[TMP10]]) ; CHECK-NEXT: [[TMP9:%.*]] = srem [[WIDE_LOAD]], [[TMP2]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP9]], ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP10]]) -; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -278,6 +278,7 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.vp.merge.nxv2i64( [[TMP6]], [[BROADCAST_SPLAT]], splat (i64 1), i32 [[TMP12]]) @@ -285,7 +286,6 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = extractelement [[TMP6]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -357,6 +357,7 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.vp.merge.nxv2i64( [[TMP6]], [[BROADCAST_SPLAT]], splat (i64 1), i32 [[TMP12]]) @@ -364,7 +365,6 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP9:%.*]] = extractelement [[TMP6]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -433,13 +433,13 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP10:%.*]] = udiv [[WIDE_LOAD]], splat (i64 27) ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP2]], [[TMP10]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP14]]) -; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP12]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -503,13 +503,13 @@ define void 
@predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne [[WIDE_LOAD]], splat (i64 42) ; CHECK-NEXT: [[TMP10:%.*]] = sdiv [[WIDE_LOAD]], splat (i64 27) ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP2]], [[TMP10]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP14]]) -; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP12]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -573,6 +573,7 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP9:%.*]] = icmp ne [[WIDE_LOAD]], splat (i8 -128) @@ -580,7 +581,6 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) { ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[PREDPHI]], ptr align 1 [[TMP7]], splat (i1 true), i32 [[TMP12]]) -; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll index 7953ba30cf9a8..5c7b80d5c78d2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll @@ -63,9 +63,9 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) { ; CHECK-NEXT: [[TMP6:%.*]] = shl [[TMP5]], splat (i64 3) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2p0.p0( [[VECTOR_GEP]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP7]]) -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add 
i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll index 143a51dc811f1..6b6f43685b87b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll @@ -33,13 +33,13 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { ; ZVFHMIN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFHMIN-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFHMIN-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; ZVFHMIN-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; ZVFHMIN-NEXT: [[TMP1:%.*]] = getelementptr half, ptr [[A]], i64 [[INDEX]] ; ZVFHMIN-NEXT: [[TMP2:%.*]] = getelementptr half, ptr [[B]], i64 [[INDEX]] ; ZVFHMIN-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 2 [[TMP1]], splat (i1 true), i32 [[TMP6]]) ; ZVFHMIN-NEXT: [[WIDE_LOAD1:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 2 [[TMP2]], splat (i1 true), i32 [[TMP6]]) ; ZVFHMIN-NEXT: [[TMP11:%.*]] = fadd [[WIDE_LOAD]], [[WIDE_LOAD1]] ; ZVFHMIN-NEXT: call void @llvm.vp.store.nxv8f16.p0( [[TMP11]], ptr align 2 [[TMP1]], splat (i1 true), i32 [[TMP6]]) -; ZVFHMIN-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; ZVFHMIN-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[INDEX]] ; ZVFHMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; ZVFHMIN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll index d28d33bb4eaec..846447544b7bc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll @@ -24,12 +24,12 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 { ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 23, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP2]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP6]] to i64 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: [[VP_OP_LOAD]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP9]], splat (i1 true), i32 [[TMP6]]) ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.experimental.vp.splice.nxv2i64( [[VECTOR_RECUR]], [[VP_OP_LOAD]], i32 -1, splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP6]]) ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP10]], ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP6]]) -; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP6]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP12]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll index 723d4f16e289e..08f464fb24e1a 100644 --- 
a/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/fminimumnum.ll @@ -608,6 +608,7 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 4096, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 2 [[TMP2]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]] @@ -615,7 +616,6 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[TMP17:%.*]] = call @llvm.minimumnum.nxv8f16( [[WIDE_LOAD]], [[WIDE_LOAD5]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.vp.store.nxv8f16.p0( [[TMP17]], ptr align 2 [[TMP7]], splat (i1 true), i32 [[TMP13]]) -; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -663,6 +663,7 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFHMIN-NEXT: [[AVL:%.*]] = phi i64 [ 4096, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFHMIN-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; ZVFHMIN-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64 ; ZVFHMIN-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]] ; ZVFHMIN-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 2 [[TMP13]], splat (i1 true), i32 [[TMP19]]) ; ZVFHMIN-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]] @@ -670,7 +671,6 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[TMP17:%.*]] = call @llvm.minimumnum.nxv8f16( [[WIDE_LOAD]], [[WIDE_LOAD5]]) ; ZVFHMIN-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] ; ZVFHMIN-NEXT: call void @llvm.vp.store.nxv8f16.p0( [[TMP17]], ptr align 2 [[TMP18]], splat (i1 true), i32 [[TMP19]]) -; ZVFHMIN-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64 ; ZVFHMIN-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; ZVFHMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; ZVFHMIN-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -741,6 +741,7 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 4096, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP20:%.*]] = zext i32 
[[TMP13]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 2 [[TMP2]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]] @@ -748,7 +749,6 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK-NEXT: [[TMP17:%.*]] = call @llvm.maximumnum.nxv8f16( [[WIDE_LOAD]], [[WIDE_LOAD5]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.vp.store.nxv8f16.p0( [[TMP17]], ptr align 2 [[TMP7]], splat (i1 true), i32 [[TMP13]]) -; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -796,6 +796,7 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFHMIN-NEXT: [[AVL:%.*]] = phi i64 [ 4096, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; ZVFHMIN-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; ZVFHMIN-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64 ; ZVFHMIN-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]] ; ZVFHMIN-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 2 [[TMP13]], splat (i1 true), i32 [[TMP19]]) ; ZVFHMIN-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]] @@ -803,7 +804,6 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea ; ZVFHMIN-NEXT: [[TMP17:%.*]] = call @llvm.maximumnum.nxv8f16( [[WIDE_LOAD]], [[WIDE_LOAD5]]) ; ZVFHMIN-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] ; ZVFHMIN-NEXT: call void @llvm.vp.store.nxv8f16.p0( [[TMP17]], ptr align 2 [[TMP18]], splat (i1 true), i32 [[TMP19]]) -; ZVFHMIN-NEXT: [[TMP16:%.*]] = zext i32 [[TMP19]] to i64 ; ZVFHMIN-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; ZVFHMIN-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; ZVFHMIN-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll index 66a7493b067c8..1752ea0814fcc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/induction-costs.ll @@ -137,6 +137,7 @@ define void @test_3_inductions(ptr noalias %dst, ptr noalias %src, i64 %n) #1 { ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi [ [[TMP2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[TMP3]], 1 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP4]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer @@ -144,7 
+145,6 @@ define void @test_3_inductions(ptr noalias %dst, ptr noalias %src, i64 %n) #1 { ; CHECK-NEXT: [[TMP6:%.*]] = sext [[TMP5]] to ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], [[TMP6]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2p0.nxv2p0( [[TMP7]], align 8 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT3]] ; CHECK-NEXT: [[VEC_IND_NEXT4]] = add [[VEC_IND1]], [[BROADCAST_SPLAT3]] @@ -191,6 +191,7 @@ define void @redundant_iv_trunc_for_cse(ptr noalias %src, ptr noalias %dst, i64 ; CHECK-NEXT: [[VEC_IND1:%.*]] = phi [ [[TMP1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[TMP3]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[EVL_BASED_IV]] @@ -201,7 +202,6 @@ define void @redundant_iv_trunc_for_cse(ptr noalias %src, ptr noalias %dst, i64 ; CHECK-NEXT: [[TMP7:%.*]] = trunc [[PREDPHI]] to ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: call void @llvm.vp.store.nxv4i8.p0( [[TMP7]], ptr align 1 [[TMP8]], splat (i1 true), i32 [[TMP3]]) -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index c520f667c2eeb..e286be0638d20 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -292,12 +292,12 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp slt [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = select [[TMP13]], [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-OUTLOOP-NEXT: [[TMP15]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP14]], [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ 
-318,11 +318,11 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-INLOOP-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP13]], i32 [[VEC_PHI]]) -; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] ; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index 7ff7e6deaf503..5259e5a058131 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -13,6 +13,7 @@ define void @load_store_factor2_i32(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP14]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 2 @@ -25,7 +26,6 @@ define void @load_store_factor2_i32(ptr %p) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 2 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP10]], [[TMP11]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv8i32.p0( [[INTERLEAVED_VEC]], ptr align 4 [[TMP15]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -69,6 +69,7 @@ define void @load_store_factor2_i32(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; SCALABLE-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP14:%.*]] = shl i64 [[INDEX]], 1 ; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP14]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 2 @@ -81,7 +82,6 @@ define void @load_store_factor2_i32(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 
[[TMP7]], 2 ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[TMP10]], [[TMP11]]) ; SCALABLE-NEXT: call void @llvm.vp.store.nxv8i32.p0( [[INTERLEAVED_VEC]], ptr align 4 [[TMP15]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; SCALABLE-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -125,6 +125,7 @@ define void @load_store_factor2_i64(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 2 @@ -137,7 +138,6 @@ define void @load_store_factor2_i64(ptr %p) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 2 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv4i64( [[TMP13]], [[TMP11]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv4i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP14]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -181,6 +181,7 @@ define void @load_store_factor2_i64(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; SCALABLE-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 1 ; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 2 @@ -193,7 +194,6 @@ define void @load_store_factor2_i64(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 2 ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv4i64( [[TMP13]], [[TMP11]]) ; SCALABLE-NEXT: call void @llvm.vp.store.nxv4i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP14]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; SCALABLE-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -237,6 +237,7 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr 
[[P:%.*]], i64 [[TMP16]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 3 @@ -251,7 +252,6 @@ define void @load_store_factor3_i32(ptr %p) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 3 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv12i32( [[TMP11]], [[TMP12]], [[TMP13]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv12i32.p0( [[INTERLEAVED_VEC]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -299,6 +299,7 @@ define void @load_store_factor3_i32(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; SCALABLE-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[INDEX]], 3 ; SCALABLE-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP16]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 3 @@ -313,7 +314,6 @@ define void @load_store_factor3_i32(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 3 ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv12i32( [[TMP11]], [[TMP12]], [[TMP13]]) ; SCALABLE-NEXT: call void @llvm.vp.store.nxv12i32.p0( [[INTERLEAVED_VEC]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; SCALABLE-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -363,6 +363,7 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 3 @@ -377,7 +378,6 @@ define void @load_store_factor3_i64(ptr %p) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 3 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv6i64( [[TMP25]], [[TMP12]], [[TMP13]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv6i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP14]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -425,6 +425,7 @@ define void @load_store_factor3_i64(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = 
call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; SCALABLE-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[INDEX]], 3 ; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 3 @@ -439,7 +440,6 @@ define void @load_store_factor3_i64(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 3 ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave3.nxv6i64( [[TMP25]], [[TMP12]], [[TMP13]]) ; SCALABLE-NEXT: call void @llvm.vp.store.nxv6i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP14]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; SCALABLE-NEXT: [[TMP19:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP19]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]] ; SCALABLE-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -489,6 +489,7 @@ define void @load_store_factor4(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 4 @@ -505,7 +506,6 @@ define void @load_store_factor4(ptr %p) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 4 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave4.nxv8i64( [[TMP26]], [[TMP15]], [[TMP16]], [[TMP17]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv8i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP9]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -555,6 +555,7 @@ define void @load_store_factor4(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; SCALABLE-NEXT: [[TMP22:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 2 ; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP8]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 4 @@ -571,7 +572,6 @@ define void @load_store_factor4(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 4 ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave4.nxv8i64( [[TMP26]], [[TMP15]], [[TMP16]], [[TMP17]]) ; SCALABLE-NEXT: call void @llvm.vp.store.nxv8i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP9]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; SCALABLE-NEXT: [[TMP22:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP22]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]] ; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -627,6 +627,7 @@ define void 
@load_store_factor5(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 1, i1 true) +; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 5 @@ -645,7 +646,6 @@ define void @load_store_factor5(ptr %p) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 5 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave5.nxv5i64( [[TMP13]], [[TMP14]], [[TMP15]], [[TMP16]], [[TMP17]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv5i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP19]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP25]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -699,6 +699,7 @@ define void @load_store_factor5(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 1, i1 true) +; SCALABLE-NEXT: [[TMP25:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 [[INDEX]], 5 ; SCALABLE-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 5 @@ -717,7 +718,6 @@ define void @load_store_factor5(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 5 ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave5.nxv5i64( [[TMP13]], [[TMP14]], [[TMP15]], [[TMP16]], [[TMP17]]) ; SCALABLE-NEXT: call void @llvm.vp.store.nxv5i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP19]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; SCALABLE-NEXT: [[TMP25:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP25]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]] ; SCALABLE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -779,6 +779,7 @@ define void @load_store_factor6(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 1, i1 true) +; CHECK-NEXT: [[TMP28:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 6 @@ -799,7 +800,6 @@ define void @load_store_factor6(ptr %p) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 6 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave6.nxv6i64( [[TMP14]], [[TMP15]], [[TMP16]], [[TMP17]], [[TMP18]], [[TMP19]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv6i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP21]], splat (i1 true), i32 
[[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP28:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP28]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]] ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -856,6 +856,7 @@ define void @load_store_factor6(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 1, i1 true) +; SCALABLE-NEXT: [[TMP28:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 [[INDEX]], 6 ; SCALABLE-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 6 @@ -876,7 +877,6 @@ define void @load_store_factor6(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 6 ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave6.nxv6i64( [[TMP14]], [[TMP15]], [[TMP16]], [[TMP17]], [[TMP18]], [[TMP19]]) ; SCALABLE-NEXT: call void @llvm.vp.store.nxv6i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP21]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; SCALABLE-NEXT: [[TMP28:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP28]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP28]] ; SCALABLE-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -944,6 +944,7 @@ define void @load_store_factor7(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 1, i1 true) +; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[INDEX]], 7 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 7 @@ -966,7 +967,6 @@ define void @load_store_factor7(ptr %p) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 7 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave7.nxv7i64( [[TMP15]], [[TMP16]], [[TMP17]], [[TMP18]], [[TMP19]], [[TMP20]], [[TMP21]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv7i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP23]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP31]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP31]] ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1027,6 +1027,7 @@ define void @load_store_factor7(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 1, i1 true) +; SCALABLE-NEXT: [[TMP31:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 [[INDEX]], 7 ; SCALABLE-NEXT: [[TMP23:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 7 @@ -1049,7 +1050,6 @@ define void @load_store_factor7(ptr %p) 
{ ; SCALABLE-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 7 ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave7.nxv7i64( [[TMP15]], [[TMP16]], [[TMP17]], [[TMP18]], [[TMP19]], [[TMP20]], [[TMP21]]) ; SCALABLE-NEXT: call void @llvm.vp.store.nxv7i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP23]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; SCALABLE-NEXT: [[TMP31:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP31]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP31]] ; SCALABLE-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1123,6 +1123,7 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 1, i1 true) +; CHECK-NEXT: [[TMP34:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 8 @@ -1147,7 +1148,6 @@ define void @load_store_factor8(ptr %p) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 8 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave8.nxv8i64( [[TMP16]], [[TMP17]], [[TMP18]], [[TMP19]], [[TMP20]], [[TMP21]], [[TMP22]], [[TMP23]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv8i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP24]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP34:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP34]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP34]] ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1209,6 +1209,7 @@ define void @load_store_factor8(ptr %p) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 1, i1 true) +; SCALABLE-NEXT: [[TMP34:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 3 ; SCALABLE-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP6]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 8 @@ -1233,7 +1234,6 @@ define void @load_store_factor8(ptr %p) { ; SCALABLE-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP7]], 8 ; SCALABLE-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave8.nxv8i64( [[TMP16]], [[TMP17]], [[TMP18]], [[TMP19]], [[TMP20]], [[TMP21]], [[TMP22]], [[TMP23]]) ; SCALABLE-NEXT: call void @llvm.vp.store.nxv8i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP24]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; SCALABLE-NEXT: [[TMP34:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP34]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP34]] ; SCALABLE-NEXT: [[TMP25:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1313,6 +1313,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; 
CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP13]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 2 @@ -1323,7 +1324,6 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP10:%.*]] = add [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP10]], ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP7]]) -; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1365,6 +1365,7 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; SCALABLE-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP13:%.*]] = shl i64 [[INDEX]], 1 ; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP13]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 2 @@ -1375,7 +1376,6 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[TMP10:%.*]] = add [[TMP8]], [[TMP9]] ; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[INDEX]] ; SCALABLE-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP10]], ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP7]]) -; SCALABLE-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1420,6 +1420,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP13]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 2 @@ -1430,7 +1431,6 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP10:%.*]] = add [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[INDEX]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP10]], ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP7]]) -; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1472,6 +1472,7 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: 
[[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; SCALABLE-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[TMP13:%.*]] = shl i64 [[INDEX]], 1 ; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP13]] ; SCALABLE-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP7]], 2 @@ -1482,7 +1483,6 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; SCALABLE-NEXT: [[TMP10:%.*]] = add [[TMP8]], [[TMP9]] ; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[INDEX]] ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP10]], ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP7]]) -; SCALABLE-NEXT: [[TMP16:%.*]] = zext i32 [[TMP7]] to i64 ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP16]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll index 328ee16a92db4..cfc9640b6f7bf 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -52,11 +52,11 @@ define void @load_store(ptr %p) { ; LMUL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; LMUL2-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; LMUL2-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; LMUL2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP6]] to i64 ; LMUL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[INDEX]] ; LMUL2-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP5]], splat (i1 true), i32 [[TMP6]]) ; LMUL2-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], splat (i64 1) ; LMUL2-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP7]], ptr align 8 [[TMP5]], splat (i1 true), i32 [[TMP6]]) -; LMUL2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP6]] to i64 ; LMUL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]] ; LMUL2-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; LMUL2-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -75,11 +75,11 @@ define void @load_store(ptr %p) { ; LMUL4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; LMUL4-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; LMUL4-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; LMUL4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP6]] to i64 ; LMUL4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[INDEX]] ; LMUL4-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i64.p0(ptr align 8 [[TMP5]], splat (i1 true), i32 [[TMP6]]) ; LMUL4-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], splat (i64 1) ; LMUL4-NEXT: call void @llvm.vp.store.nxv4i64.p0( [[TMP7]], ptr align 8 [[TMP5]], splat (i1 true), i32 [[TMP6]]) -; LMUL4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP6]] to i64 ; LMUL4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]] ; LMUL4-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; LMUL4-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -98,11 +98,11 @@ define void @load_store(ptr %p) { ; 
LMUL8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; LMUL8-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; LMUL8-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; LMUL8-NEXT: [[TMP10:%.*]] = zext i32 [[TMP6]] to i64 ; LMUL8-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[INDEX]] ; LMUL8-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8i64.p0(ptr align 8 [[TMP5]], splat (i1 true), i32 [[TMP6]]) ; LMUL8-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], splat (i64 1) ; LMUL8-NEXT: call void @llvm.vp.store.nxv8i64.p0( [[TMP7]], ptr align 8 [[TMP5]], splat (i1 true), i32 [[TMP6]]) -; LMUL8-NEXT: [[TMP10:%.*]] = zext i32 [[TMP6]] to i64 ; LMUL8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]] ; LMUL8-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; LMUL8-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll index d41f5e4d92e58..c1f6629cc8303 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll @@ -159,6 +159,7 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; TAILFOLD-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; TAILFOLD-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; TAILFOLD-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 ; TAILFOLD-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[EVL_BASED_IV]] ; TAILFOLD-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP1]], splat (i1 true), i32 [[TMP0]]) ; TAILFOLD-NEXT: [[TMP2:%.*]] = sext [[VP_OP_LOAD]] to @@ -168,7 +169,6 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; TAILFOLD-NEXT: [[TMP5:%.*]] = mul [[TMP4]], [[TMP2]] ; TAILFOLD-NEXT: [[TMP6:%.*]] = add [[TMP5]], [[VEC_PHI]] ; TAILFOLD-NEXT: [[TMP7]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP6]], [[VEC_PHI]], i32 [[TMP0]]) -; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 ; TAILFOLD-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] ; TAILFOLD-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -352,6 +352,7 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; TAILFOLD-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; TAILFOLD-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; TAILFOLD-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 ; TAILFOLD-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[EVL_BASED_IV]] ; TAILFOLD-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP1]], splat (i1 true), i32 [[TMP0]]) ; TAILFOLD-NEXT: [[TMP2:%.*]] = zext [[VP_OP_LOAD]] to @@ -361,7 +362,6 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; TAILFOLD-NEXT: [[TMP5:%.*]] = mul [[TMP4]], [[TMP2]] ; TAILFOLD-NEXT: [[TMP6:%.*]] = add [[TMP5]], [[VEC_PHI]] ; TAILFOLD-NEXT: [[TMP7]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP6]], [[VEC_PHI]], i32 [[TMP0]]) -; 
TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 ; TAILFOLD-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] ; TAILFOLD-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -545,6 +545,7 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; TAILFOLD-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; TAILFOLD-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; TAILFOLD-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 ; TAILFOLD-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[EVL_BASED_IV]] ; TAILFOLD-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP1]], splat (i1 true), i32 [[TMP0]]) ; TAILFOLD-NEXT: [[TMP2:%.*]] = zext [[VP_OP_LOAD]] to @@ -554,7 +555,6 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; TAILFOLD-NEXT: [[TMP5:%.*]] = mul [[TMP4]], [[TMP2]] ; TAILFOLD-NEXT: [[TMP6:%.*]] = add [[TMP5]], [[VEC_PHI]] ; TAILFOLD-NEXT: [[TMP7]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP6]], [[VEC_PHI]], i32 [[TMP0]]) -; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 ; TAILFOLD-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] ; TAILFOLD-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -737,6 +737,7 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; TAILFOLD-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; TAILFOLD-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; TAILFOLD-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 ; TAILFOLD-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[EVL_BASED_IV]] ; TAILFOLD-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP1]], splat (i1 true), i32 [[TMP0]]) ; TAILFOLD-NEXT: [[TMP2:%.*]] = sext [[VP_OP_LOAD]] to @@ -746,7 +747,6 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; TAILFOLD-NEXT: [[TMP5:%.*]] = mul [[TMP4]], [[TMP2]] ; TAILFOLD-NEXT: [[TMP6:%.*]] = add [[TMP5]], [[VEC_PHI]] ; TAILFOLD-NEXT: [[TMP7]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP6]], [[VEC_PHI]], i32 [[TMP0]]) -; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 ; TAILFOLD-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] ; TAILFOLD-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll index 786ef735fc7ad..103488c316c8c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll @@ -97,13 +97,13 @@ define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c ; CHECK-NEXT: [[TMP14:%.*]] = mul [[TMP13]], splat (i64 12) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP14]] ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, [[VECTOR_GEP]], i64 4 ; CHECK-NEXT: 
[[TMP18:%.*]] = call @llvm.vp.gather.nxv2i32.nxv2p0( align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]] ; CHECK-NEXT: [[TMP19:%.*]] = zext [[TMP18]] to ; CHECK-NEXT: [[TMP20:%.*]] = mul [[TMP19]], splat (i64 -7070675565921424023) ; CHECK-NEXT: [[TMP21:%.*]] = add [[TMP20]], splat (i64 -4) ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[TMP21]], align 1 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP11]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]] -; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP26]] ; CHECK-NEXT: [[TMP27:%.*]] = mul i64 12, [[TMP26]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP27]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/predicated-reverse-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/predicated-reverse-store.ll index fa247b1a042a3..1b6f401253264 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/predicated-reverse-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/predicated-reverse-store.ll @@ -14,11 +14,11 @@ define void @reverse_predicated_store(i1 %c, ptr %dst, i64 %n) #0 { ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[IV:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[IV_NEXT:%.*]] = add i64 [[IV]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 [[IV_NEXT]] ; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( zeroinitializer, splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = sub nuw nsw i64 [[TMP4]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], -1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[ARRAYIDX]], i64 [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll index 671a929e6fa35..25884fe15b6e3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll @@ -17,11 +17,11 @@ define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP7]], [[VEC_PHI]], i32 [[TMP13]]) -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -61,11 +61,11 @@ define i32 @sub(ptr 
%a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 1024, i32 0), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP0]], splat (i1 true), i32 [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = sub [[VEC_PHI]], [[VP_OP_LOAD]] ; CHECK-NEXT: [[TMP3]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP2]], [[VEC_PHI]], i32 [[TMP1]]) -; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP4]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -105,6 +105,7 @@ define i32 @addsub(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP0]], splat (i1 true), i32 [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = add [[VEC_PHI]], [[VP_OP_LOAD]] @@ -112,7 +113,6 @@ define i32 @addsub(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP4]], splat (i1 true), i32 [[TMP1]]) ; CHECK-NEXT: [[TMP9:%.*]] = sub [[TMP2]], [[VP_OP_LOAD1]] ; CHECK-NEXT: [[TMP5]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP9]], [[VEC_PHI]], i32 [[TMP1]]) -; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP6]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -158,11 +158,11 @@ define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP7:%.*]] = or [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP7]], [[VEC_PHI]], i32 [[TMP13]]) -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -204,11 +204,11 @@ define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (i32 -1), 
i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP7:%.*]] = and [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP7]], [[VEC_PHI]], i32 [[TMP13]]) -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -250,11 +250,11 @@ define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( zeroinitializer, i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP7:%.*]] = xor [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP7]], [[VEC_PHI]], i32 [[TMP13]]) -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -296,12 +296,12 @@ define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ splat (i32 2), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -344,12 +344,12 @@ define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ splat (i32 2), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 
@llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -392,11 +392,11 @@ define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[TMP7]], [[VEC_PHI]], i32 [[TMP13]]) -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -436,11 +436,11 @@ define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "tar ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP13]]) ; CHECK-NEXT: [[TMP7:%.*]] = fadd fast [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv8f16( splat (i1 true), [[TMP7]], [[VEC_PHI]], i32 [[TMP13]]) -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -606,12 +606,12 @@ define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: 
[[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -652,12 +652,12 @@ define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) # ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv8f16( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -698,12 +698,12 @@ define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8bf16.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv8bf16( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -746,12 +746,12 @@ define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 
[[TMP6]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -792,12 +792,12 @@ define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) # ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv8f16( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -838,12 +838,12 @@ define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8bf16.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP8:%.*]] = select [[TMP7]], [[WIDE_LOAD]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv8bf16( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1026,13 +1026,13 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (float -0.000000e+00), float 0.000000e+00, i32 0), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP6]], 
splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP7]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP8:%.*]] = call reassoc @llvm.fmuladd.nxv4f32( [[WIDE_LOAD]], [[WIDE_LOAD1]], [[VEC_PHI]]) ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1074,13 +1074,13 @@ define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh" ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ insertelement ( splat (half 0xH8000), half 0xH0000, i32 0), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = call @llvm.vp.load.nxv8f16.p0(ptr align 4 [[TMP7]], splat (i1 true), i32 [[TMP14]]) ; CHECK-NEXT: [[TMP8:%.*]] = call reassoc @llvm.fmuladd.nxv8f16( [[WIDE_LOAD]], [[WIDE_LOAD1]], [[VEC_PHI]]) ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv8f16( splat (i1 true), [[TMP8]], [[VEC_PHI]], i32 [[TMP14]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll index 097f05d222cf6..d53f8e1ab6573 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll @@ -5,7 +5,7 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea ; CHECK-LABEL: add ; CHECK: LV(REG): VF = vscale x 4 ; CHECK-NEXT: LV(REG): Found max usage: 2 item -; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers ; CHECK-NEXT: LV(REG): Found invariant usage: 1 item ; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll index 8bbfdf39a0624..b64398fcfc767 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll @@ -6,14 +6,14 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea ; ZVFH-LABEL: add ; ZVFH: LV(REG): VF = vscale x 4 ; ZVFH-NEXT: LV(REG): Found max usage: 2 item -; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers +; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers ; ZVFH-NEXT: LV(REG): RegisterClass: 
RISCV::VRRC, 2 registers
 ; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
 ; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; ZVFHMIN-LABEL: add
 ; ZVFHMIN: LV(REG): VF = vscale x 4
 ; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
-; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
 ; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item
 ; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll
index 6bb0d64314d3e..3be6ae1d55165 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll
@@ -4,7 +4,7 @@ define i32 @dotp(ptr %a, ptr %b) {
 ; CHECK-REGS-VP: LV(REG): VF = vscale x 16
 ; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers
 ; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
index 99139da67bb78..3f73f3f02b555 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -31,28 +31,28 @@ define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture rea
 ; CHECK-LMUL1-LABEL: add
 ; CHECK-LMUL1: LV(REG): VF = vscale x 2
 ; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-LMUL2-LABEL: add
 ; CHECK-LMUL2: LV(REG): VF = vscale x 4
 ; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
 ; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-LMUL4-LABEL: add
 ; CHECK-LMUL4: LV(REG): VF = vscale x 8
 ; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
 ; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
 ; CHECK-LMUL8-LABEL: add
 ; CHECK-LMUL8: LV(REG): VF = vscale x 16
 ; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
 ; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
@@ -86,19 +86,19 @@ define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
 ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
 ; CHECK-LMUL1: LV(REG): VF = vscale x 2
 ; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; CHECK-LMUL2: LV(REG): VF = vscale x 4
 ; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
 ; CHECK-LMUL4: LV(REG): VF = vscale x 8
 ; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
 ; CHECK-LMUL8: LV(REG): VF = vscale x 16
 ; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 7 registers
 ; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
 entry:
 %cmp3 = icmp sgt i32 %n, 0
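The GPRRC bump from 6 to 7 in these register-usage checks lines up with the scheduling change visible in the IR tests that follow: the zero-extended vector length is now defined once at the top of the loop and stays live across the whole body, rather than being materialized just before the induction update. That is one plausible reading of the extra scalar register; the patch itself only records the new counts. A minimal sketch of the loop shape the updated checks expect (block and value names are illustrative, not taken from the tests):

vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %avl = phi i64 [ 1024, %vector.ph ], [ %avl.next, %vector.body ]
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 2, i1 true)
  %evl.zext = zext i32 %evl to i64        ; defined early, live across the body
  ; ... EVL-masked body: @llvm.vp.load / @llvm.vp.store taking i32 %evl ...
  %index.next = add nuw i64 %evl.zext, %index
  %avl.next = sub nuw i64 %avl, %evl.zext
  %done = icmp eq i64 %avl.next, 0
  br i1 %done, label %middle.block, label %vector.body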
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index c50fdc66e7cc6..47a9eeedbb7b1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -26,10 +26,10 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV64-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64
 ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
 ; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]]
-; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64
 ; RV64-NEXT: [[TMP4:%.*]] = sub nuw nsw i64 [[TMP22]], 1
 ; RV64-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], -1
 ; RV64-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP5]]
@@ -59,6 +59,7 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV32-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; RV32-NEXT: [[TMP23:%.*]] = zext i32 [[TMP9]] to i64
 ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
 ; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP7]]
@@ -72,7 +73,6 @@ define void @vector_reverse_i32(ptr noalias %A, ptr noalias %B) {
 ; RV32-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP15]], splat (i1 true), i32 [[TMP9]])
 ; RV32-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP4]]
 ; RV32-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP22]], splat (i1 true), i32 [[TMP9]])
-; RV32-NEXT: [[TMP23:%.*]] = zext i32 [[TMP9]] to i64
 ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP23]], [[INDEX]]
 ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP23]]
 ; RV32-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -183,12 +183,12 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV64-NEXT: [[TMP20:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64
 ; RV64-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i32
 ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[N]], [[DOTCAST3]]
 ; RV64-NEXT: [[TMP21:%.*]] = add nsw i32 [[OFFSET_IDX]], -1
 ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64
 ; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP22]]
-; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64
 ; RV64-NEXT: [[TMP17:%.*]] = sub nuw nsw i64 [[TMP36]], 1
 ; RV64-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], -1
 ; RV64-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP23]], i64 [[TMP18]]
@@ -238,6 +238,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV32-NEXT: [[TMP16:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; RV32-NEXT: [[TMP29:%.*]] = zext i32 [[TMP16]] to i64
 ; RV32-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i32
 ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[N]], [[DOTCAST3]]
 ; RV32-NEXT: [[TMP13:%.*]] = add nsw i32 [[OFFSET_IDX]], -1
@@ -253,7 +254,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV32-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[TMP22]], splat (i1 true), i32 [[TMP16]])
 ; RV32-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP23]], i32 [[TMP10]]
 ; RV32-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP25]], splat (i1 true), i32 [[TMP16]])
-; RV32-NEXT: [[TMP29:%.*]] = zext i32 [[TMP16]] to i64
 ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP29]], [[INDEX]]
 ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP29]]
 ; RV32-NEXT: [[TMP30:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -408,12 +408,12 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV64-NEXT: [[TMP20:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64
 ; RV64-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i32
 ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[N]], [[DOTCAST3]]
 ; RV64-NEXT: [[TMP21:%.*]] = add nsw i32 [[OFFSET_IDX]], -1
 ; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64
 ; RV64-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP22]]
-; RV64-NEXT: [[TMP36:%.*]] = zext i32 [[TMP20]] to i64
 ; RV64-NEXT: [[TMP17:%.*]] = sub nuw nsw i64 [[TMP36]], 1
 ; RV64-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], -1
 ; RV64-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP23]], i64 [[TMP18]]
@@ -463,6 +463,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV32-NEXT: [[TMP16:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; RV32-NEXT: [[TMP29:%.*]] = zext i32 [[TMP16]] to i64
 ; RV32-NEXT: [[DOTCAST3:%.*]] = trunc i64 [[INDEX]] to i32
 ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[N]], [[DOTCAST3]]
 ; RV32-NEXT: [[TMP13:%.*]] = add nsw i32 [[OFFSET_IDX]], -1
@@ -478,7 +479,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; RV32-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP22]], splat (i1 true), i32 [[TMP16]])
 ; RV32-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 [[TMP10]]
 ; RV32-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP25]], splat (i1 true), i32 [[TMP16]])
-; RV32-NEXT: [[TMP29:%.*]] = zext i32 [[TMP16]] to i64
 ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP29]], [[INDEX]]
 ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP29]]
 ; RV32-NEXT: [[TMP30:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -611,10 +611,10 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV64-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV64-NEXT: [[TMP19:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64
 ; RV64-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; RV64-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
 ; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]]
-; RV64-NEXT: [[TMP22:%.*]] = zext i32 [[TMP19]] to i64
 ; RV64-NEXT: [[TMP4:%.*]] = sub nuw nsw i64 [[TMP22]], 1
 ; RV64-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], -1
 ; RV64-NEXT: [[TMP13:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP5]]
@@ -644,6 +644,7 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV32-NEXT: [[AVL:%.*]] = phi i64 [ 1023, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; RV32-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; RV32-NEXT: [[TMP23:%.*]] = zext i32 [[TMP9]] to i64
 ; RV32-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; RV32-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
 ; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP7]]
@@ -657,7 +658,6 @@ define void @vector_reverse_f32_simplify(ptr noalias %A, ptr noalias %B) {
 ; RV32-NEXT: [[VP_REVERSE1:%.*]] = call @llvm.experimental.vp.reverse.nxv4f32( [[TMP15]], splat (i1 true), i32 [[TMP9]])
 ; RV32-NEXT: [[TMP22:%.*]] = getelementptr float, ptr [[TMP16]], i32 [[TMP4]]
 ; RV32-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_REVERSE1]], ptr align 4 [[TMP22]], splat (i1 true), i32 [[TMP9]])
-; RV32-NEXT: [[TMP23:%.*]] = zext i32 [[TMP9]] to i64
 ; RV32-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP23]], [[INDEX]]
 ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP23]]
 ; RV32-NEXT: [[TMP21:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
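Throughout the reverse-iteration functions the computation itself is unchanged; the only movement is that the zext of the EVL now directly follows get.vector.length, ahead of the reversed-address arithmetic that consumes it. In sketch form (illustrative names; compare the +/- pairs above):

  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 4, i1 true)
  %evl.zext = zext i32 %evl to i64        ; hoisted; previously emitted after the stores
  %offset.idx = sub i64 1023, %index
  %idx = add nsw i64 %offset.idx, -1
  %base = getelementptr inbounds i32, ptr %B, i64 %idx
  %last = sub nuw nsw i64 %evl.zext, 1    ; first consumer: index of the last active lane
  %neg = mul i64 %last, -1
  %rev.base = getelementptr i32, ptr %base, i64 %neg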
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
index 02c363bb54457..d34c592525ea3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll
@@ -17,12 +17,12 @@ define void @test(ptr %p) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 200, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP7]], splat (i1 true), i32 [[TMP8]])
 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 200
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP_LOAD]], ptr align 32 [[TMP10]], splat (i1 true), i32 [[TMP8]])
-; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -102,12 +102,12 @@ define void @trivial_due_max_vscale(ptr %p) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 200, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP7]], splat (i1 true), i32 [[TMP8]])
 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 8192
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP_LOAD]], ptr align 32 [[TMP10]], splat (i1 true), i32 [[TMP8]])
-; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -146,12 +146,12 @@ define void @no_high_lmul_or_interleave(ptr %p) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 200, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP7]], splat (i1 true), i32 [[TMP8]])
 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1024
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP_LOAD]], ptr align 32 [[TMP10]], splat (i1 true), i32 [[TMP8]])
-; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
index 7330ce61515d9..312c16039a4d1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll
@@ -17,11 +17,11 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP6]], splat (i1 true), i32 [[TMP10]])
 ; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP8]], ptr align 8 [[TMP6]], splat (i1 true), i32 [[TMP10]])
-; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -63,15 +63,15 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP10]])
 ; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP8]], ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP10]])
-; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.end:
@@ -147,15 +147,15 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP6]], splat (i1 true), i32 [[TMP10]])
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]]
 ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], align 8 [[TMP8]], splat (i1 true), i32 [[TMP10]])
-; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.end:
@@ -189,17 +189,17 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP12]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP6]], splat (i1 true), i32 [[TMP12]])
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]]
 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i64.nxv2p0( align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]])
 ; CHECK-NEXT: [[TMP13:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT: [[TMP9]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[TMP13]], [[VEC_PHI]], i32 [[TMP12]])
-; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP12]] to i64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP9]])
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -237,13 +237,13 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr align 8 [[TMP6]], splat (i1 true), i32 [[TMP10]])
-; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.end:
@@ -276,13 +276,13 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2p0.p0( [[BROADCAST_SPLAT]], ptr align 8 [[TMP6]], splat (i1 true), i32 [[TMP10]])
-; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.end:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index 3f999a5cc8fc5..1536477db4c28 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -18,11 +18,11 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP10]])
 ; CHECK-NEXT: [[TMP7:%.*]] = add [[VP_OP_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP7]], ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP10]])
-; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -63,11 +63,11 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP7]] to i64
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP7]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], align 8 [[TMP10]], splat (i1 true), i32 [[TMP7]])
-; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP7]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP12]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -105,13 +105,13 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP7]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i64.nxv2p0( align 8 [[TMP10]], splat (i1 true), i32 [[TMP7]])
 ; CHECK-NEXT: [[TMP12:%.*]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT: [[TMP11]] = call @llvm.vp.merge.nxv2i64( splat (i1 true), [[TMP12]], [[VEC_PHI]], i32 [[TMP7]])
-; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -153,9 +153,9 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr align 8 [[TMP10]], splat (i1 true), i32 [[TMP9]])
-; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP9]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -192,10 +192,10 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
 ; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -231,12 +231,12 @@ define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V]], i64 0
 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr align 8 [[ARRAYIDX]], splat (i1 true), i32 [[TMP0]])
-; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[TMP5]], [[IV]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -275,11 +275,11 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP10]])
 ; CHECK-NEXT: [[TMP7:%.*]] = add [[VP_OP_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[TMP7]], ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP10]])
-; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP10]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
index 8971b0cadfa48..8e01ec7593eeb 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll
@@ -15,11 +15,11 @@ define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP14]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]])
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp sge [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4i1( [[TMP7]], splat (i1 true), [[VEC_PHI]], i32 [[TMP14]])
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP14]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -64,15 +64,15 @@ define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP14]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP14]])
 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast uge [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4i1( [[TMP7]], splat (i1 true), [[VEC_PHI]], i32 [[TMP14]])
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP14]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP8]])
 ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
@@ -111,15 +111,15 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP21]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP21]])
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], splat (i32 3)
 ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4i1( [[TMP7]], splat (i1 true), [[VEC_PHI]], i32 [[TMP21]])
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP21]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP8]])
 ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
@@ -158,15 +158,15 @@ define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP21]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP21]])
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne [[WIDE_LOAD]], splat (i32 3)
 ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4i1( [[TMP7]], splat (i1 true), [[VEC_PHI]], i32 [[TMP21]])
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP21]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP8]])
 ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
@@ -205,15 +205,15 @@ define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP21]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP21]])
 ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast one [[WIDE_LOAD]], splat (float 3.000000e+00)
 ; CHECK-NEXT: [[TMP8]] = call @llvm.vp.merge.nxv4i1( [[TMP7]], splat (i1 true), [[VEC_PHI]], i32 [[TMP21]])
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP21]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP8]])
 ; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]]
@@ -289,6 +289,7 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP17]] to i64
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[INDEX]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[TMP17]])
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt [[WIDE_LOAD]], splat (i32 35)
@@ -298,11 +299,10 @@ define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1
 ; CHECK-NEXT: [[TMP10:%.*]] = or [[VEC_PHI]], [[TMP9]]
 ; CHECK-NEXT: [[PREDPHI1:%.*]] = select [[TMP7]], [[TMP10]], [[VEC_PHI]]
 ; CHECK-NEXT: [[PREDPHI]] = call @llvm.vp.merge.nxv4i1( splat (i1 true), [[PREDPHI1]], [[VEC_PHI]], i32 [[TMP17]])
-; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP17]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[PREDPHI]])
 ; CHECK-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]]
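The reduction tests show the same hoist feeding vp.merge, which keeps lanes at and beyond the EVL at their incoming value so the partial reduction stays correct on the final, shorter iteration. Distilled from the select_icmp checks (illustrative names, with the scalable vector types spelled out):

  %vec.phi = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ %merge, %vector.body ]
  %avl = phi i64 [ %n, %vector.ph ], [ %avl.next, %vector.body ]
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %avl, i32 4, i1 true)
  %evl.zext = zext i32 %evl to i64
  %gep = getelementptr inbounds i32, ptr %c, i64 %index
  %ld = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %gep, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %cmp = icmp sge <vscale x 4 x i32> %ld, %threshold
  %merge = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> %cmp, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> %vec.phi, i32 %evl)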
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 85d9a11cb65e0..59fb6cd842206 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -223,10 +223,10 @@ define void @single_constant_stride_ptr_iv(ptr %p) {
 ; CHECK-NEXT: [[TMP16:%.*]] = shl [[TMP14]], splat (i64 3)
 ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP16]]
 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
 ; CHECK-NEXT: [[TMP19:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[VECTOR_GEP]], splat (i1 true), i32 [[TMP11]])
 ; CHECK-NEXT: [[TMP20:%.*]] = add [[TMP19]], splat (i32 1)
 ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP20]], align 4 [[VECTOR_GEP]], splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP9]], 3
 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP5]]
@@ -332,11 +332,11 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; NOSTRIDED-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64
 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP7]])
 ; NOSTRIDED-NEXT: [[TMP10:%.*]] = add [[WIDE_LOAD]], splat (i32 1)
 ; NOSTRIDED-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP10]], ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP7]])
-; NOSTRIDED-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64
 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; NOSTRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; NOSTRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -453,11 +453,11 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; NOSTRIDED-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64
 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP7]])
 ; NOSTRIDED-NEXT: [[TMP10:%.*]] = add [[WIDE_LOAD]], splat (i32 1)
 ; NOSTRIDED-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP10]], ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP7]])
-; NOSTRIDED-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64
 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; NOSTRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; NOSTRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -623,12 +623,12 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-NEXT: [[TMP16:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; NOSTRIDED-NEXT: [[TMP13:%.*]] = zext i32 [[TMP16]] to i64
 ; NOSTRIDED-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[P]], i64 [[INDEX]]
 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP16]])
 ; NOSTRIDED-NEXT: [[TMP14:%.*]] = add [[WIDE_LOAD]], splat (i32 1)
 ; NOSTRIDED-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[P2]], i64 [[INDEX]]
 ; NOSTRIDED-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP14]], ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP16]])
-; NOSTRIDED-NEXT: [[TMP13:%.*]] = zext i32 [[TMP16]] to i64
 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]]
 ; NOSTRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]]
 ; NOSTRIDED-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -942,11 +942,11 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NOSTRIDED-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; NOSTRIDED-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64
 ; NOSTRIDED-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDEX]]
 ; NOSTRIDED-NEXT: [[WIDE_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP7]])
 ; NOSTRIDED-NEXT: [[TMP10:%.*]] = add [[WIDE_LOAD]], splat (i32 1)
 ; NOSTRIDED-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP10]], ptr align 4 [[TMP8]], splat (i1 true), i32 [[TMP7]])
-; NOSTRIDED-NEXT: [[TMP11:%.*]] = zext i32 [[TMP7]] to i64
 ; NOSTRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; NOSTRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; NOSTRIDED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1129,10 +1129,10 @@ define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
 ; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], [[TMP18]]
 ; STRIDED-NEXT: [[VECTOR_GEP7:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP18]]
 ; STRIDED-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
-; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[VECTOR_GEP7]], splat (i1 true), i32 [[TMP14]]), !alias.scope [[META12:![0-9]+]]
-; STRIDED-NEXT: [[TMP30:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1)
-; STRIDED-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP30]], align 4 [[VECTOR_GEP]], splat (i1 true), i32 [[TMP14]]), !alias.scope [[META15:![0-9]+]], !noalias [[META12]]
 ; STRIDED-NEXT: [[TMP16:%.*]] = zext i32 [[TMP14]] to i64
+; STRIDED-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[VECTOR_GEP7]], splat (i1 true), i32 [[TMP14]]), !alias.scope [[META12:![0-9]+]]
+; STRIDED-NEXT: [[TMP9:%.*]] = add [[WIDE_MASKED_GATHER]], splat (i32 1)
+; STRIDED-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[TMP9]], align 4 [[VECTOR_GEP]], splat (i1 true), i32 [[TMP14]]), !alias.scope [[META15:![0-9]+]], !noalias [[META12]]
 ; STRIDED-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
 ; STRIDED-NEXT: [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP16]]
 ; STRIDED-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP25]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
index 06d4c24459945..3c393c840b94a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-bin-unary-ops-args.ll
@@ -27,12 +27,12 @@ define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = and [[VP_OP_LOAD]], splat (i8 1)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -142,12 +142,12 @@ define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = or [[VP_OP_LOAD]], splat (i8 1)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -257,12 +257,12 @@ define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = xor [[VP_OP_LOAD]], splat (i8 1)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -372,12 +372,12 @@ define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = shl [[VP_OP_LOAD]], splat (i8 1)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -487,12 +487,12 @@ define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = lshr [[VP_OP_LOAD]], splat (i8 1)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -602,12 +602,12 @@ define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = ashr [[VP_OP_LOAD]], splat (i8 1)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -717,12 +717,12 @@ define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], splat (i8 1)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -832,12 +832,12 @@ define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = sub [[VP_OP_LOAD]], splat (i8 1)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -947,12 +947,12 @@ define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = mul [[VP_OP_LOAD]], splat (i8 3)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1062,12 +1062,12 @@ define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv [[VP_OP_LOAD]], splat (i8 3)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1177,12 +1177,12 @@ define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = udiv [[VP_OP_LOAD]], splat (i8 3)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1292,12 +1292,12 @@ define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = srem [[VP_OP_LOAD]], splat (i8 3)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1407,12 +1407,12 @@ define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = urem [[VP_OP_LOAD]], splat (i8 3)
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0( [[VP_OP]], ptr align 1 [[TMP16]], splat (i1 true), i32 [[TMP11]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1525,12 +1525,12 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = fadd fast [[VP_OP_LOAD]], splat (float 3.000000e+00)
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1642,12 +1642,12 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = fsub fast [[VP_OP_LOAD]], splat (float 3.000000e+00)
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1759,12 +1759,12 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = fmul fast [[VP_OP_LOAD]], splat (float 3.000000e+00)
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0( [[VP_OP]], ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1876,12 +1876,12 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = fdiv fast <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -2046,12 +2046,12 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = fneg fast <vscale x 4 x float> [[VP_OP_LOAD]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
index 62a200d17c8a2..40bbe53dd721a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-call-intrinsics.ll
@@ -32,6 +32,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[EVL_BASED_IV]]
@@ -39,7 +40,6 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -170,6 +170,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[EVL_BASED_IV]]
@@ -177,7 +178,6 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -308,6 +308,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[EVL_BASED_IV]]
@@ -315,7 +316,6 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -446,6 +446,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[EVL_BASED_IV]]
@@ -453,7 +454,6 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
 ; IF-EVL-NEXT: [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -580,12 +580,12 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.ctlz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true)
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -698,12 +698,12 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP13]] to i64
 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
 ; IF-EVL-NEXT: [[TMP17:%.*]] = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true)
 ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP17]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
-; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP13]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -816,6 +816,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP27:%.*]] = fpext <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x double>
@@ -823,7 +824,6 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[TMP15:%.*]] = trunc <vscale x 4 x i64> [[TMP28]] to <vscale x 4 x i32>
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -944,6 +944,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP27:%.*]] = fpext <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x double>
@@ -951,7 +952,6 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[TMP15:%.*]] = trunc <vscale x 4 x i64> [[TMP28]] to <vscale x 4 x i32>
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1072,12 +1072,12 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true)
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
index b891aea634f1c..c2fec357a1164 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cast-intrinsics.ll
@@ -27,12 +27,12 @@ define void @vp_sext(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr align 4 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]]
 ; IF-EVL-NEXT: [[TMP16:%.*]] = sext <vscale x 2 x i32> [[VP_OP_LOAD]] to <vscale x 2 x i64>
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -144,12 +144,12 @@ define void @vp_zext(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr align 4 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META9:![0-9]+]]
 ; IF-EVL-NEXT: [[TMP16:%.*]] = zext <vscale x 2 x i32> [[VP_OP_LOAD]] to <vscale x 2 x i64>
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP16]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META12:![0-9]+]], !noalias [[META9]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -261,12 +261,12 @@ define void @vp_trunc(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META16:![0-9]+]]
 ; IF-EVL-NEXT: [[TMP16:%.*]] = trunc <vscale x 2 x i64> [[VP_OP_LOAD]] to <vscale x 2 x i32>
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP16]], ptr align 4 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META19:![0-9]+]], !noalias [[META16]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -378,12 +378,12 @@ define void @vp_fpext(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x float> @llvm.vp.load.nxv2f32.p0(ptr align 4 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META23:![0-9]+]]
 ; IF-EVL-NEXT: [[TMP16:%.*]] = fpext <vscale x 2 x float> [[VP_OP_LOAD]] to <vscale x 2 x double>
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f64.p0(<vscale x 2 x double> [[TMP16]], ptr align 8 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META26:![0-9]+]], !noalias [[META23]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -495,12 +495,12 @@ define void @vp_fptrunc(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x double> @llvm.vp.load.nxv2f64.p0(ptr align 8 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META30:![0-9]+]]
 ; IF-EVL-NEXT: [[TMP16:%.*]] = fptrunc <vscale x 2 x double> [[VP_OP_LOAD]] to <vscale x 2 x float>
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2f32.p0(<vscale x 2 x float> [[TMP16]], ptr align 4 [[TMP17]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]]), !alias.scope [[META33:![0-9]+]], !noalias [[META30]]
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -612,12 +612,12 @@ define void @vp_sitofp(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
 ; IF-EVL-NEXT: [[TMP18:%.*]] = sitofp <vscale x 4 x i32> [[VP_OP_LOAD]] to <vscale x 4 x float>
 ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
-; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -729,12 +729,12 @@ define void @vp_uitofp(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
 ; IF-EVL-NEXT: [[TMP18:%.*]] = uitofp <vscale x 4 x i32> [[VP_OP_LOAD]] to <vscale x 4 x float>
 ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
-; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -846,12 +846,12 @@ define void @vp_fptosi(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
 ; IF-EVL-NEXT: [[TMP18:%.*]] = fptosi <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x i32>
 ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
-; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -963,12 +963,12 @@ define void @vp_fptoui(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
 ; IF-EVL-NEXT: [[TMP18:%.*]] = fptoui <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x i32>
 ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
-; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -1080,12 +1080,12 @@ define void @vp_inttoptr(ptr %a, ptr %b, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP16]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])
 ; IF-EVL-NEXT: [[TMP18:%.*]] = inttoptr <vscale x 2 x i64> [[VP_OP_LOAD]] to <vscale x 2 x ptr>
 ; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[TMP18]], ptr align 8 [[TMP19]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP14]])
-; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll
index 1aa53e1ef95a0..0fb52275dea0d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-complex-mask.ll
@@ -30,10 +30,11 @@ define void @test(i64 %n, ptr noalias %src0, ptr noalias %src1, ptr noalias %src
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
-; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP7]], i64 0
-; IF-EVL-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; IF-EVL-NEXT: [[TMP8:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
-; IF-EVL-NEXT: [[TMP9:%.*]] = icmp ult <vscale x 4 x i32> [[TMP8]], [[BROADCAST_SPLAT6]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP7]] to i64
+; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP21]], i64 0
+; IF-EVL-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; IF-EVL-NEXT: [[TMP9:%.*]] = icmp ult <vscale x 4 x i64> [[TMP16]], [[BROADCAST_SPLAT6]]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC0]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP10]], <vscale x 4 x i1> [[BROADCAST_SPLAT]], i32 [[TMP7]])
 ; IF-EVL-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[VP_OP_LOAD]]
@@ -51,7 +52,6 @@ define void @test(i64 %n, ptr noalias %src0, ptr noalias %src1, ptr noalias %src
 ; IF-EVL-NEXT: [[PREDPHI9:%.*]] = select i1 [[C3]], <vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[PREDPHI8]]
 ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[PREDPHI9]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP15]])
-; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP7]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP21]]
 ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
index e661254538908..650791573b13b 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-cond-reduction.ll
@@ -30,13 +30,13 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 4, i1 true)
+; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV1]]
 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
 ; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = select <vscale x 4 x i1> [[TMP18]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add <vscale x 4 x i32> [[TMP19]], [[VEC_PHI]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP20]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
-; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP22]], [[EVL_BASED_IV1]]
 ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP22]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -58,13 +58,13 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
 ; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
-; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
 ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP23]]
 ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -201,13 +201,13 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP10:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 4, i1 true)
+; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-OUTLOOP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-OUTLOOP-NEXT: [[TMP3:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
 ; IF-EVL-OUTLOOP-NEXT: [[TMP19:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[VP_OP_LOAD]]
 ; IF-EVL-OUTLOOP-NEXT: [[PREDPHI1:%.*]] = select <vscale x 4 x i1> [[TMP3]], <vscale x 4 x i32> [[TMP19]], <vscale x 4 x i32> [[VEC_PHI]]
 ; IF-EVL-OUTLOOP-NEXT: [[PREDPHI]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[PREDPHI1]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP11]])
-; IF-EVL-OUTLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
 ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP10]], [[TMP23]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -229,12 +229,12 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
 ; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> [[TMP19]], i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
-; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
 ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP23]]
 ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -387,6 +387,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[TMP10]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP12]], i64 0
 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV1]]
@@ -395,7 +396,6 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-OUTLOOP-NEXT: [[TMP18:%.*]] = add <vscale x 4 x i32> [[TMP17]], [[VEC_PHI]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[TMP18]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP12]])
-; IF-EVL-OUTLOOP-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP20]], [[EVL_BASED_IV1]]
 ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
 ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
@@ -420,6 +420,7 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
 ; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV1]]
@@ -428,7 +429,6 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-INLOOP-NEXT: [[ADD]] = add i32 [[TMP17]], [[RDX]]
-; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT1]] = add i64 [[TMP19]], [[EVL_BASED_IV1]]
 ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
 ; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
@@ -583,6 +583,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND2:%.*]] = phi <vscale x 4 x i32> [ [[TMP10]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-OUTLOOP-NEXT: [[TMP25:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP14]], i64 0
 ; IF-EVL-OUTLOOP-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-OUTLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
@@ -591,7 +592,6 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: [[TMP22:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[VP_OP_LOAD]]
 ; IF-EVL-OUTLOOP-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> [[TMP22]], <vscale x 4 x i32> [[VEC_PHI]]
 ; IF-EVL-OUTLOOP-NEXT: [[TMP24]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP14]])
-; IF-EVL-OUTLOOP-NEXT: [[TMP25:%.*]] = zext i32 [[TMP14]] to i64
 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[IV]]
 ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]]
 ; IF-EVL-OUTLOOP-NEXT: [[VEC_IND_NEXT7]] = add <vscale x 4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT2]]
@@ -616,6 +616,7 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
 ; IF-EVL-INLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 ; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
@@ -623,7 +624,6 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], [[VEC_IND]]
 ; IF-EVL-INLOOP-NEXT: [[TMP16:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> [[TMP15]], i32 [[TMP11]])
 ; IF-EVL-INLOOP-NEXT: [[TMP17]] = add i32 [[TMP16]], [[VEC_PHI]]
-; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[IV]]
 ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]]
 ; IF-EVL-INLOOP-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
index e185de8921680..348ddbd609583 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-div.ll
@@ -18,6 +18,7 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
@@ -26,7 +27,6 @@ define void @test_sdiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = sdiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -115,13 +115,13 @@ define void @test_sdiv_divisor_invariant_nonconst(ptr noalias %a, i64 %b, ptr no
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP1]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
 ; IF-EVL-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP0]])
 ; IF-EVL-NEXT: [[TMP3:%.*]] = sdiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP2]]
 ; IF-EVL-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], ptr align 8 [[TMP4]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
-; IF-EVL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -208,6 +208,7 @@ define void @test_sdiv_both_invariant_nonconst(ptr noalias %a, i64 %b, i64 %b2,
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP1]] to i64
 ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP2]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
 ; IF-EVL-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[BROADCAST_SPLAT2]], <vscale x 2 x i64> splat (i64 1), i32 [[TMP1]])
@@ -215,7 +216,6 @@ define void @test_sdiv_both_invariant_nonconst(ptr noalias %a, i64 %b, i64 %b2,
 ; IF-EVL-NEXT: [[TMP3:%.*]] = add <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP8]]
 ; IF-EVL-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], ptr align 8 [[TMP4]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP1]])
-; IF-EVL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP1]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -301,13 +301,13 @@ define void @test_sdiv_divisor_invariant_minusone(ptr noalias %a, ptr noalias %c
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP1]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
 ; IF-EVL-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> splat (i64 -1), <vscale x 2 x i64> splat (i64 1), i32 [[TMP0]])
 ; IF-EVL-NEXT: [[TMP3:%.*]] = sdiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP2]]
 ; IF-EVL-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP3]], ptr align 8 [[TMP4]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
-; IF-EVL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -388,12 +388,12 @@ define void @test_sdiv_divisor_invariant_safeimm(ptr noalias %a, ptr noalias %c)
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP4:%.*]] = zext i32 [[TMP0]] to i64
 ; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP1]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
 ; IF-EVL-NEXT: [[TMP2:%.*]] = sdiv <vscale x 2 x i64> [[VP_OP_LOAD]], splat (i64 3)
 ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP2]], ptr align 8 [[TMP3]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
-; IF-EVL-NEXT: [[TMP4:%.*]] = zext i32 [[TMP0]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP4]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]]
 ; IF-EVL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -474,6 +474,7 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
@@ -482,7 +483,6 @@ define void @test_udiv(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = udiv <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -569,6 +569,7 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
@@ -577,7 +578,6 @@ define void @test_srem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = srem <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -664,6 +664,7 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
 ; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[B]], i64 [[EVL_BASED_IV]]
@@ -672,7 +673,6 @@ define void @test_urem(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = urem <vscale x 2 x i64> [[VP_OP_LOAD]], [[TMP11]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[C]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VP_OP]], ptr align 8 [[TMP12]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
-; IF-EVL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
index ab3f47079b981..66633e40e5421 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
@@ -29,13 +29,13 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP25]], %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[VP_OP_LOAD]], i32 -1, <vscale x 4 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP12]])
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = add nsw <vscale x 4 x i32> [[TMP16]], [[VP_OP_LOAD]]
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
 ; IF-EVL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -143,6 +143,7 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP32]], %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP15]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP15]] to i64
 ; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP15]])
 ; IF-EVL-NEXT: [[TMP19]] = call <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[VP_OP_LOAD]], i32 -1, <vscale x 4 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP15]])
@@ -150,7 +151,6 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = add nsw <vscale x 4 x i32> [[TMP19]], [[TMP20]]
 ; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP21]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP15]])
-; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP15]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP23]]
 ; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -276,6 +276,7 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP39]], %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP18]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP18]] to i64
 ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP18]])
 ; IF-EVL-NEXT: [[TMP22]] = call <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[VP_OP_LOAD]], i32 -1, <vscale x 4 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP18]])
@@ -285,7 +286,6 @@ define void @third_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL-NEXT: [[VP_OP5:%.*]] = add <vscale x 4 x i32> [[TMP40]], [[TMP22]]
 ; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP18]])
-; IF-EVL-NEXT: [[TMP27:%.*]] = zext i32 [[TMP18]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP27]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP27]]
 ; IF-EVL-NEXT: [[TMP26:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -417,13 +417,13 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[PREV_EVL:%.*]] = phi i32 [ [[TMP4]], %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP9]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[INDVARS]]
 ; IF-EVL-NEXT: [[WIDE_LOAD]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[ARRAYIDX]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> [[VECTOR_RECUR]], <vscale x 4 x i32> [[WIDE_LOAD]], i32 -1, <vscale x 4 x i1> splat (i1 true), i32 [[PREV_EVL]], i32 [[TMP9]])
 ; IF-EVL-NEXT: [[TMP11:%.*]] = add nsw <vscale x 4 x i32> [[TMP10]], [[WIDE_LOAD]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[INDVARS]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP11]], ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
-; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[INDVARS]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]]
 ; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
index 77818580ccf50..e1d6d1d7a65dd 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
@@ -20,11 +20,11 @@ define i32 @add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]]
-; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -211,11 +211,11 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.or.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP11]])
 ; IF-EVL-NEXT: [[TMP15]] = or i32 [[TMP14]], [[VEC_PHI]]
-; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64
[[AVL_NEXT]], 0 @@ -295,11 +295,11 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.and.nxv4i32(i32 -1, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP15]] = and i32 [[TMP14]], [[VEC_PHI]] -; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -379,11 +379,11 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 0, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP15]] = xor i32 [[TMP14]], [[VEC_PHI]] -; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -463,11 +463,11 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) -; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -549,11 +549,11 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] 
= phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -2147483648, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) -; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -635,11 +635,11 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 -1, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) -; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -721,11 +721,11 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 0, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP14]], i32 [[VEC_PHI]]) -; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -807,11 +807,11 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 
@llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP15]] = fadd reassoc float [[TMP14]], [[VEC_PHI]] -; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -998,12 +998,12 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmin.nxv4f32(float 0x47EFFFFFE0000000, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP14]], [[VEC_PHI]] ; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]] -; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1086,12 +1086,12 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmax.nxv4f32(float 0xC7EFFFFFE0000000, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt float [[TMP14]], [[VEC_PHI]] ; IF-EVL-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]] -; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1390,6 +1390,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], 
[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP10]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[EVL_BASED_IV]] @@ -1397,7 +1398,6 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[TMP16:%.*]] = fmul reassoc [[VP_OP_LOAD]], [[VP_OP_LOAD1]] ; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP16]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP18]] = fadd reassoc float [[TMP17]], [[VEC_PHI]] -; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1484,11 +1484,11 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp slt [[VP_OP_LOAD]], splat (i32 3) ; IF-EVL-NEXT: [[TMP16]] = call @llvm.vp.merge.nxv4i1( [[TMP13]], splat (i1 true), [[VEC_PHI]], i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1576,11 +1576,11 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP11]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt [[VP_OP_LOAD]], splat (float 3.000000e+00) ; IF-EVL-NEXT: [[TMP16]] = call @llvm.vp.merge.nxv4i1( [[TMP13]], splat (i1 true), [[VEC_PHI]], i32 [[TMP11]]) -; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP11]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll index 4a12dacbacfc9..d751eb1950a47 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-interleave.ll @@ -17,6 +17,7 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP16:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP16]] to i64 ; IF-EVL-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], i64 [[EVL_BASED_IV]], i32 0 ; IF-EVL-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP16]], 2 ; IF-EVL-NEXT: [[WIDE_VEC:%.*]] = call @llvm.vp.load.nxv8i32.p0(ptr align 4 [[TMP6]], splat (i1 true), i32 [[INTERLEAVE_EVL]]) @@ -26,7 +27,6 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { ; IF-EVL-NEXT: [[TMP9:%.*]] = add nsw [[TMP15]], [[TMP14]] ; IF-EVL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP9]], ptr align 4 [[TMP10]], splat (i1 true), i32 [[TMP16]]) -; IF-EVL-NEXT: [[TMP8:%.*]] = zext i32 [[TMP16]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll index b063514370319..4d40a61843d72 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-intermediate-store.ll @@ -38,11 +38,11 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP10]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[TMP11:%.*]] = phi i64 [ [[N]], [[ENTRY]] ], [ [[AVL_NEXT:%.*]], [[FOR_BODY]] ] ; IF-EVL-OUTLOOP-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true) +; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], splat (i1 true), i32 [[TMP12]]), !alias.scope [[META0:![0-9]+]] ; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-OUTLOOP-NEXT: [[TMP19]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP12]]) -; IF-EVL-OUTLOOP-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64 ; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-OUTLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP21]] ; IF-EVL-OUTLOOP-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -85,11 +85,11 @@ define void @reduction_intermediate_store(ptr %a, i64 %n, i32 %start, ptr %addr) ; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = phi i64 [ [[N]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], 
[[VECTOR_BODY]] ] ; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP13]], i32 4, i1 true) +; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP19]], splat (i1 true), i32 [[TMP14]]), !alias.scope [[META0:![0-9]+]] ; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP14]]) ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]] -; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP14]] to i64 ; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] ; IF-EVL-INLOOP-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP13]], [[TMP23]] ; IF-EVL-INLOOP-NEXT: [[TMP15:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll index e94e64fe11d2f..814a3880e0a74 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-known-no-overflow.ll @@ -21,11 +21,11 @@ define void @trip_count_max_1024(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[UMAX]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; CHECK-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], splat (i64 1) ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -70,11 +70,11 @@ define void @overflow_at_0(ptr %p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TC]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; CHECK-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], splat (i64 1) ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP9]]) -; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP13]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -119,11 +119,11 @@ define void @no_overflow_at_0(ptr 
%p, i64 %tc) vscale_range(2, 1024) { ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TC_ADD]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[P]], i64 [[EVL_BASED_IV]] ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP5]]) ; CHECK-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], splat (i64 1) ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP]], ptr align 8 [[TMP7]], splat (i1 true), i32 [[TMP5]]) -; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll index c7950ee402a9d..95825c98777b2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-masked-loadstore.ll @@ -17,6 +17,7 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP17:%.*]] = icmp ne [[VP_OP_LOAD]], zeroinitializer @@ -24,7 +25,6 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { ; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP19]], [[TMP17]], i32 [[TMP10]]) ; IF-EVL-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], [[VP_OP_LOAD3]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP]], ptr align 4 [[TMP19]], [[TMP17]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP9]], [[TMP21]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll index e9f4ffda9b822..d713747b809fb 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-ordered-reduction.ll @@ -20,10 +20,10 @@ define float @fadd(ptr noalias nocapture readonly %a, i64 %n) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 
@llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP14]] = call float @llvm.vp.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[VP_OP_LOAD]], splat (i1 true), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP9]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll index 89d04f6423c58..029dd656ae73a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll @@ -19,11 +19,11 @@ define i32 @add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[VP_OP:%.*]] = add [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -213,11 +213,11 @@ define i32 @or(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[VP_OP:%.*]] = or [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -300,11 +300,11 @@ define i32 @and(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 
@llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[VP_OP:%.*]] = and [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -387,11 +387,11 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[VP_OP:%.*]] = xor [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -475,12 +475,12 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp slt [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = select [[TMP13]], [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP15]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP14]], [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -568,12 +568,12 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 
[[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp sgt [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = select [[TMP13]], [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP15]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP14]], [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -661,12 +661,12 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp ult [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = select [[TMP13]], [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP15]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP14]], [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -754,12 +754,12 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp ugt [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = select [[TMP13]], [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP15]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP14]], [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -846,11 +846,11 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: 
[[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[VP_OP:%.*]] = fadd reassoc [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[VP_OP]], [[VEC_PHI]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1041,12 +1041,12 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = select [[TMP13]], [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP15]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[TMP14]], [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1134,12 +1134,12 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast ogt [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP14:%.*]] = select [[TMP13]], [[VP_OP_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP15]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[TMP14]], [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1442,13 +1442,13 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 
[[TMP12]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP14]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP16:%.*]] = call reassoc @llvm.fmuladd.nxv4f32( [[VP_OP_LOAD]], [[VP_OP_LOAD1]], [[VEC_PHI]]) ; IF-EVL-NEXT: [[TMP17]] = call @llvm.vp.merge.nxv4f32( splat (i1 true), [[TMP16]], [[VEC_PHI]], i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1536,11 +1536,11 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp slt [[VP_OP_LOAD]], splat (i32 3) ; IF-EVL-NEXT: [[TMP15]] = call @llvm.vp.merge.nxv4i1( [[TMP13]], splat (i1 true), [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -1628,11 +1628,11 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], splat (i1 true), i32 [[TMP9]]) ; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt [[VP_OP_LOAD]], splat (float 3.000000e+00) ; IF-EVL-NEXT: [[TMP15]] = call @llvm.vp.merge.nxv4i1( [[TMP13]], splat (i1 true), [[VEC_PHI]], i32 [[TMP9]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 0fd04da39ffbf..ed45cd7f7d89b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -17,10 +17,10 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], 
[[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], -1 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] -; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[TMP4:%.*]] = sub nuw nsw i64 [[TMP20]], 1 ; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP4]], -1 ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP6]] @@ -120,6 +120,7 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL:%.*]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32 ; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], -1 @@ -127,7 +128,6 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], splat (i1 true), i32 [[TMP5]]) ; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt [[VP_OP_LOAD]], splat (i32 100) ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]] -; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 ; IF-EVL-NEXT: [[TMP15:%.*]] = sub nuw nsw i64 [[TMP26]], 1 ; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP15]], -1 ; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP7]] @@ -254,9 +254,9 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[AVL:%.*]] = phi i64 [ 1025, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true) +; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1024, [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8:%.*]], i64 [[TMP10]] -; IF-EVL-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 [[TMP9]], 1 ; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 [[TMP3]], -1 ; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP11]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll index cd60817465689..2b0b7c3ce6254 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-safe-dep-distance.ll @@ -20,12 +20,12 @@ define void @test(ptr %p) { ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP5:%.*]] = phi i64 [ 200, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] 
] ; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true) +; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 200 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP_LOAD]], ptr align 8 [[TMP11]], splat (i1 true), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP5]], [[TMP13]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -276,12 +276,12 @@ define void @trivial_due_max_vscale(ptr %p) { ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP5:%.*]] = phi i64 [ 200, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 2, i1 true) +; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv2i64.p0(ptr align 32 [[TMP8]], splat (i1 true), i32 [[TMP6]]) ; IF-EVL-NEXT: [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 8192 ; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP10]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[VP_OP_LOAD]], ptr align 32 [[TMP11]], splat (i1 true), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP13:%.*]] = zext i32 [[TMP6]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP5]], [[TMP13]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -363,12 +363,12 @@ define void @no_high_lmul_or_interleave(ptr %p) { ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp ult i64 [[AVL]], 1024 ; IF-EVL-NEXT: [[SAFE_AVL:%.*]] = select i1 [[TMP9]], i64 [[AVL]], i64 1024 ; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[SAFE_AVL]], i32 1, i1 true) +; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv1i64.p0(ptr align 32 [[TMP2]], splat (i1 true), i32 [[TMP10]]) ; IF-EVL-NEXT: [[TMP4:%.*]] = add i64 [[EVL_BASED_IV]], 1024 ; IF-EVL-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP4]] ; IF-EVL-NEXT: call void @llvm.vp.store.nxv1i64.p0( [[VP_OP_LOAD]], ptr align 32 [[TMP5]], splat (i1 true), i32 [[TMP10]]) -; IF-EVL-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll index b75623186362d..018ec9869f6b3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-uniform-store.ll @@ -20,11 +20,11 @@ define void @lshift_significand(i32 
%n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[SPEC_SELECT]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[TMP12:%.*]] = sub nuw nsw i64 1, [[OFFSET_IDX]] ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP12]] ; CHECK-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv2i64( zeroinitializer, splat (i1 true), i32 [[TMP1]]) -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = sub nuw nsw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], -1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[ARRAYIDX13]], i64 [[TMP7]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll b/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll index ea49a27b363db..5b4f5fbe82865 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/transform-narrow-interleave-to-widen-memory.ll @@ -13,6 +13,7 @@ define void @load_store_interleave_group(ptr noalias %data) { ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[EVL_BASED_IV]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP1]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP0]], 2 @@ -23,7 +24,6 @@ define void @load_store_interleave_group(ptr noalias %data) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP0]], 2 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv4i64( [[TMP3]], [[TMP4]]) ; CHECK-NEXT: call void @llvm.vp.store.nxv4i64.p0( [[INTERLEAVED_VEC]], ptr align 8 [[TMP2]], splat (i1 true), i32 [[INTERLEAVE_EVL1]]) -; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 @@ -110,6 +110,7 @@ define void @load_store_interleave_group_i32(ptr noalias %data) { ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nsw i64 [[EVL_BASED_IV]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DATA]], i64 [[TMP1]] ; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP0]], 4 @@ -122,7 +123,6 @@ define void @load_store_interleave_group_i32(ptr noalias %data) { ; CHECK-NEXT: [[INTERLEAVE_EVL1:%.*]] = mul nuw nsw i32 [[TMP0]], 4 
 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv16i32(<vscale x 4 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]])
 ; CHECK-NEXT: call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> [[INTERLEAVED_VEC]], ptr align 8 [[TMP2]], <vscale x 16 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL1]])
-; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
index f4c7c6f6fba1b..f0596f5bdfe25 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
@@ -18,13 +18,13 @@ define void @test_pr98413_zext_removed(ptr %src, ptr noalias %dst, i64 %x) {
 ; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 97, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 [[TMP7]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 8 [[TMP8]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP13]])
 ; CHECK-NEXT: [[TMP10:%.*]] = trunc <vscale x 8 x i16> [[WIDE_LOAD]] to <vscale x 8 x i8>
 ; CHECK-NEXT: [[TMP11:%.*]] = and <vscale x 8 x i8> [[TMP6]], [[TMP10]]
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr align 1 [[TMP12]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP13]])
-; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[TMP7]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -68,13 +68,13 @@ define void @test_pr98413_sext_removed(ptr %src, ptr noalias %dst, i64 %x) {
 ; CHECK-NEXT: [[TMP7:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 97, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 [[TMP7]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 8 [[TMP8]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP13]])
 ; CHECK-NEXT: [[TMP10:%.*]] = trunc <vscale x 8 x i16> [[WIDE_LOAD]] to <vscale x 8 x i8>
 ; CHECK-NEXT: [[TMP11:%.*]] = and <vscale x 8 x i8> [[TMP6]], [[TMP10]]
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP11]], ptr align 1 [[TMP12]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP13]])
-; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP14]], [[TMP7]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -209,9 +209,9 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP14]] to i64
 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT4]], <vscale x 2 x i1> [[TMP8]], i32 [[TMP14]])
 ; CHECK-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[WIDE_MASKED_GATHER]], <vscale x 2 x ptr> align 8 [[BROADCAST_SPLAT6]], <vscale x 2 x i1> [[TMP8]], i32 [[TMP14]])
-; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP14]] to i64
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
index a1b8cbbabeece..9eef25ed1c958 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
@@ -15,8 +15,8 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 9, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x ptr> align 1 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP7]])
 ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
+; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x ptr> align 1 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP7]])
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
index df848f2db917f..478854ef2065e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
@@ -27,6 +27,7 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
 ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP13]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
 ; CHECK-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[VP_OP_LOAD]] to <vscale x 8 x i32>
@@ -36,7 +37,6 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
 ; CHECK-NEXT: [[TMP24:%.*]] = trunc <vscale x 8 x i32> [[TMP17]] to <vscale x 8 x i8>
 ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP24]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
 ; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
-; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index adda7c362b8e8..091cf87035a1d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -17,12 +17,12 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
 ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; SCALABLE-NEXT: [[TMP7:%.*]] = load i64, ptr [[B]], align 8
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP7]], i64 0
 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
-; SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -75,12 +75,12 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6
 ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B]], align 8
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
 ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]]
 ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -117,12 +117,12 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
 ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; SCALABLE-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B]], align 8
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
-; SCALABLE-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[INDEX]]
 ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
 ; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -176,12 +176,12 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; TF-SCALABLE-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V]], i64 0
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
 ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[ARRAYIDX]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP0]])
-; TF-SCALABLE-NEXT: [[TMP5:%.*]] = zext i32 [[TMP0]] to i64
 ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[IV]]
 ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
 ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -354,12 +354,12 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B]], align 1
 ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP10]])
-; SCALABLE-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64
 ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]]
 ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -412,12 +412,12 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
 ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B]], align 1
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP5]], i64 0
 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP6]])
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = zext i32 [[TMP6]] to i64
 ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]]
 ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -456,10 +456,10 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; SCALABLE-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 ; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
 ; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
-; SCALABLE-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]]
 ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -514,10 +514,10 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
 ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 8
 ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP6]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
-; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
 ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]]
 ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
 ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -809,10 +809,10 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; SCALABLE-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; SCALABLE-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 ; SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1
 ; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP7]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP9]])
-; SCALABLE-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
 ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]]
 ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
@@ -867,10 +867,10 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
 ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B]], align 1
 ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]]
 ; TF-SCALABLE-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr align 8 [[TMP6]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP5]])
-; TF-SCALABLE-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
 ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]]
 ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
 ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
index 2a63d4b22886e..252180044344d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll
@@ -17,6 +17,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = phi i64 [ [[N:%.*]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
+; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
 ; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[EVL_BASED_IV]]
@@ -24,7 +25,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: [[VP_OP:%.*]] = add nsw <vscale x 4 x i32> [[VP_OP_LOAD1]], [[VP_OP_LOAD]]
 ; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
-; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP12]] to i64
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[TMP11]], [[TMP20]]
 ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll
index dca4f47738309..f4c953b1c1aa5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vf-will-not-generate-any-vector-insts.ll
@@ -26,8 +26,8 @@ define void @vf_will_not_generate_any_vector_insts(ptr %src, ptr %dst) {
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 100, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT3]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
 ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT3]], <vscale x 4 x ptr> align 4 [[BROADCAST_SPLAT]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
 ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
index f87b3326808fd..3e851eec5c8fa 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll
@@ -25,8 +25,9 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL-NEXT: CURRENT-ITERATION-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
 ; IF-EVL-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<[[FOR_PHI:%.+]]> = phi ir<33>, ir<[[LD:%.+]]>
 ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%TC>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[PREV_EVL:%.+]]> = phi [ vp<[[VF32]]>, vector.ph ], [ vp<[[EVL:%.+]]>, vector.body ]
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[PREV_EVL:%.+]]> = phi [ vp<[[VF32]]>, vector.ph ], [ vp<[[CAST:%.+]]>, vector.body ]
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%.+]]> = zext vp<%evl> to i64
 ; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds nuw ir<%A>, vp<[[ST]]
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds nuw ir<[[GEP1]]>
@@ -36,7 +37,6 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds nuw ir<%B>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer inbounds nuw ir<[[GEP2]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ADD]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics-reduction.ll
index de18dfbd605c0..bb4783ac36040 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -40,13 +40,13 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-OUTLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, vp<[[RDX_SELECT:%.+]]>
 ; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%n>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
 ; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-OUTLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
+; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%.+]]> = zext vp<%evl> to i64
+; IF-EVL-OUTLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[CAST]]>
 ; IF-EVL-OUTLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-OUTLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>
 ; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
 ; IF-EVL-OUTLOOP-NEXT: WIDEN ir<[[ADD:%.+]]> = add ir<[[LD1]]>, ir<[[RDX_PHI]]>
 ; IF-EVL-OUTLOOP-NEXT: WIDEN-INTRINSIC vp<[[RDX_SELECT]]> = call llvm.vp.merge(ir<true>, ir<[[ADD]]>, ir<[[RDX_PHI]]>, vp<[[EVL]]>)
-; IF-EVL-OUTLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
 ; IF-EVL-OUTLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -80,12 +80,12 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX_PHI:%.+]]> = phi vp<[[RDX_START]]>, ir<[[RDX_NEXT:%.+]]>
 ; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%n>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
 ; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[EVL]]>
+; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%.+]]> = zext vp<[[EVL]]> to i64
+; IF-EVL-INLOOP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[CAST]]>
 ; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>
 ; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
 ; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
-; IF-EVL-INLOOP-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
 ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-INLOOP-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
 ; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics.ll
index 488ebc5cf0a8c..443310096682b 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/RISCV/vplan-vp-intrinsics.ll
@@ -24,7 +24,8 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: CURRENT-ITERATION-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
 ; IF-EVL-NEXT: EMIT-SCALAR vp<[[AVL:%.+]]> = phi [ ir<%N>, vector.ph ], [ vp<[[AVL_NEXT:%.+]]>, vector.body ]
 ; IF-EVL-NEXT: EMIT-SCALAR vp<%evl> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
-; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<%evl>
+; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%.+]]> = zext vp<%evl> to i64
+; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>, vp<[[CAST]]>
 ; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP1]]>
 ; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<%evl>
@@ -35,7 +36,6 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
 ; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
 ; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer inbounds ir<[[GEP3]]>
 ; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<%evl>
-; IF-EVL-NEXT: EMIT-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<%evl> to i64
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
 ; IF-EVL-NEXT: EMIT vp<[[AVL_NEXT]]> = sub nuw vp<[[AVL]]>, vp<[[CAST]]>
 ; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
diff --git a/llvm/test/Transforms/PhaseOrdering/loop-vectorize-bfi.ll b/llvm/test/Transforms/PhaseOrdering/loop-vectorize-bfi.ll
index 6183a2679c3aa..94081f6594aa8 100644
--- a/llvm/test/Transforms/PhaseOrdering/loop-vectorize-bfi.ll
+++ b/llvm/test/Transforms/PhaseOrdering/loop-vectorize-bfi.ll
@@ -19,10 +19,10 @@ define void @f(i1 %x) !prof !0 {
 ; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 65, %[[ENTRY]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = zext nneg i32 [[TMP3]] to i64
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr null, i64 [[EVL_BASED_IV]]
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> poison, ptr align 8 [[TMP4]], <vscale x 2 x i1> [[TMP2]], i32 [[TMP3]])
 ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> poison, ptr align 8 [[TMP4]], <vscale x 2 x i1> [[TMP2]], i32 [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = zext nneg i32 [[TMP3]] to i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[EVL_BASED_IV]], [[TMP5]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw nsw i64 [[AVL]], [[TMP5]]
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0