diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index 61dba265dc948..730baa8cc0052 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -278,6 +278,7 @@ class Instruction : public User,
   bool isUnaryOp() const { return isUnaryOp(getOpcode()); }
   bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
   bool isIntDivRem() const { return isIntDivRem(getOpcode()); }
+  bool isFPDivRem() const { return isFPDivRem(getOpcode()); }
  bool isShift() const { return isShift(getOpcode()); }
  bool isCast() const { return isCast(getOpcode()); }
  bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
@@ -304,6 +305,10 @@ class Instruction : public User,
     return Opcode == UDiv || Opcode == SDiv || Opcode == URem || Opcode == SRem;
   }
 
+  static inline bool isFPDivRem(unsigned Opcode) {
+    return Opcode == FDiv || Opcode == FRem;
+  }
+
   /// Determine if the Opcode is one of the shift instructions.
   static inline bool isShift(unsigned Opcode) {
     return Opcode >= Shl && Opcode <= AShr;
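
Note (illustration only, not part of the patch): the new predicate is a pure
opcode test, mirroring the existing isIntDivRem(). A minimal sketch of its
behavior; checkFPDivRemPredicate is a hypothetical name used only here:

  #include "llvm/IR/Instruction.h"
  #include <cassert>

  static void checkFPDivRemPredicate() {
    using llvm::Instruction;
    // FDiv and FRem are the floating-point division-like opcodes...
    assert(Instruction::isFPDivRem(Instruction::FDiv));
    assert(Instruction::isFPDivRem(Instruction::FRem));
    // ...while multiplies are not, and integer divisions stay under the
    // existing isIntDivRem() predicate.
    assert(!Instruction::isFPDivRem(Instruction::FMul));
    assert(!Instruction::isFPDivRem(Instruction::SDiv) &&
           Instruction::isIntDivRem(Instruction::SDiv));
  }

The SLPVectorizer changes below use this predicate to refuse bundles that mix
poison lanes with division-like operations, where widening could introduce a
division on lanes the scalar code never executed.
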
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8e0ca2677bf0a..b372ada2bcba3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -456,15 +456,18 @@ static std::string shortBundleName(ArrayRef<Value *> VL, int Idx = -1) {
 /// \returns true if all of the instructions in \p VL are in the same block or
 /// false otherwise.
 static bool allSameBlock(ArrayRef<Value *> VL) {
-  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
-  if (!I0)
+  auto *It = find_if(VL, IsaPred<Instruction>);
+  if (It == VL.end())
     return false;
+  Instruction *I0 = cast<Instruction>(*It);
   if (all_of(VL, isVectorLikeInstWithConstOps))
     return true;
 
   BasicBlock *BB = I0->getParent();
-  for (int I = 1, E = VL.size(); I < E; I++) {
-    auto *II = dyn_cast<Instruction>(VL[I]);
+  for (Value *V : iterator_range(It, VL.end())) {
+    if (isa<PoisonValue>(V))
+      continue;
+    auto *II = dyn_cast<Instruction>(V);
     if (!II)
       return false;
 
@@ -893,10 +896,19 @@ static bool isCmpSameOrSwapped(const CmpInst *BaseCI, const CmpInst *CI,
 static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
                                        const TargetLibraryInfo &TLI) {
   // Make sure these are all Instructions.
-  if (!all_of(VL, IsaPred<Instruction>))
+  if (!all_of(VL, IsaPred<Instruction, PoisonValue>))
+    return InstructionsState::invalid();
+
+  auto *It = find_if(VL, IsaPred<Instruction>);
+  if (It == VL.end())
+    return InstructionsState::invalid();
+
+  Value *V = *It;
+  unsigned InstCnt = std::count_if(It, VL.end(), IsaPred<Instruction>);
+  if ((VL.size() > 2 && !isa<PHINode>(V) && InstCnt < VL.size() / 2) ||
+      (VL.size() == 2 && InstCnt < 2))
     return InstructionsState::invalid();
 
-  Value *V = VL.front();
   bool IsCastOp = isa<CastInst>(V);
   bool IsBinOp = isa<BinaryOperator>(V);
   bool IsCmpOp = isa<CmpInst>(V);
@@ -904,7 +916,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
       IsCmpOp ? cast<CmpInst>(V)->getPredicate() : CmpInst::BAD_ICMP_PREDICATE;
   unsigned Opcode = cast<Instruction>(V)->getOpcode();
   unsigned AltOpcode = Opcode;
-  unsigned AltIndex = 0;
+  unsigned AltIndex = std::distance(VL.begin(), It);
 
   bool SwappedPredsCompatible = [&]() {
     if (!IsCmpOp)
@@ -940,8 +952,17 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
     if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
       return InstructionsState::invalid();
   }
+  bool AnyPoison = InstCnt != VL.size();
   for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
-    auto *I = cast<Instruction>(VL[Cnt]);
+    auto *I = dyn_cast<Instruction>(VL[Cnt]);
+    if (!I)
+      continue;
+
+    // Cannot combine poison and divisions.
+    // TODO: do some smart analysis of the CallInsts to exclude divide-like
+    // intrinsics/functions only.
+    if (AnyPoison && (I->isIntDivRem() || I->isFPDivRem() || isa<CallInst>(I)))
+      return InstructionsState::invalid();
     unsigned InstOpcode = I->getOpcode();
     if (IsBinOp && isa<BinaryOperator>(I)) {
       if (InstOpcode == Opcode || InstOpcode == AltOpcode)
@@ -1183,10 +1204,13 @@ static SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
   Type *ScalarTy = VL[0]->getType();
   unsigned ScalarTyNumElements = getNumElements(ScalarTy);
   SmallBitVector OpcodeMask(VL.size() * ScalarTyNumElements, false);
-  for (unsigned Lane : seq<unsigned>(VL.size()))
+  for (unsigned Lane : seq<unsigned>(VL.size())) {
+    if (isa<PoisonValue>(VL[Lane]))
+      continue;
     if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
       OpcodeMask.set(Lane * ScalarTyNumElements,
                      Lane * ScalarTyNumElements + ScalarTyNumElements);
+  }
   return OpcodeMask;
 }
 
@@ -1799,13 +1823,17 @@ class BoUpSLP {
         (S.MainOp->getNumOperands() <= 2 || !MainAltOps.empty() ||
          !S.isAltShuffle()) &&
         all_of(Ops, [&S](Value *V) {
-          return cast<Instruction>(V)->getNumOperands() ==
-                 S.MainOp->getNumOperands();
+          return isa<PoisonValue>(V) ||
+                 cast<Instruction>(V)->getNumOperands() ==
+                     S.MainOp->getNumOperands();
         }))
       return S.isAltShuffle() ? LookAheadHeuristics::ScoreAltOpcodes
                               : LookAheadHeuristics::ScoreSameOpcode;
   }
 
+  if (I1 && isa<PoisonValue>(V2))
+    return LookAheadHeuristics::ScoreSameOpcode;
+
   if (isa<UndefValue>(V2))
     return LookAheadHeuristics::ScoreUndef;
 
@@ -2354,17 +2382,17 @@ class BoUpSLP {
       assert(!VL.empty() && "Bad VL");
       assert((empty() || VL.size() == getNumLanes()) &&
              "Expected same number of lanes");
-      assert(isa<Instruction>(VL[0]) && "Expected instruction");
       constexpr unsigned IntrinsicNumOperands = 2;
-      unsigned NumOperands = isa<IntrinsicInst>(VL[0])
-                                 ? IntrinsicNumOperands
-                                 : cast<Instruction>(VL[0])->getNumOperands();
+      auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
+      unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
+                                                     : VL0->getNumOperands();
       OpsVec.resize(NumOperands);
       unsigned NumLanes = VL.size();
       for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
         OpsVec[OpIdx].resize(NumLanes);
         for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
-          assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
+          assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
+                 "Expected instruction or poison value");
           // Our tree has just 3 nodes: the root and two operands.
           // It is therefore trivial to get the APO. We only need to check the
           // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
@@ -2375,6 +2403,12 @@ class BoUpSLP {
           // Since operand reordering is performed on groups of commutative
           // operations or alternating sequences (e.g., +, -), we can safely
           // tell the inverse operations by checking commutativity.
+          if (isa<PoisonValue>(VL[Lane])) {
+            OpsVec[OpIdx][Lane] = {
+                PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true,
+                false};
+            continue;
+          }
           bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
           bool APO = (OpIdx == 0) ? false : IsInverseOperation;
           OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
@@ -2472,7 +2506,7 @@ class BoUpSLP {
               Value *OpILn = getValue(OpI, Ln);
               return (L && L->isLoopInvariant(OpILn)) ||
                      (getSameOpcode({Op, OpILn}, TLI).getOpcode() &&
-                      Op->getParent() == cast<Instruction>(OpILn)->getParent());
+                      allSameBlock({Op, OpILn}));
             }))
           return true;
       }
@@ -2484,7 +2518,8 @@ class BoUpSLP {
     VLOperands(ArrayRef<Value *> RootVL, const BoUpSLP &R)
         : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
           L(R.LI->getLoopFor(
-              (cast<Instruction>(RootVL.front())->getParent()))) {
+              (cast<Instruction>(*find_if(RootVL, IsaPred<Instruction>))
+                   ->getParent()))) {
       // Append all the operands of RootVL.
       appendOperandsOfVL(RootVL);
     }
@@ -3286,13 +3321,18 @@ class BoUpSLP {
     /// Set the operands of this bundle in their original order.
     void setOperandsInOrder() {
       assert(Operands.empty() && "Already initialized?");
-      auto *I0 = cast<Instruction>(Scalars[0]);
+      auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
       Operands.resize(I0->getNumOperands());
       unsigned NumLanes = Scalars.size();
       for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
            OpIdx != NumOperands; ++OpIdx) {
         Operands[OpIdx].resize(NumLanes);
         for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+          if (isa<PoisonValue>(Scalars[Lane])) {
+            Operands[OpIdx][Lane] =
+                PoisonValue::get(I0->getOperand(OpIdx)->getType());
+            continue;
+          }
           auto *I = cast<Instruction>(Scalars[Lane]);
           assert(I->getNumOperands() == NumOperands &&
                  "Expected same number of operands");
@@ -4912,8 +4952,8 @@ BoUpSLP::canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
   PointerOps.resize(Sz);
   auto *POIter = PointerOps.begin();
   for (Value *V : VL) {
-    auto *L = cast<LoadInst>(V);
-    if (!L->isSimple())
+    auto *L = dyn_cast<LoadInst>(V);
+    if (!L || !L->isSimple())
       return LoadsState::Gather;
     *POIter = L->getPointerOperand();
     ++POIter;
@@ -5491,6 +5531,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
                                              TE.ReuseShuffleIndices.end());
     if (TE.getOpcode() == Instruction::ExtractElement &&
         all_of(TE.Scalars, [Sz](Value *V) {
+          if (isa<PoisonValue>(V))
+            return true;
           std::optional<unsigned> Idx = getExtractIndex(cast<Instruction>(V));
           return Idx && *Idx < Sz;
         })) {
@@ -5579,7 +5621,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
     auto PHICompare = [&](unsigned I1, unsigned I2) {
       Value *V1 = TE.Scalars[I1];
       Value *V2 = TE.Scalars[I2];
-      if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0))
+      if (V1 == V2 || (V1->getNumUses() == 0 && V2->getNumUses() == 0) ||
+          isa<PoisonValue>(V1) || isa<PoisonValue>(V2))
         return false;
       if (V1->getNumUses() < V2->getNumUses())
         return true;
@@ -7352,8 +7395,14 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
   for (unsigned I : seq<unsigned>(0, S.MainOp->getNumOperands())) {
     Operands.emplace_back();
     // Prepare the operand vector.
-    for (Value *V : VL)
+    for (Value *V : VL) {
+      if (isa<PoisonValue>(V)) {
+        Operands.back().push_back(
+            PoisonValue::get(S.MainOp->getOperand(I)->getType()));
+        continue;
+      }
       Operands.back().push_back(cast<Instruction>(V)->getOperand(I));
+    }
   }
   if (Operands.size() == 2) {
     // Try find best operands candidates.
@@ -7460,8 +7509,11 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     if (VL0->getNumOperands() > MaxPHINumOperands)
       return TreeEntry::NeedToGather;
     // Check for terminator values (e.g. invoke).
-    for (Value *V : VL)
-      for (Value *Incoming : cast<PHINode>(V)->incoming_values()) {
+    for (Value *V : VL) {
+      auto *PHI = dyn_cast<PHINode>(V);
+      if (!PHI)
+        continue;
+      for (Value *Incoming : PHI->incoming_values()) {
         Instruction *Term = dyn_cast<Instruction>(Incoming);
         if (Term && Term->isTerminator()) {
           LLVM_DEBUG(dbgs()
@@ -7469,6 +7521,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
           return TreeEntry::NeedToGather;
         }
       }
+    }
 
     return TreeEntry::Vectorize;
   }
@@ -7544,8 +7597,10 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
       if (DL->getTypeSizeInBits(ScalarTy) !=
           DL->getTypeAllocSizeInBits(ScalarTy))
         LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
-      else if (any_of(VL,
-                      [](Value *V) { return !cast<LoadInst>(V)->isSimple(); }))
+      else if (any_of(VL, [](Value *V) {
+                 auto *LI = dyn_cast<LoadInst>(V);
+                 return !LI || !LI->isSimple();
+               }))
         LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
       else
         LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
@@ -7569,6 +7624,8 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
   case Instruction::BitCast: {
     Type *SrcTy = VL0->getOperand(0)->getType();
     for (Value *V : VL) {
+      if (isa<PoisonValue>(V))
+        continue;
       Type *Ty = cast<Instruction>(V)->getOperand(0)->getType();
       if (Ty != SrcTy || !isValidElementType(Ty)) {
         LLVM_DEBUG(
@@ -7585,7 +7642,9 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
     Type *ComparedTy = VL0->getOperand(0)->getType();
     for (Value *V : VL) {
-      CmpInst *Cmp = cast<CmpInst>(V);
+      if (isa<PoisonValue>(V))
+        continue;
+      auto *Cmp = cast<CmpInst>(V);
       if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
           Cmp->getOperand(0)->getType() != ComparedTy) {
         LLVM_DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
@@ -7828,7 +7887,13 @@ class PHIHandler {
       }
       // Prepare the operand vector.
       for (auto [Idx, V] : enumerate(Phis)) {
-        auto *P = cast<PHINode>(V);
+        auto *P = dyn_cast<PHINode>(V);
+        if (!P) {
+          assert(isa<PoisonValue>(V) &&
+                 "Expected isa instruction or poison value.");
+          Operands[I][Idx] = V;
+          continue;
+        }
         if (P->getIncomingBlock(I) == InBB)
           Operands[I][Idx] = P->getIncomingValue(I);
         else
@@ -7847,6 +7912,11 @@ class PHIHandler {
       Blocks.try_emplace(InBB).first->second.push_back(I);
     }
     for (auto [Idx, V] : enumerate(Phis)) {
+      if (isa<PoisonValue>(V)) {
+        for (unsigned I : seq<unsigned>(Main->getNumIncomingValues()))
+          Operands[I][Idx] = V;
+        continue;
+      }
       auto *P = cast<PHINode>(V);
       for (unsigned I : seq<unsigned>(0, P->getNumIncomingValues())) {
         BasicBlock *InBB = P->getIncomingBlock(I);
@@ -7896,7 +7966,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     for (Value *V : VL) {
       if (isConstant(V)) {
         ReuseShuffleIndices.emplace_back(
-            isa<UndefValue>(V) ? PoisonMaskElem : UniqueValues.size());
+            isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
         UniqueValues.emplace_back(V);
         continue;
       }
@@ -7928,11 +7998,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           }))) {
       if (DoNotFail && UniquePositions.size() > 1 &&
           NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() &&
-          all_of(UniqueValues, [=](Value *V) {
-            return isa<ExtractElementInst>(V) ||
-                   areAllUsersVectorized(cast<Instruction>(V),
-                                         UserIgnoreList);
-          })) {
+          all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
         // Find the number of elements, which forms full vectors.
         unsigned PWSz = getFullVectorNumberOfElements(
             *TTI, UniqueValues.front()->getType(), UniqueValues.size());
@@ -7940,8 +8006,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           ReuseShuffleIndices.clear();
         } else {
           NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
-          NonUniqueValueVL.append(PWSz - UniqueValues.size(),
-                                  UniqueValues.back());
+          NonUniqueValueVL.append(
+              PWSz - UniqueValues.size(),
+              PoisonValue::get(UniqueValues.front()->getType()));
           VL = NonUniqueValueVL;
         }
         return true;
@@ -8076,7 +8143,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       return true;
     // Check if all operands are extracts, part of vector node or can build a
     // regular vectorize node.
-    SmallVector<unsigned> InstsCount(VL.size(), 0);
+    SmallVector<unsigned> InstsCount;
     for (Value *V : VL) {
       auto *I = cast<Instruction>(V);
       InstsCount.push_back(count_if(I->operand_values(), [](Value *Op) {
@@ -8470,6 +8537,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       } else {
         // Collect operands - commute if it uses the swapped predicate.
         for (Value *V : VL) {
+          if (isa<PoisonValue>(V)) {
+            Left.push_back(PoisonValue::get(VL0->getOperand(0)->getType()));
+            Right.push_back(PoisonValue::get(VL0->getOperand(1)->getType()));
+            continue;
+          }
           auto *Cmp = cast<CmpInst>(V);
           Value *LHS = Cmp->getOperand(0);
           Value *RHS = Cmp->getOperand(1);
@@ -8669,7 +8741,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       if (isa<BinaryOperator>(VL0) || CI) {
         ValueList Left, Right;
         if (!CI || all_of(VL, [](Value *V) {
-              return cast<CmpInst>(V)->isCommutative();
+              return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
             })) {
           reorderInputsAccordingToOpcode(VL, Left, Right, *this);
         } else {
@@ -8682,6 +8754,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           // Collect operands - commute if it uses the swapped predicate or
           // alternate operation.
           for (Value *V : VL) {
+            if (isa<PoisonValue>(V)) {
+              Left.push_back(
+                  PoisonValue::get(MainCI->getOperand(0)->getType()));
+              Right.push_back(
+                  PoisonValue::get(MainCI->getOperand(1)->getType()));
+              continue;
+            }
             auto *Cmp = cast<CmpInst>(V);
             Value *LHS = Cmp->getOperand(0);
             Value *RHS = Cmp->getOperand(1);
@@ -8886,6 +8965,8 @@ void BoUpSLP::TreeEntry::buildAltOpShuffleMask(
     unsigned Idx = I;
     if (!ReorderIndices.empty())
       Idx = OrderMask[I];
+    if (isa<PoisonValue>(Scalars[Idx]))
+      continue;
     auto *OpInst = cast<Instruction>(Scalars[Idx]);
     if (IsAltOp(OpInst)) {
       Mask[I] = Sz + Idx;
@@ -9660,9 +9741,11 @@ void BoUpSLP::transformNodes() {
           // Try to vectorize reduced values or if all users are vectorized.
           // For expensive instructions extra extracts might be profitable.
           if ((!UserIgnoreList || E.Idx != 0) &&
-              TTI->getInstructionCost(cast<Instruction>(Slice.front()),
-                                      CostKind) < TTI::TCC_Expensive &&
+              TTI->getInstructionCost(S.MainOp, CostKind) <
+                  TTI::TCC_Expensive &&
               !all_of(Slice, [&](Value *V) {
+                if (isa<PoisonValue>(V))
+                  return true;
                 return areAllUsersVectorized(cast<Instruction>(V),
                                              UserIgnoreList);
               }))
@@ -9685,12 +9768,13 @@ void BoUpSLP::transformNodes() {
               continue;
             }
           } else if (S.getOpcode() == Instruction::ExtractElement ||
-                     (TTI->getInstructionCost(
-                          cast<Instruction>(Slice.front()), CostKind) <
+                     (TTI->getInstructionCost(S.MainOp, CostKind) <
                           TTI::TCC_Expensive &&
                       !CheckOperandsProfitability(
-                          cast<Instruction>(Slice.front()),
-                          cast<Instruction>(Slice.back()), S))) {
+                          S.MainOp,
+                          cast<Instruction>(*find_if(reverse(Slice),
+                                                     IsaPred<Instruction>)),
+                          S))) {
             // Do not vectorize extractelements (handled effectively
            // already). Do not vectorize non-profitable instructions (with
            // low cost and non-vectorizable operands.)
@@ -10958,7 +11042,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
   const unsigned Sz = UniqueValues.size();
   SmallBitVector UsedScalars(Sz, false);
   for (unsigned I = 0; I < Sz; ++I) {
-    if (getTreeEntry(UniqueValues[I]) == E)
+    if (isa<Instruction>(UniqueValues[I]) && getTreeEntry(UniqueValues[I]) == E)
       continue;
     UsedScalars.set(I);
   }
@@ -11097,6 +11181,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
   case Instruction::ExtractValue:
   case Instruction::ExtractElement: {
     auto GetScalarCost = [&](unsigned Idx) {
+      if (isa<PoisonValue>(UniqueValues[Idx]))
+        return InstructionCost(TTI::TCC_Free);
+
       auto *I = cast<Instruction>(UniqueValues[Idx]);
       VectorType *SrcVecTy;
       if (ShuffleOrOp == Instruction::ExtractElement) {
@@ -11285,10 +11372,10 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
         VecOpcode = Instruction::UIToFP;
     }
     auto GetScalarCost = [&](unsigned Idx) -> InstructionCost {
-      auto *VI = cast<Instruction>(UniqueValues[Idx]);
+      assert(Idx == 0 && "Expected 0 index only");
       return TTI->getCastInstrCost(Opcode, VL0->getType(),
                                    VL0->getOperand(0)->getType(),
-                                   TTI::getCastContextHint(VI), CostKind, VI);
+                                   TTI::getCastContextHint(VL0), CostKind, VL0);
     };
     auto GetVectorCost = [=](InstructionCost CommonCost) {
       // Do not count cost here if minimum bitwidth is in effect and it is just
@@ -11316,6 +11403,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
                                 ? CmpInst::BAD_FCMP_PREDICATE
                                 : CmpInst::BAD_ICMP_PREDICATE;
     auto GetScalarCost = [&](unsigned Idx) {
+      if (isa<PoisonValue>(UniqueValues[Idx]))
+        return InstructionCost(TTI::TCC_Free);
+
       auto *VI = cast<Instruction>(UniqueValues[Idx]);
       CmpInst::Predicate CurrentPred = ScalarTy->isFloatingPointTy()
                                            ? CmpInst::BAD_FCMP_PREDICATE
@@ -11396,6 +11486,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
   case Instruction::Or:
   case Instruction::Xor: {
     auto GetScalarCost = [&](unsigned Idx) {
+      if (isa<PoisonValue>(UniqueValues[Idx]))
+        return InstructionCost(TTI::TCC_Free);
+
       auto *VI = cast<Instruction>(UniqueValues[Idx]);
       unsigned OpIdx = isa<UnaryOperator>(VI) ? 0 : 1;
       TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(VI->getOperand(0));
@@ -11583,6 +11676,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       return false;
     };
     auto GetScalarCost = [&](unsigned Idx) {
+      if (isa<PoisonValue>(UniqueValues[Idx]))
+        return InstructionCost(TTI::TCC_Free);
+
       auto *VI = cast<Instruction>(UniqueValues[Idx]);
       assert(E->isOpcodeOrAlt(VI) && "Unexpected main/alternate opcode");
       (void)E;
@@ -13373,8 +13469,8 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
                  if (E->getOpcode() == Instruction::GetElementPtr &&
                      !isa<GetElementPtrInst>(V))
                    return true;
-                 auto *I = cast<Instruction>(V);
-                 return !E->isOpcodeOrAlt(I) || I->getParent() == BB ||
+                 auto *I = dyn_cast<Instruction>(V);
+                 return !I || !E->isOpcodeOrAlt(I) || I->getParent() == BB ||
                         isVectorLikeInstWithConstOps(I);
               })) &&
           "Expected gathered loads or GEPs or instructions from same basic "
@@ -13473,8 +13569,9 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
          })) ||
        all_of(E->Scalars,
               [](Value *V) {
-                 return !isVectorLikeInstWithConstOps(V) &&
-                        isUsedOutsideBlock(V);
+                 return isa<PoisonValue>(V) ||
+                        (!isVectorLikeInstWithConstOps(V) &&
+                         isUsedOutsideBlock(V));
               }) ||
        (E->isGather() && E->Idx == 0 && all_of(E->Scalars, [](Value *V) {
           return isa<ExtractElementInst, UndefValue>(V) ||
@@ -14002,12 +14099,16 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
      Value *V1 = E1.VectorizedValue;
      if (V1->getType()->isIntOrIntVectorTy())
        V1 = castToScalarTyElem(V1, any_of(E1.Scalars, [&](Value *V) {
+                                 if (isa<PoisonValue>(V))
+                                   return false;
                                  return !isKnownNonNegative(
                                      V, SimplifyQuery(*R.DL));
                                }));
      Value *V2 = E2.VectorizedValue;
      if (V2->getType()->isIntOrIntVectorTy())
        V2 = castToScalarTyElem(V2, any_of(E2.Scalars, [&](Value *V) {
+                                 if (isa<PoisonValue>(V))
+                                   return false;
                                  return !isKnownNonNegative(
                                      V, SimplifyQuery(*R.DL));
                                }));
@@ -14019,6 +14120,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
      Value *V1 = E1.VectorizedValue;
      if (V1->getType()->isIntOrIntVectorTy())
        V1 = castToScalarTyElem(V1, any_of(E1.Scalars, [&](Value *V) {
+                                 if (isa<PoisonValue>(V))
+                                   return false;
                                  return !isKnownNonNegative(
                                      V, SimplifyQuery(*R.DL));
                                }));
@@ -14181,6 +14284,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
    Value *V = E->VectorizedValue;
    if (V->getType()->isIntOrIntVectorTy())
      V = castToScalarTyElem(V, any_of(E->Scalars, [&](Value *V) {
+                              if (isa<PoisonValue>(V))
+                                return false;
                               return !isKnownNonNegative(
                                   V, SimplifyQuery(*R.DL));
                             }));
@@ -14897,6 +15002,16 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E, Type *ScalarTy,
                                                  Builder, *this);
 }
 
+/// \returns \p I after propagating metadata from \p VL only for instructions
+/// in \p VL.
+static Instruction *propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
+  SmallVector<Value *> Insts;
+  for (Value *V : VL)
+    if (isa<Instruction>(V))
+      Insts.push_back(V);
+  return llvm::propagateMetadata(Inst, Insts);
+}
+
 Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
   IRBuilderBase::InsertPointGuard Guard(Builder);
 
@@ -14966,6 +15081,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           IsSigned = It->second.second;
         else
           IsSigned = any_of(OpE->Scalars, [&](Value *R) {
+            if (isa<PoisonValue>(R))
+              return false;
             return !isKnownNonNegative(R, SimplifyQuery(*DL));
           });
         return IsSigned;
@@ -15054,7 +15171,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
       Builder.SetInsertPoint(LI);
       Value *Ptr = LI->getPointerOperand();
       LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
-      Value *NewV = propagateMetadata(V, E->Scalars);
+      Value *NewV = ::propagateMetadata(V, E->Scalars);
       NewV = FinalShuffle(NewV, E);
       E->VectorizedValue = NewV;
       return NewV;
@@ -15387,7 +15504,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           static_cast<Instruction::UnaryOps>(E->getOpcode()), Op);
       propagateIRFlags(V, E->Scalars, VL0);
       if (auto *I = dyn_cast<Instruction>(V))
-        V = propagateMetadata(I, E->Scalars);
+        V = ::propagateMetadata(I, E->Scalars);
 
       V = FinalShuffle(V, E);
@@ -15481,11 +15598,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
                                  RHS);
       propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
       if (auto *I = dyn_cast<Instruction>(V)) {
-        V = propagateMetadata(I, E->Scalars);
+        V = ::propagateMetadata(I, E->Scalars);
         // Drop nuw flags for abs(sub(commutative), true).
         if (!MinBWs.contains(E) && ShuffleOrOp == Instruction::Sub &&
             any_of(E->Scalars, [](Value *V) {
-              return isCommutative(cast<Instruction>(V));
+              return isa<PoisonValue>(V) || isCommutative(cast<Instruction>(V));
             }))
           I->setHasNoUnsignedWrap(/*b=*/false);
       }
@@ -15580,7 +15697,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         Align CommonAlignment = computeCommonAlignment<LoadInst>(E->Scalars);
         NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment);
       }
-      Value *V = propagateMetadata(NewLI, E->Scalars);
+      Value *V = ::propagateMetadata(NewLI, E->Scalars);
       V = FinalShuffle(V, E);
       E->VectorizedValue = V;
@@ -15625,7 +15742,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         ST = Inst;
       }
 
-      Value *V = propagateMetadata(ST, E->Scalars);
+      Value *V = ::propagateMetadata(ST, E->Scalars);
       E->VectorizedValue = V;
       ++NumVectorInstructions;
@@ -15658,7 +15775,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           if (isa<GetElementPtrInst>(V))
             GEPs.push_back(V);
         }
-        V = propagateMetadata(I, GEPs);
+        V = ::propagateMetadata(I, GEPs);
       }
 
       V = FinalShuffle(V, E);
@@ -15772,7 +15889,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
       }
       propagateIRFlags(V, E->Scalars, VL0);
       if (auto *I = dyn_cast<Instruction>(V))
-        V = propagateMetadata(I, E->Scalars);
+        V = ::propagateMetadata(I, E->Scalars);
 
       V = FinalShuffle(V, E);
     } else {
       assert(E->isAltShuffle() &&
@@ -15849,7 +15966,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         assert(LHS->getType() == VecTy && "Expected same type as operand.");
         if (auto *I = dyn_cast<Instruction>(LHS))
-          LHS = propagateMetadata(I, E->Scalars);
+          LHS = ::propagateMetadata(I, E->Scalars);
         LHS = FinalShuffle(LHS, E);
         E->VectorizedValue = LHS;
         ++NumVectorInstructions;
@@ -15890,9 +16007,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
         if (auto *I = dyn_cast<Instruction>(Vec);
             I && Opcode == Instruction::Sub && !MinBWs.contains(E) &&
             any_of(E->Scalars, [](Value *V) {
+              if (isa<PoisonValue>(V))
+                return false;
               auto *IV = cast<Instruction>(V);
-              return IV->getOpcode() == Instruction::Sub &&
-                     isCommutative(cast<Instruction>(IV));
+              return IV->getOpcode() == Instruction::Sub && isCommutative(IV);
             }))
           I->setHasNoUnsignedWrap(/*b=*/false);
       };
@@ -15905,7 +16023,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
       }
       V = Builder.CreateShuffleVector(V0, V1, Mask);
       if (auto *I = dyn_cast<Instruction>(V)) {
-        V = propagateMetadata(I, E->Scalars);
+        V = ::propagateMetadata(I, E->Scalars);
         GatherShuffleExtractSeq.insert(I);
         CSEBlocks.insert(I->getParent());
       }
@@ -16483,6 +16601,8 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
       if (auto *EE = dyn_cast<ExtractElementInst>(Scalar);
           EE && IgnoredExtracts.contains(EE))
         continue;
+      if (isa<PoisonValue>(Scalar))
+        continue;
 #ifndef NDEBUG
       Type *Ty = Scalar->getType();
       if (!Ty->isVoidTy()) {
@@ -17381,9 +17501,13 @@ bool BoUpSLP::collectValuesToDemote(
   // by the insertelement instruction and not used in multiple vector nodes, it
   // cannot be demoted.
   bool IsSignedNode = any_of(E.Scalars, [&](Value *R) {
+    if (isa<PoisonValue>(R))
+      return false;
     return !isKnownNonNegative(R, SimplifyQuery(*DL));
   });
   auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool {
+    if (isa<PoisonValue>(V))
+      return true;
     if (MultiNodeScalars.contains(V))
       return false;
     // For lat shuffle of sext/zext with many uses need to check the extra bit
@@ -17566,6 +17690,8 @@ bool BoUpSLP::collectValuesToDemote(
   // inrange amount, we can always perform a SHL in a smaller type.
   auto ShlChecker = [&](unsigned BitWidth, unsigned) {
     return all_of(E.Scalars, [&](Value *V) {
+      if (isa<PoisonValue>(V))
+        return true;
       auto *I = cast<Instruction>(V);
       KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
       return AmtKnownBits.getMaxValue().ult(BitWidth);
@@ -17580,6 +17706,8 @@ bool BoUpSLP::collectValuesToDemote(
   // already zeros.
   auto LShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
     return all_of(E.Scalars, [&](Value *V) {
+      if (isa<PoisonValue>(V))
+        return true;
       auto *I = cast<Instruction>(V);
       KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL);
       APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
@@ -17849,6 +17977,8 @@ void BoUpSLP::computeMinimumValueSizes() {
   // Determine if the sign bit of all the roots is known to be zero. If not,
   // IsKnownPositive is set to False.
   bool IsKnownPositive = !IsSignedCmp && all_of(E.Scalars, [&](Value *R) {
+    if (isa<PoisonValue>(R))
+      return true;
     KnownBits Known = computeKnownBits(R, *DL);
     return Known.isNonNegative();
   });
 
@@ -17856,6 +17986,8 @@ void BoUpSLP::computeMinimumValueSizes() {
   // We first check if all the bits of the roots are demanded. If they're not,
   // we can truncate the roots to this narrower type.
   for (Value *Root : E.Scalars) {
+    if (isa<PoisonValue>(Root))
+      continue;
     unsigned NumSignBits = ComputeNumSignBits(Root, *DL, 0, AC, nullptr, DT);
     TypeSize NumTypeBits =
         DL->getTypeSizeInBits(Root->getType()->getScalarType());
@@ -17912,9 +18044,8 @@ void BoUpSLP::computeMinimumValueSizes() {
         !(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
            (!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) ||
             DL->getTypeSizeInBits(TreeRootIT) /
-                    DL->getTypeSizeInBits(cast<Instruction>(E.Scalars.front())
-                                              ->getOperand(0)
-                                              ->getType()) >
+                    DL->getTypeSizeInBits(
+                        E.getMainOp()->getOperand(0)->getType()) >
                 2)))))
       return 0u;
     // Round MaxBitWidth up to the next power-of-two.
@@ -17933,7 +18064,8 @@ void BoUpSLP::computeMinimumValueSizes() {
     // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
     if (all_of(*UserIgnoreList,
                [](Value *V) {
-                 return cast<Instruction>(V)->getOpcode() == Instruction::Add;
+                 return isa<PoisonValue>(V) ||
+                        cast<Instruction>(V)->getOpcode() == Instruction::Add;
                }) &&
         VectorizableTree.front()->State == TreeEntry::Vectorize &&
         VectorizableTree.front()->getOpcode() == Instruction::ZExt &&
@@ -17942,6 +18074,8 @@ void BoUpSLP::computeMinimumValueSizes() {
       ReductionBitWidth = 1;
     } else {
       for (Value *V : *UserIgnoreList) {
+        if (isa<PoisonValue>(V))
+          continue;
         unsigned NumSignBits = ComputeNumSignBits(V, *DL, 0, AC, nullptr, DT);
         TypeSize NumTypeBits = DL->getTypeSizeInBits(V->getType());
         unsigned BitWidth1 = NumTypeBits - NumSignBits;
@@ -18057,8 +18191,10 @@ void BoUpSLP::computeMinimumValueSizes() {
       if (MinBWs.contains(TE))
         continue;
       bool IsSigned = any_of(TE->Scalars, [&](Value *R) {
-        return !isKnownNonNegative(R, SimplifyQuery(*DL));
-      });
+        if (isa<PoisonValue>(R))
+          return false;
+        return !isKnownNonNegative(R, SimplifyQuery(*DL));
+      });
       MinBWs.try_emplace(TE, MaxBitWidth, IsSigned);
     }
   }
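
Note (illustration only, not part of the patch): the recurring pattern in the
hunks above is to key all bundle-level decisions off the first non-poison
lane and to materialize poison lanes as typed PoisonValue operands. A
self-contained sketch of that pattern; findMainInst and padToFullWidth are
hypothetical names used only here:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instruction.h"

  using namespace llvm;

  // First real instruction of a bundle, or null if no lane holds one;
  // analyses derive the opcode, operand count, and parent block from this
  // lane instead of blindly using VL.front().
  static Instruction *findMainInst(ArrayRef<Value *> Bundle) {
    auto *It = find_if(Bundle, IsaPred<Instruction>);
    return It == Bundle.end() ? nullptr : cast<Instruction>(*It);
  }

  // Pad a short bundle (Bundle.size() <= NumLanes assumed) to the target
  // lane count with poison of the element type, mirroring the
  // NonUniqueValueVL change above.
  static void padToFullWidth(SmallVectorImpl<Value *> &Bundle,
                             unsigned NumLanes) {
    Bundle.append(NumLanes - Bundle.size(),
                  PoisonValue::get(Bundle.front()->getType()));
  }

The test updates below show the effect: bundles that previously had to be
split or kept at a narrow width can now stay at full width, with the unfilled
lanes spelled out as poison.
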
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
index d6073ea4bbbae..289807a808d5d 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll
@@ -12,7 +12,7 @@ define void @test() {
 ; CHECK:       [[BB63]]:
 ; CHECK-NEXT:    br label %[[BB64]]
 ; CHECK:       [[BB64]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ]
+; CHECK-NEXT:    [[TMP25:%.*]] = phi <16 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ]
 ; CHECK-NEXT:    [[I66:%.*]] = load float, ptr poison, align 16
 ; CHECK-NEXT:    [[I67:%.*]] = load float, ptr poison, align 4
 ; CHECK-NEXT:    [[I68:%.*]] = load float, ptr poison, align 8
@@ -37,28 +37,30 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15
 ; CHECK-NEXT:    br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
 ; CHECK:       [[BB77]]:
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32>
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32>
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32>
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 0
+; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <2 x float> poison, float [[I68]], i32 0
+; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <2 x float> [[TMP30]], float [[I66]], i32 1
+; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> poison, <16 x i32>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32>
 ; CHECK-NEXT:    br label %[[BB78:.*]]
 ; CHECK:       [[BB78]]:
-; CHECK-NEXT:    [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
-; CHECK-NEXT:    [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
-; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32>
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
-; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32>
+; CHECK-NEXT:    [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi <2 x float> [ [[TMP31]], %[[BB77]] ], [ [[TMP37:%.*]], %[[BB78]] ]
+; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32>
 ; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32>
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32>
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32>
-; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32>
+; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32>
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32>
-; CHECK-NEXT:    [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
-; CHECK-NEXT:    [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP13]]
+; CHECK-NEXT:    [[TMP26:%.*]] = fmul fast <16 x float> [[TMP38]], [[TMP25]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
 ; CHECK-NEXT:    [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
-; CHECK-NEXT:    [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32>
-; CHECK-NEXT:    [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32>
+; CHECK-NEXT:    [[TMP36]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32>
+; CHECK-NEXT:    [[TMP37]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32>
 ; CHECK-NEXT:    br i1 poison, label %[[BB78]], label %[[BB167]]
 ; CHECK:       [[BB167]]:
 ; CHECK-NEXT:    [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
index e2d1a29ee22de..4755c690c0711 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
@@ -307,14 +307,14 @@ define void @noop_extracts_9_lanes(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-NEXT:    [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5
 ; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32>
 ; CHECK-NEXT:    [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32>
@@ -395,10 +395,10 @@ define void @first_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-NEXT:    [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5
 ; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
-; CHECK-NEXT:    [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
-; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
 ; CHECK-NEXT:    [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32>
 ; CHECK-NEXT:    [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8
@@ -483,15 +483,15 @@ define void @first_and_second_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) {
 ; CHECK-NEXT:    [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4
 ; CHECK-NEXT:    [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
 ; CHECK-NEXT:    [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32>
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32>
 ; CHECK-NEXT:    [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32>
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32>
 ; CHECK-NEXT:    [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
index 2a036cc8fe326..4282ae2ab88ec 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
@@ -33,16 +33,15 @@ define void @foo() personality ptr @bar {
 ; CHECK-NEXT:    br i1 poison, label [[BB7]], label [[BB6]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ]
-; CHECK-NEXT:    [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32>
+; CHECK-NEXT:    [[TMP7]] = phi <4 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP11:%.*]], [[BB12]] ]
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    [[LOCAL_10_38123_LCSSA:%.*]] = phi i32 [ [[TMP10]], [[BB3]] ]
 ; CHECK-NEXT:    [[LOCAL_5_33118_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[BB3]] ]
 ; CHECK-NEXT:    [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 }
 ; CHECK-NEXT:    cleanup
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_10_38123_LCSSA]], i32 0
-; CHECK-NEXT:    [[TMP8]] = insertelement <2 x i32> [[TMP12]], i32 [[LOCAL_5_33118_LCSSA]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[LOCAL_10_38123_LCSSA]], i32 2
+; CHECK-NEXT:    [[TMP9]] = insertelement <4 x i32> [[TMP8]], i32 [[LOCAL_5_33118_LCSSA]], i32 3
 ; CHECK-NEXT:    br label [[BB9]]
 ; CHECK:       bb11:
 ; CHECK-NEXT:    ret void
@@ -51,8 +50,8 @@ define void @foo() personality ptr @bar {
 ; CHECK-NEXT:    [[LOCAL_5_84111_LCSSA:%.*]] = phi i32 [ [[LOCAL_5_84111]], [[BB7]] ]
 ; CHECK-NEXT:    [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 }
 ; CHECK-NEXT:    cleanup
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_10_89113_LCSSA]], i32 0
-; CHECK-NEXT:    [[TMP9]] = insertelement <2 x i32> [[TMP11]], i32 [[LOCAL_5_84111_LCSSA]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[LOCAL_10_89113_LCSSA]], i32 2
+; CHECK-NEXT:    [[TMP11]] = insertelement <4 x i32> [[TMP12]], i32 [[LOCAL_5_84111_LCSSA]], i32 3
 ; CHECK-NEXT:    br label [[BB9]]
 ;
 bb1: