@@ -552,7 +552,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
552552 case VPRecipeBase::VPWidenIntOrFpInductionSC:
553553 case VPRecipeBase::VPWidenPointerInductionSC:
554554 case VPRecipeBase::VPReductionPHISC:
555- case VPRecipeBase::VPPartialReductionSC:
556555 return true ;
557556 case VPRecipeBase::VPBranchOnMaskSC:
558557 case VPRecipeBase::VPInterleaveSC:
@@ -2182,34 +2181,37 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
21822181 // / Descriptor for the reduction.
21832182 const RecurrenceDescriptor &RdxDesc;
21842183
2185- // / The phi is part of an in-loop reduction.
2186- bool IsInLoop;
2187-
21882184 // / The phi is part of an ordered reduction. Requires the reduction to be in-loop.
21892185 bool IsOrdered;
21902186
2191- // / When expanding the reduction PHI, the plan's VF element count is divided
2192- // / by this factor to form the reduction phi's VF.
2193- unsigned VFScaleFactor = 1 ;
2187+ // / The factor that the VF is divided by to give the element count of this
2188+ // / recipe's output.
2189+ // / For reductions that are neither in-loop nor partial this is equal to 1.
2190+ // / For in-loop reductions this is equal to 0, to specify that the divisor
2191+ // / is the VF itself (which may not be known yet). For partial reductions
2192+ // / this is greater than 1.
2193+ unsigned VFScaleFactor;
21942194
21952195public:
21962196 // / Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
21972197 // / RdxDesc.
21982198 VPReductionPHIRecipe (PHINode *Phi, const RecurrenceDescriptor &RdxDesc,
2199- VPValue &Start, bool IsInLoop = false ,
2200- bool IsOrdered = false , unsigned VFScaleFactor = 1 )
2199+ VPValue &Start, bool IsOrdered = false ,
2200+ unsigned VFScaleFactor = 1 )
22012201 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2202- RdxDesc (RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
2203- VFScaleFactor(VFScaleFactor) {
2204- assert ((!IsOrdered || IsInLoop) && " IsOrdered requires IsInLoop" );
2202+ RdxDesc (RdxDesc), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
2203+ assert ((!IsOrdered || isInLoop ()) &&
2204+ " IsOrdered requires the reduction to be in-loop" );
2205+ assert (((!isInLoop () && !IsOrdered) || isInLoop ()) &&
2206+ " Invalid VFScaleFactor" );
22052207 }
22062208
22072209 ~VPReductionPHIRecipe () override = default ;
22082210
22092211 VPReductionPHIRecipe *clone () override {
2210- auto *R = new VPReductionPHIRecipe (cast<PHINode>( getUnderlyingInstr ()),
2211- RdxDesc, * getOperand ( 0 ), IsInLoop ,
2212- IsOrdered, VFScaleFactor);
2212+ auto *R =
2213+ new VPReductionPHIRecipe (cast<PHINode>( getUnderlyingInstr ()), RdxDesc ,
2214+ * getOperand ( 0 ), IsOrdered, VFScaleFactor);
22132215 R->addOperand (getBackedgeValue ());
22142216 return R;
22152217 }
@@ -2235,8 +2237,10 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
22352237 // / Returns true, if the phi is part of an ordered reduction.
22362238 bool isOrdered () const { return IsOrdered; }
22372239
2238- // / Returns true, if the phi is part of an in-loop reduction.
2239- bool isInLoop () const { return IsInLoop; }
2240+ // / Returns true if the phi is part of an in-loop reduction.
2241+ bool isInLoop () const { return VFScaleFactor == 0 ; }
2242+
2243+ bool isPartialReduction () const { return VFScaleFactor > 1 ; }
22402244
22412245 // / Returns true if the recipe only uses the first lane of operand \p Op.
22422246 bool onlyFirstLaneUsed (const VPValue *Op) const override {
@@ -2409,23 +2413,32 @@ class VPInterleaveRecipe : public VPRecipeBase {
24092413 Instruction *getInsertPos () const { return IG->getInsertPos (); }
24102414};
24112415
2412- // / A recipe to represent inloop reduction operations, performing a reduction on
2413- // / a vector operand into a scalar value, and adding the result to a chain.
2414- // / The Operands are {ChainOp, VecOp, [Condition]}.
2416+ // / A recipe to represent in-loop, ordered or partial reduction operations. It
2417+ // / performs a reduction on a vector operand into a scalar (vector in the case
2418+ // / of a partial reduction) value, and adds the result to a chain. The Operands
2419+ // / are {ChainOp, VecOp, [Condition]}.
24152420class VPReductionRecipe : public VPRecipeWithIRFlags {
24162421 // / The recurrence kind for the reduction in question.
24172422 RecurKind RdxKind;
24182423 bool IsOrdered;
24192424 // / Whether the reduction is conditional.
24202425 bool IsConditional = false ;
2426+ // / The factor that the VF is divided by to give the element count of this
2427+ // / recipe's output.
2428+ // / For reductions that are neither in-loop nor partial this is equal to 1.
2429+ // / For in-loop reductions this is equal to 0, to specify that the divisor
2430+ // / is the VF itself (which may not be known yet).
2431+ // / For partial reductions this is greater than 1.
2432+ unsigned VFScaleFactor;
24212433
24222434protected:
24232435 VPReductionRecipe (const unsigned char SC, RecurKind RdxKind,
24242436 FastMathFlags FMFs, Instruction *I,
24252437 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2426- bool IsOrdered, DebugLoc DL)
2438+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL)
24272439 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2428- IsOrdered (IsOrdered) {
2440+ IsOrdered (IsOrdered), VFScaleFactor(VFScaleFactor) {
2441+ assert ((!IsOrdered || VFScaleFactor == 0 ) && " Invalid scale factor" );
24292442 if (CondOp) {
24302443 IsConditional = true ;
24312444 addOperand (CondOp);
@@ -2436,24 +2449,24 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24362449public:
24372450 VPReductionRecipe (RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
24382451 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2439- bool IsOrdered, DebugLoc DL = {})
2452+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
24402453 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
24412454 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2442- IsOrdered, DL) {}
2455+ IsOrdered, VFScaleFactor, DL) {}
24432456
24442457 VPReductionRecipe (const RecurKind RdxKind, FastMathFlags FMFs,
24452458 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2446- bool IsOrdered, DebugLoc DL = {})
2459+ bool IsOrdered, unsigned VFScaleFactor, DebugLoc DL = {})
24472460 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr ,
24482461 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2449- IsOrdered, DL) {}
2462+ IsOrdered, VFScaleFactor, DL) {}
24502463
24512464 ~VPReductionRecipe () override = default ;
24522465
24532466 VPReductionRecipe *clone () override {
2454- return new VPReductionRecipe (RdxKind, getFastMathFlags (),
2455- getUnderlyingInstr (), getChainOp (), getVecOp (),
2456- getCondOp (), IsOrdered, getDebugLoc ());
2467+ return new VPReductionRecipe (
2468+ RdxKind, getFastMathFlags (), getUnderlyingInstr (), getChainOp (),
2469+ getVecOp (), getCondOp (), IsOrdered, VFScaleFactor , getDebugLoc ());
24572470 }
24582471
24592472 static inline bool classof (const VPRecipeBase *R) {
@@ -2485,6 +2498,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24852498 bool isOrdered () const { return IsOrdered; };
24862499 // / Return true if the in-loop reduction is conditional.
24872500 bool isConditional () const { return IsConditional; };
2501+ // / Return true if the reduction is a partial reduction.
2502+ bool isPartialReduction () const { return VFScaleFactor > 1 ; }
24882503 // / The VPValue of the scalar Chain being accumulated.
24892504 VPValue *getChainOp () const { return getOperand (0 ); }
24902505 // / The VPValue of the vector value to be reduced.
@@ -2493,65 +2508,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
24932508 VPValue *getCondOp () const {
24942509 return isConditional () ? getOperand (getNumOperands () - 1 ) : nullptr ;
24952510 }
2496- };
2497-
2498- // / A recipe for forming partial reductions. In the loop, an accumulator and
2499- // / vector operand are added together and passed to the next iteration as the
2500- // / next accumulator. After the loop body, the accumulator is reduced to a
2501- // / scalar value.
2502- class VPPartialReductionRecipe : public VPReductionRecipe {
2503- unsigned Opcode;
2504-
2505- // / The divisor by which the VF of this recipe's output should be divided
2506- // / during execution.
2507- unsigned VFScaleFactor;
2508-
2509- public:
2510- VPPartialReductionRecipe (Instruction *ReductionInst, VPValue *Op0,
2511- VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2512- : VPPartialReductionRecipe(ReductionInst->getOpcode (), Op0, Op1, Cond,
2513- VFScaleFactor, ReductionInst) {}
2514- VPPartialReductionRecipe (unsigned Opcode, VPValue *Op0, VPValue *Op1,
2515- VPValue *Cond, unsigned ScaleFactor,
2516- Instruction *ReductionInst = nullptr )
2517- : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2518- FastMathFlags (), ReductionInst,
2519- ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2520- Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2521- [[maybe_unused]] auto *AccumulatorRecipe =
2522- getChainOp ()->getDefiningRecipe ();
2523- assert ((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2524- isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2525- " Unexpected operand order for partial reduction recipe" );
2526- }
2527- ~VPPartialReductionRecipe () override = default ;
2528-
2529- VPPartialReductionRecipe *clone () override {
2530- return new VPPartialReductionRecipe (Opcode, getOperand (0 ), getOperand (1 ),
2531- getCondOp (), VFScaleFactor,
2532- getUnderlyingInstr ());
2533- }
2534-
2535- VP_CLASSOF_IMPL (VPDef::VPPartialReductionSC)
2536-
2537- // / Generate the reduction in the loop.
2538- void execute(VPTransformState &State) override ;
2539-
2540- // / Return the cost of this VPPartialReductionRecipe.
2541- InstructionCost computeCost (ElementCount VF,
2542- VPCostContext &Ctx) const override ;
2543-
2544- // / Get the binary op's opcode.
2545- unsigned getOpcode () const { return Opcode; }
2546-
25472511 // / Get the factor that the VF of this recipe's output should be scaled by.
25482512 unsigned getVFScaleFactor () const { return VFScaleFactor; }
2549-
2550- #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2551- // / Print the recipe.
2552- void print (raw_ostream &O, const Twine &Indent,
2553- VPSlotTracker &SlotTracker) const override ;
2554- #endif
25552513};
25562514
25572515// / A recipe to represent inloop reduction operations with vector-predication
@@ -2567,7 +2525,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
25672525 R.getFastMathFlags(),
25682526 cast_or_null<Instruction>(R.getUnderlyingValue()),
25692527 ArrayRef<VPValue *>({R.getChainOp (), R.getVecOp (), &EVL}), CondOp,
2570- R.isOrdered(), DL) {}
2528+ R.isOrdered(), 0 , DL) {}
25712529
25722530 ~VPReductionEVLRecipe () override = default ;
25732531
@@ -2768,6 +2726,11 @@ class VPSingleDefBundleRecipe : public VPSingleDefRecipe {
27682726 VPWidenRecipe *Mul, VPReductionRecipe *Red)
27692727 : VPSingleDefBundleRecipe(BundleTypes::ExtMulAccumulateReduction,
27702728 {Ext0, Ext1, Mul, Red}) {}
2729+ VPSingleDefBundleRecipe (VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
2730+ VPWidenRecipe *Mul, VPWidenRecipe *Sub,
2731+ VPReductionRecipe *Red)
2732+ : VPSingleDefBundleRecipe(BundleTypes::ExtMulAccumulateReduction,
2733+ {Ext0, Ext1, Mul, Sub, Red}) {}
27712734
27722735 ~VPSingleDefBundleRecipe () override {
27732736 SmallPtrSet<VPRecipeBase *, 4 > Seen;
0 commit comments