-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LV] Bundle sub reductions into VPExpressionRecipe #147255
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
d64d52c
3171e7b
cacb89e
53fca5c
55a9c3e
27e462a
67a0604
33236a3
b988436
0c55abc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3151,23 +3151,23 @@ static VPExpressionRecipe * | |
| tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, | ||
| VPCostContext &Ctx, VFRange &Range) { | ||
| unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()); | ||
| if (Opcode != Instruction::Add) | ||
| if (Opcode != Instruction::Add && Opcode != Instruction::Sub) | ||
| return nullptr; | ||
|
|
||
| Type *RedTy = Ctx.Types.inferScalarType(Red); | ||
|
|
||
| // Clamp the range if using multiply-accumulate-reduction is profitable. | ||
| auto IsMulAccValidAndClampRange = | ||
| [&](bool isZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0, | ||
| [&](bool IsZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0, | ||
| VPWidenCastRecipe *Ext1, VPWidenCastRecipe *OuterExt) -> bool { | ||
| return LoopVectorizationPlanner::getDecisionAndClampRange( | ||
| [&](ElementCount VF) { | ||
| TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
| Type *SrcTy = | ||
| Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy; | ||
| auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF)); | ||
| InstructionCost MulAccCost = | ||
| Ctx.TTI.getMulAccReductionCost(isZExt, RedTy, SrcVecTy, CostKind); | ||
| InstructionCost MulAccCost = Ctx.TTI.getMulAccReductionCost( | ||
| IsZExt, Opcode, RedTy, SrcVecTy, CostKind); | ||
| InstructionCost MulCost = Mul->computeCost(VF, Ctx); | ||
| InstructionCost RedCost = Red->computeCost(VF, Ctx); | ||
| InstructionCost ExtCost = 0; | ||
|
|
@@ -3192,7 +3192,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, | |
| dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe()); | ||
| auto *RecipeB = | ||
| dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe()); | ||
| auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe()); | ||
| auto *MulR = cast<VPWidenRecipe>(VecOp->getDefiningRecipe()); | ||
|
||
|
|
||
| // Match reduce.add(mul(ext, ext)). | ||
| if (RecipeA && RecipeB && | ||
|
|
@@ -3201,12 +3201,12 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red, | |
| match(RecipeB, m_ZExtOrSExt(m_VPValue())) && | ||
| IsMulAccValidAndClampRange(RecipeA->getOpcode() == | ||
| Instruction::CastOps::ZExt, | ||
| Mul, RecipeA, RecipeB, nullptr)) { | ||
| return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red); | ||
| MulR, RecipeA, RecipeB, nullptr)) { | ||
| return new VPExpressionRecipe(RecipeA, RecipeB, MulR, Red); | ||
| } | ||
| // Match reduce.add(mul). | ||
| if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr)) | ||
| return new VPExpressionRecipe(Mul, Red); | ||
| if (IsMulAccValidAndClampRange(true, MulR, nullptr, nullptr, nullptr)) | ||
| return new VPExpressionRecipe(MulR, Red); | ||
| } | ||
| // Match reduce.add(ext(mul(ext(A), ext(B)))). | ||
| // All extend recipes must have same opcode or A == B | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1468,8 +1468,8 @@ static void analyzeCostOfVecReduction(const IntrinsicInst &II, | |
| TTI::CastContextHint::None, CostKind, RedOp); | ||
|
|
||
| CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost; | ||
| CostAfterReduction = | ||
| TTI.getMulAccReductionCost(IsUnsigned, II.getType(), ExtType, CostKind); | ||
| CostAfterReduction = TTI.getMulAccReductionCost( | ||
| IsUnsigned, ReductionOpc, II.getType(), ExtType, CostKind); | ||
|
Comment on lines
+1471
to
+1472
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It would be nice to have a test for this, but I'm not sure if that's possible.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've been trying to write a test, but I don't think this code is ever reached. The |
||
| return; | ||
| } | ||
| CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This rename is NFC (no functional change); maybe remove it from this PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.