Skip to content

Commit

Permalink
[VPlan] Remove VPIteration, update to use directly VPLane instead (NFC)
Browse files Browse the repository at this point in the history
After 8ec4067 (#95842),
only the lane part of VPIteration is used.

Simplify the code by replacing remaining uses of VPIteration with VPLane directly.
  • Loading branch information
fhahn committed Sep 25, 2024
1 parent 556ec4a commit aae7ac6
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 134 deletions.
44 changes: 21 additions & 23 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -505,8 +505,7 @@ class InnerLoopVectorizer {
/// inclusive. Uses the VPValue operands from \p RepRecipe instead of \p
/// Instr's operands.
void scalarizeInstruction(const Instruction *Instr,
VPReplicateRecipe *RepRecipe,
const VPIteration &Instance,
VPReplicateRecipe *RepRecipe, const VPLane &Lane,
VPTransformState &State);

/// Fix the non-induction PHIs in \p Plan.
Expand Down Expand Up @@ -2322,14 +2321,14 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {

void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
VPReplicateRecipe *RepRecipe,
const VPIteration &Instance,
const VPLane &Lane,
VPTransformState &State) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");

// llvm.experimental.noalias.scope.decl intrinsics must only be duplicated for
// the first lane.
if (isa<NoAliasScopeDeclInst>(Instr))
if (!Instance.isFirstIteration())
if (!Lane.isFirstLane())
return;

// Does this instruction return a value ?
Expand All @@ -2354,18 +2353,18 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
for (const auto &I : enumerate(RepRecipe->operands())) {
auto InputInstance = Instance;
auto InputLane = Lane;
VPValue *Operand = I.value();
if (vputils::isUniformAfterVectorization(Operand))
InputInstance.Lane = VPLane::getFirstLane();
Cloned->setOperand(I.index(), State.get(Operand, InputInstance));
InputLane = VPLane::getFirstLane();
Cloned->setOperand(I.index(), State.get(Operand, InputLane));
}
State.addNewMetadata(Cloned, Instr);

// Place the cloned scalar in the new loop.
State.Builder.Insert(Cloned);

State.set(RepRecipe, Cloned, Instance);
State.set(RepRecipe, Cloned, Lane);

// If we just cloned a new assumption, add it the assumption cache.
if (auto *II = dyn_cast<AssumeInst>(Cloned))
Expand Down Expand Up @@ -2784,7 +2783,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
assert(StepVPV && "step must have been expanded during VPlan execution");
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
: State.get(StepVPV, {0, 0});
: State.get(StepVPV, VPLane(0));
Value *Escape =
emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step,
II.getKind(), II.getInductionBinOp());
Expand Down Expand Up @@ -7435,8 +7434,7 @@ static void createAndCollectMergePhiForReduction(
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

Value *FinalValue =
State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
auto *ResumePhi =
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
Expand Down Expand Up @@ -7525,7 +7523,7 @@ LoopVectorizationPlanner::executePlan(
BestVPlan.getPreheader()->execute(&State);
}
if (!ILV.getTripCount())
ILV.setTripCount(State.get(BestVPlan.getTripCount(), {0, 0}));
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
else
assert(IsEpilogueVectorization && "should only re-use the existing trip "
"count during epilogue vectorization");
Expand Down Expand Up @@ -9409,48 +9407,48 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
}

void VPDerivedIVRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "VPDerivedIVRecipe being replicated.");
assert(!State.Lane && "VPDerivedIVRecipe being replicated.");

// Fast-math-flags propagate from the original induction instruction.
IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
if (FPBinOp)
State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags());

Value *Step = State.get(getStepValue(), VPIteration(0, 0));
Value *CanonicalIV = State.get(getOperand(1), VPIteration(0, 0));
Value *Step = State.get(getStepValue(), VPLane(0));
Value *CanonicalIV = State.get(getOperand(1), VPLane(0));
Value *DerivedIV = emitTransformedIndex(
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
Kind, cast_if_present<BinaryOperator>(FPBinOp));
DerivedIV->setName("offset.idx");
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");

State.set(this, DerivedIV, VPIteration(0, 0));
State.set(this, DerivedIV, VPLane(0));
}

void VPReplicateRecipe::execute(VPTransformState &State) {
Instruction *UI = getUnderlyingInstr();
if (State.Instance) { // Generate a single instance.
if (State.Lane) { // Generate a single instance.
assert((State.VF.isScalar() || !isUniform()) &&
"uniform recipe shouldn't be predicated");
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
State.ILV->scalarizeInstruction(UI, this, *State.Instance, State);
State.ILV->scalarizeInstruction(UI, this, *State.Lane, State);
// Insert scalar instance packing it into a vector.
if (State.VF.isVector() && shouldPack()) {
// If we're constructing lane 0, initialize to start from poison.
if (State.Instance->Lane.isFirstLane()) {
if (State.Lane->isFirstLane()) {
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
Value *Poison = PoisonValue::get(
VectorType::get(UI->getType(), State.VF));
State.set(this, Poison);
}
State.packScalarIntoVectorValue(this, *State.Instance);
State.packScalarIntoVectorValue(this, *State.Lane);
}
return;
}

if (IsUniform) {
// Uniform within VL means we need to generate lane 0.
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
State.ILV->scalarizeInstruction(UI, this, VPLane(0), State);
return;
}

Expand All @@ -9459,15 +9457,15 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
if (isa<StoreInst>(UI) &&
vputils::isUniformAfterVectorization(getOperand(1))) {
auto Lane = VPLane::getLastLaneForVF(State.VF);
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
return;
}

// Generate scalar instances for all VF lanes.
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
const unsigned EndLane = State.VF.getKnownMinValue();
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
}

// Determine how to lower the scalar epilogue, which depends on 1) optimising
Expand Down
53 changes: 26 additions & 27 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,28 +228,27 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
: VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}

Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
if (Def->isLiveIn())
return Def->getLiveInIRValue();

if (hasScalarValue(Def, Instance)) {
return Data.VPV2Scalars[Def][Instance.Lane.mapToCacheIndex(VF)];
}
if (!Instance.Lane.isFirstLane() &&
vputils::isUniformAfterVectorization(Def) &&
hasScalarValue(Def, {Instance.Part, VPLane::getFirstLane()})) {
if (hasScalarValue(Def, Lane))
return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];

if (!Lane.isFirstLane() && vputils::isUniformAfterVectorization(Def) &&
hasScalarValue(Def, VPLane::getFirstLane())) {
return Data.VPV2Scalars[Def][0];
}

assert(hasVectorValue(Def));
auto *VecPart = Data.VPV2Vector[Def];
if (!VecPart->getType()->isVectorTy()) {
assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
return VecPart;
}
// TODO: Cache created scalar values.
Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
// set(Def, Extract, Lane);
return Extract;
}
Expand All @@ -258,11 +257,11 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
if (NeedsScalar) {
assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
!vputils::onlyFirstLaneUsed(Def) ||
(hasScalarValue(Def, VPIteration(0, 0)) &&
(hasScalarValue(Def, VPLane(0)) &&
Data.VPV2Scalars[Def].size() == 1)) &&
"Trying to access a single scalar per part but has multiple scalars "
"per part.");
return get(Def, VPIteration(0, 0));
return get(Def, VPLane(0));
}

// If Values have been set for this Def return the one relevant for \p Part.
Expand All @@ -289,15 +288,15 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
return Shuf;
};

if (!hasScalarValue(Def, {0, 0})) {
if (!hasScalarValue(Def, {0})) {
assert(Def->isLiveIn() && "expected a live-in");
Value *IRV = Def->getLiveInIRValue();
Value *B = GetBroadcastInstrs(IRV);
set(Def, B);
return B;
}

Value *ScalarValue = get(Def, {0, 0});
Value *ScalarValue = get(Def, VPLane(0));
// If we aren't vectorizing, we can just copy the scalar map values over
// to the vector map.
if (VF.isScalar()) {
Expand All @@ -307,9 +306,9 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {

bool IsUniform = vputils::isUniformAfterVectorization(Def);

unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1;
VPLane LastLane(IsUniform ? 0 : VF.getKnownMinValue() - 1);
// Check if there is a scalar value for the selected lane.
if (!hasScalarValue(Def, {0, LastLane})) {
if (!hasScalarValue(Def, LastLane)) {
// At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
// VPExpandSCEVRecipes can also be uniform.
assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
Expand All @@ -320,7 +319,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
LastLane = 0;
}

auto *LastInst = cast<Instruction>(get(Def, {0, LastLane}));
auto *LastInst = cast<Instruction>(get(Def, LastLane));
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
// will directly follow the scalar definitions.
Expand All @@ -347,7 +346,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
set(Def, Undef);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
packScalarIntoVectorValue(Def, {0, Lane});
packScalarIntoVectorValue(Def, Lane);
VectorValue = get(Def);
}
Builder.restoreIP(OldIP);
Expand Down Expand Up @@ -401,11 +400,11 @@ void VPTransformState::setDebugLocFrom(DebugLoc DL) {
}

void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
const VPIteration &Instance) {
Value *ScalarInst = get(Def, Instance);
const VPLane &Lane) {
Value *ScalarInst = get(Def, Lane);
Value *VectorValue = get(Def);
VectorValue = Builder.CreateInsertElement(
VectorValue, ScalarInst, Instance.Lane.getAsRuntimeExpr(Builder, VF));
VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst,
Lane.getAsRuntimeExpr(Builder, VF));
set(Def, VectorValue);
}

Expand Down Expand Up @@ -483,7 +482,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
}

void VPBasicBlock::execute(VPTransformState *State) {
bool Replica = State->Instance && !State->Instance->isFirstIteration();
bool Replica = bool(State->Lane);
VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
VPBlockBase *SingleHPred = nullptr;
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
Expand Down Expand Up @@ -765,14 +764,14 @@ void VPRegionBlock::execute(VPTransformState *State) {
return;
}

assert(!State->Instance && "Replicating a Region with non-null instance.");
assert(!State->Lane && "Replicating a Region with non-null instance.");

// Enter replicating mode.
State->Instance = VPIteration(0, 0);
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
State->Lane = VPLane(0);
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
++Lane) {
State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
State->Lane = VPLane(Lane, VPLane::Kind::First);
// Visit the VPBlocks connected to \p this, starting from it.
for (VPBlockBase *Block : RPOT) {
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
Expand All @@ -781,7 +780,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
}

// Exit replicating mode.
State->Instance.reset();
State->Lane.reset();
}

InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {
Expand Down
Loading

0 comments on commit aae7ac6

Please sign in to comment.