Skip to content

Commit

Permalink
[VPlan] Only store single vector per VPValue in VPTransformState. (NFC)
Browse files Browse the repository at this point in the history
After 8ec4067 (#95842),
VPTransformState only stores a single vector value per VPValue.

Simplify the code by replacing the SmallVector in PerPartOutput with a
single Value * and rename to VPV2Vector for clarity.

Also remove the redundant Part argument from various accessors.
  • Loading branch information
fhahn committed Sep 23, 2024
1 parent 31ac3d0 commit 57f5d8f
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 162 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3075,13 +3075,13 @@ void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan,
VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P);
if (!VPPhi)
continue;
PHINode *NewPhi = cast<PHINode>(State.get(VPPhi, 0));
PHINode *NewPhi = cast<PHINode>(State.get(VPPhi));
// Make sure the builder has a valid insert point.
Builder.SetInsertPoint(NewPhi);
for (unsigned Idx = 0; Idx < VPPhi->getNumOperands(); ++Idx) {
VPValue *Inc = VPPhi->getIncomingValue(Idx);
VPBasicBlock *VPBB = VPPhi->getIncomingBlock(Idx);
NewPhi->addIncoming(State.get(Inc, 0), State.CFG.VPBB2IRBB[VPBB]);
NewPhi->addIncoming(State.get(Inc), State.CFG.VPBB2IRBB[VPBB]);
}
}
}
Expand Down Expand Up @@ -9445,7 +9445,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
Value *Poison = PoisonValue::get(
VectorType::get(UI->getType(), State.VF));
State.set(this, Poison, State.Instance->Part);
State.set(this, Poison);
}
State.packScalarIntoVectorValue(this, *State.Instance);
}
Expand Down
54 changes: 26 additions & 28 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
return Data.PerPartScalars[Def][Instance.Part][0];
}

assert(hasVectorValue(Def, Instance.Part));
auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
assert(hasVectorValue(Def));
auto *VecPart = Data.VPV2Vector[Def];
if (!VecPart->getType()->isVectorTy()) {
assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
return VecPart;
Expand All @@ -255,20 +255,20 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
return Extract;
}

Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
if (NeedsScalar) {
assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def, Part) ||
assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
!vputils::onlyFirstLaneUsed(Def) ||
(hasScalarValue(Def, VPIteration(Part, 0)) &&
Data.PerPartScalars[Def][Part].size() == 1)) &&
(hasScalarValue(Def, VPIteration(0, 0)) &&
Data.PerPartScalars[Def][0].size() == 1)) &&
"Trying to access a single scalar per part but has multiple scalars "
"per part.");
return get(Def, VPIteration(Part, 0));
return get(Def, VPIteration(0, 0));
}

// If Values have been set for this Def return the one relevant for \p Part.
if (hasVectorValue(Def, Part))
return Data.PerPartOutput[Def][Part];
if (hasVectorValue(Def))
return Data.VPV2Vector[Def];

auto GetBroadcastInstrs = [this, Def](Value *V) {
bool SafeToHoist = Def->isDefinedOutsideLoopRegions();
Expand All @@ -290,29 +290,27 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
return Shuf;
};

if (!hasScalarValue(Def, {Part, 0})) {
if (!hasScalarValue(Def, {0, 0})) {
assert(Def->isLiveIn() && "expected a live-in");
if (Part != 0)
return get(Def, 0);
Value *IRV = Def->getLiveInIRValue();
Value *B = GetBroadcastInstrs(IRV);
set(Def, B, Part);
set(Def, B);
return B;
}

Value *ScalarValue = get(Def, {Part, 0});
Value *ScalarValue = get(Def, {0, 0});
// If we aren't vectorizing, we can just copy the scalar map values over
// to the vector map.
if (VF.isScalar()) {
set(Def, ScalarValue, Part);
set(Def, ScalarValue);
return ScalarValue;
}

bool IsUniform = vputils::isUniformAfterVectorization(Def);

unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1;
// Check if there is a scalar value for the selected lane.
if (!hasScalarValue(Def, {Part, LastLane})) {
if (!hasScalarValue(Def, {0, LastLane})) {
// At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
// VPExpandSCEVRecipes can also be uniform.
assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
Expand All @@ -323,7 +321,7 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
LastLane = 0;
}

auto *LastInst = cast<Instruction>(get(Def, {Part, LastLane}));
auto *LastInst = cast<Instruction>(get(Def, {0, LastLane}));
// Set the insert point after the last scalarized instruction or after the
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
// will directly follow the scalar definitions.
Expand All @@ -343,15 +341,15 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
Value *VectorValue = nullptr;
if (IsUniform) {
VectorValue = GetBroadcastInstrs(ScalarValue);
set(Def, VectorValue, Part);
set(Def, VectorValue);
} else {
// Initialize packing with insertelements to start from undef.
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
set(Def, Undef, Part);
set(Def, Undef);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
packScalarIntoVectorValue(Def, {Part, Lane});
VectorValue = get(Def, Part);
packScalarIntoVectorValue(Def, {0, Lane});
VectorValue = get(Def);
}
Builder.restoreIP(OldIP);
return VectorValue;
Expand Down Expand Up @@ -406,10 +404,10 @@ void VPTransformState::setDebugLocFrom(DebugLoc DL) {
void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
const VPIteration &Instance) {
Value *ScalarInst = get(Def, Instance);
Value *VectorValue = get(Def, Instance.Part);
Value *VectorValue = get(Def);
VectorValue = Builder.CreateInsertElement(
VectorValue, ScalarInst, Instance.Lane.getAsRuntimeExpr(Builder, VF));
set(Def, VectorValue, Instance.Part);
set(Def, VectorValue);
}

BasicBlock *
Expand Down Expand Up @@ -1074,12 +1072,12 @@ void VPlan::execute(VPTransformState *State) {
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
PHINode *Phi = nullptr;
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
} else {
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
"recipe generating only scalars should have been replaced");
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
Phi = cast<PHINode>(GEP->getPointerOperand());
}

Expand All @@ -1092,7 +1090,7 @@ void VPlan::execute(VPTransformState *State) {

// Use the steps for the last part as backedge value for the induction.
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand(), 0));
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
continue;
}

Expand All @@ -1101,8 +1099,8 @@ void VPlan::execute(VPTransformState *State) {
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
Value *Phi = State->get(PhiR, 0, NeedsScalar);
Value *Val = State->get(PhiR->getBackedgeValue(), 0, NeedsScalar);
Value *Phi = State->get(PhiR, NeedsScalar);
Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}

Expand Down
46 changes: 16 additions & 30 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -263,30 +263,22 @@ struct VPTransformState {
std::optional<VPIteration> Instance;

struct DataState {
/// A type for vectorized values in the new loop. Each value from the
/// original loop, when vectorized, is represented by UF vector values in
/// the new unrolled loop, where UF is the unroll factor.
typedef SmallVector<Value *, 2> PerPartValuesTy;

DenseMap<VPValue *, PerPartValuesTy> PerPartOutput;
// Each value from the original loop, when vectorized, is represented by a
// vector value in the map.
DenseMap<VPValue *, Value *> VPV2Vector;

using ScalarsPerPartValuesTy = SmallVector<SmallVector<Value *, 4>, 2>;

This comment has been minimized.

Copy link
@ayalz

ayalz Sep 24, 2024

Collaborator

PerPart should be reduced here as well.

This comment has been minimized.

Copy link
@fhahn

fhahn Sep 24, 2024

Author Contributor

This was done in f76dae1

What remains is to remove VPIteration, which should be done soon

This comment has been minimized.

Copy link
@fhahn

fhahn Sep 25, 2024

Author Contributor

The remaining uses of VPIteration have been replaced by VPLane directly in aae7ac6

DenseMap<VPValue *, ScalarsPerPartValuesTy> PerPartScalars;
} Data;

/// Get the generated vector Value for a given VPValue \p Def and a given \p
/// Part if \p IsScalar is false, otherwise return the generated scalar
/// for \p Part. \See set.
Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);
/// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
/// is false, otherwise return the generated scalar. \See set.
Value *get(VPValue *Def, bool IsScalar = false);

/// Get the generated Value for a given VPValue and given Part and Lane.
Value *get(VPValue *Def, const VPIteration &Instance);

bool hasVectorValue(VPValue *Def, unsigned Part) {
auto I = Data.PerPartOutput.find(Def);
return I != Data.PerPartOutput.end() && Part < I->second.size() &&
I->second[Part];
}
bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }

bool hasScalarValue(VPValue *Def, VPIteration Instance) {
auto I = Data.PerPartScalars.find(Def);
Expand All @@ -298,28 +290,22 @@ struct VPTransformState {
I->second[Instance.Part][CacheIdx];
}

/// Set the generated vector Value for a given VPValue and a given Part, if \p
/// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
/// Set the generated vector Value for a given VPValue, if \p
/// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
void set(VPValue *Def, Value *V, bool IsScalar = false) {
if (IsScalar) {
set(Def, V, VPIteration(Part, 0));
set(Def, V, VPIteration(0, 0));
return;
}
assert((VF.isScalar() || V->getType()->isVectorTy()) &&
"scalar values must be stored as (Part, 0)");
if (!Data.PerPartOutput.count(Def)) {
DataState::PerPartValuesTy Entry(1);
Data.PerPartOutput[Def] = Entry;
}
Data.PerPartOutput[Def][Part] = V;
"scalar values must be stored as (0, 0)");
Data.VPV2Vector[Def] = V;
}

/// Reset an existing vector value for \p Def and a given \p Part.
void reset(VPValue *Def, Value *V, unsigned Part) {
auto Iter = Data.PerPartOutput.find(Def);
assert(Iter != Data.PerPartOutput.end() &&
"need to overwrite existing value");
Iter->second[Part] = V;
void reset(VPValue *Def, Value *V) {
assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
Data.VPV2Vector[Def] = V;
}

/// Set the generated scalar \p V for \p Def and the given \p Instance.
Expand Down
Loading

0 comments on commit 57f5d8f

Please sign in to comment.