Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/InstructionCost.h"

namespace {
class GeneratedRTChecks;
}

namespace llvm {

class LoopInfo;
Expand Down Expand Up @@ -554,6 +558,10 @@ class LoopVectorizationPlanner {
VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF);

/// Attach the runtime checks of \p RTChecks to \p Plan.
void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
bool HasBranchWeights) const;

#ifndef NDEBUG
/// \return The most profitable vectorization factor for the available VPlans
/// and the cost of that VF.
Expand Down
236 changes: 84 additions & 152 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,12 +399,6 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));

// Likelyhood of bypassing the vectorized loop because assumptions about SCEV
// variables not overflowing do not hold. See `emitSCEVChecks`.
static constexpr uint32_t SCEVCheckBypassWeights[] = {1, 127};
// Likelyhood of bypassing the vectorized loop because pointers overlap. See
// `emitMemRuntimeChecks`.
static constexpr uint32_t MemCheckBypassWeights[] = {1, 127};
// Likelyhood of bypassing the vectorized loop because there are zero trips left
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
Expand Down Expand Up @@ -544,16 +538,6 @@ class InnerLoopVectorizer {
/// it overflows.
void emitIterationCountCheck(BasicBlock *Bypass);

/// Emit a bypass check to see if all of the SCEV assumptions we've
/// had to make are correct. Returns the block containing the checks or
/// nullptr if no checks have been added.
BasicBlock *emitSCEVChecks(BasicBlock *Bypass);

/// Emit bypass checks to check any memory assumptions we may have made.
/// Returns the block containing the checks or nullptr if no checks have been
/// added.
BasicBlock *emitMemRuntimeChecks(BasicBlock *Bypass);

/// Emit basic blocks (prefixed with \p Prefix) for the iteration check,
/// vector loop preheader, middle block and scalar preheader.
void createVectorLoopSkeleton(StringRef Prefix);
Expand Down Expand Up @@ -657,8 +641,6 @@ struct EpilogueLoopVectorizationInfo {
unsigned EpilogueUF = 0;
BasicBlock *MainLoopIterationCountCheck = nullptr;
BasicBlock *EpilogueIterationCountCheck = nullptr;
BasicBlock *SCEVSafetyCheck = nullptr;
BasicBlock *MemSafetyCheck = nullptr;
Value *TripCount = nullptr;
Value *VectorTripCount = nullptr;
VPlan &EpiloguePlan;
Expand Down Expand Up @@ -1786,7 +1768,6 @@ class GeneratedRTChecks {
SCEVExpander MemCheckExp;

bool CostTooHigh = false;
const bool AddBranchWeights;

Loop *OuterLoop = nullptr;

Expand All @@ -1798,11 +1779,10 @@ class GeneratedRTChecks {
public:
GeneratedRTChecks(PredicatedScalarEvolution &PSE, DominatorTree *DT,
LoopInfo *LI, TargetTransformInfo *TTI,
const DataLayout &DL, bool AddBranchWeights,
TTI::TargetCostKind CostKind)
const DataLayout &DL, TTI::TargetCostKind CostKind)
: DT(DT), LI(LI), TTI(TTI), SCEVExp(*PSE.getSE(), DL, "scev.check"),
MemCheckExp(*PSE.getSE(), DL, "scev.check"),
AddBranchWeights(AddBranchWeights), PSE(PSE), CostKind(CostKind) {}
MemCheckExp(*PSE.getSE(), DL, "scev.check"), PSE(PSE),
CostKind(CostKind) {}

/// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can
/// accurately estimate the cost of the runtime checks. The blocks are
Expand Down Expand Up @@ -2019,56 +1999,20 @@ class GeneratedRTChecks {
MemCheckBlock->eraseFromParent();
}

/// Adds the generated SCEVCheckBlock before \p LoopVectorPreHeader and
/// adjusts the branches to branch to the vector preheader or \p Bypass,
/// depending on the generated condition.
BasicBlock *emitSCEVChecks(BasicBlock *Bypass,
BasicBlock *LoopVectorPreHeader) {
/// Retrieves the SCEVCheckCond and SCEVCheckBlock that were generated as IR
/// outside VPlan.
std::pair<Value *, BasicBlock *> getSCEVChecks() {
using namespace llvm::PatternMatch;
if (!SCEVCheckCond || match(SCEVCheckCond, m_ZeroInt()))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better optimize branch-on-false elsewhere? (SCEVCheckCond being zero)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this will be possible after this change, although it is probably best done and tested separately as follow-up?

return nullptr;

auto *Pred = LoopVectorPreHeader->getSinglePredecessor();
BranchInst::Create(LoopVectorPreHeader, SCEVCheckBlock);

SCEVCheckBlock->getTerminator()->eraseFromParent();
SCEVCheckBlock->moveBefore(LoopVectorPreHeader);
Pred->getTerminator()->replaceSuccessorWith(LoopVectorPreHeader,
SCEVCheckBlock);

BranchInst &BI =
*BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheckCond);
if (AddBranchWeights)
setBranchWeights(BI, SCEVCheckBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(SCEVCheckBlock->getTerminator(), &BI);
return SCEVCheckBlock;
}

/// Adds the generated MemCheckBlock before \p LoopVectorPreHeader and adjusts
/// the branches to branch to the vector preheader or \p Bypass, depending on
/// the generated condition.
BasicBlock *emitMemRuntimeChecks(BasicBlock *Bypass,
BasicBlock *LoopVectorPreHeader) {
// Check if we generated code that checks in runtime if arrays overlap.
if (!MemRuntimeCheckCond)
return nullptr;

auto *Pred = LoopVectorPreHeader->getSinglePredecessor();
Pred->getTerminator()->replaceSuccessorWith(LoopVectorPreHeader,
MemCheckBlock);
return {nullptr, nullptr};

MemCheckBlock->moveBefore(LoopVectorPreHeader);

BranchInst &BI =
*BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheckCond);
if (AddBranchWeights) {
setBranchWeights(BI, MemCheckBypassWeights, /*IsExpected=*/false);
}
ReplaceInstWithInst(MemCheckBlock->getTerminator(), &BI);
MemCheckBlock->getTerminator()->setDebugLoc(
Pred->getTerminator()->getDebugLoc());
return {SCEVCheckCond, SCEVCheckBlock};
}

return MemCheckBlock;
/// Retrieves the MemCheckCond and MemCheckBlock that were generated as IR
/// outside VPlan.
std::pair<Value *, BasicBlock *> getMemRuntimeChecks() {
return {MemRuntimeCheckCond, MemCheckBlock};
}

/// Return true if any runtime checks have been added
Expand Down Expand Up @@ -2461,53 +2405,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
"Plan's entry must be TCCCheckBlock");
}

BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
BasicBlock *const SCEVCheckBlock =
RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader);
if (!SCEVCheckBlock)
return nullptr;

assert((!Cost->OptForSize ||
Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled) &&
"Cannot SCEV check stride or overflow when optimizing for size");

introduceCheckBlockInVPlan(SCEVCheckBlock);
return SCEVCheckBlock;
}

BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
BasicBlock *const MemCheckBlock =
RTChecks.emitMemRuntimeChecks(Bypass, LoopVectorPreHeader);

// Check if we generated code that checks in runtime if arrays overlap. We put
// the checks into a separate block to make the more common case of few
// elements faster.
if (!MemCheckBlock)
return nullptr;

// VPlan-native path does not do any analysis for runtime checks currently.
assert((!EnableVPlanNativePath || OrigLoop->begin() == OrigLoop->end()) &&
"Runtime checks are not supported for outer loops yet");

if (Cost->OptForSize) {
assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
"Cannot emit memory checks when optimizing for size, unless forced "
"to vectorize.");
ORE->emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize",
OrigLoop->getStartLoc(),
OrigLoop->getHeader())
<< "Code-size may be reduced by not forcing "
"vectorization, or by source-code modifications "
"eliminating the need for runtime checks "
"(e.g., adding 'restrict').";
});
}

introduceCheckBlockInVPlan(MemCheckBlock);
return MemCheckBlock;
}

/// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p
/// VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
/// have a single predecessor, which is rewired to the new VPIRBasicBlock. All
Expand Down Expand Up @@ -2624,15 +2521,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// to the scalar loop.
emitIterationCountCheck(LoopScalarPreHeader);

// Generate the code to check any assumptions that we've made for SCEV
// expressions.
emitSCEVChecks(LoopScalarPreHeader);

// Generate the code that checks in runtime if arrays overlap. We put the
// checks into a separate block to make the more common case of few elements
// faster.
emitMemRuntimeChecks(LoopScalarPreHeader);

replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
return LoopVectorPreHeader;
}
Expand Down Expand Up @@ -7323,11 +7211,22 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
OrigLoop->getHeader()->getContext());
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
bool HasBranchWeights =
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
if (HasBranchWeights) {
std::optional<unsigned> VScale = CM.getVScaleForTuning();
VPlanTransforms::runPass(VPlanTransforms::addBranchWeightToMiddleTerminator,
BestVPlan, BestVF, VScale);
}

if (!VectorizingEpilogue) {
// Checks are the same for all VPlans, added to BestVPlan only for
// compactness.
attachRuntimeChecks(BestVPlan, ILV.RTChecks, HasBranchWeights);
}

// Retrieving VectorPH now when it's easier while VPlan still has Regions.
VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hoisted here from 7381 below? (and changed as VectorPH may no longer be the second successor of Entry)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, now done before regions may be removed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth a note, e.g.,
// Retrieving VectorPH now when it's easier while VPlan still has Regions.
?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added, thanks

VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
VPlanTransforms::narrowInterleaveGroups(
Expand Down Expand Up @@ -7375,7 +7274,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(

// 1. Set up the skeleton for vectorization, including vector pre-header and
// middle block. The vector loop is created during VPlan execution.
VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSuccessors()[1]);
BasicBlock *EntryBB =
cast<VPIRBasicBlock>(BestVPlan.getEntry())->getIRBasicBlock();
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
if (VectorizingEpilogue)
VPlanTransforms::removeDeadRecipes(BestVPlan);
Expand All @@ -7399,6 +7299,13 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
ILV.getOrCreateVectorTripCount(ILV.LoopVectorPreHeader), State);
replaceVPBBWithIRVPBB(VectorPH, State.CFG.PrevBB);

// Move check blocks to their final position.
// TODO: Move as part of VPIRBB execute and update impacted tests.
if (BasicBlock *MemCheckBlock = ILV.RTChecks.getMemRuntimeChecks().second)
MemCheckBlock->moveAfter(EntryBB);
if (BasicBlock *SCEVCheckBlock = ILV.RTChecks.getSCEVChecks().second)
SCEVCheckBlock->moveAfter(EntryBB);

BestVPlan.execute(&State);

// 2.5 When vectorizing the epilogue, fix reduction resume values from the
Expand Down Expand Up @@ -7499,15 +7406,6 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
emitIterationCountCheck(LoopScalarPreHeader, true);
EPI.EpilogueIterationCountCheck->setName("iter.check");

// Generate the code to check any assumptions that we've made for SCEV
// expressions.
EPI.SCEVSafetyCheck = emitSCEVChecks(LoopScalarPreHeader);

// Generate the code that checks at runtime if arrays overlap. We put the
// checks into a separate block to make the more common case of few elements
// faster.
EPI.MemSafetyCheck = emitMemRuntimeChecks(LoopScalarPreHeader);
Comment on lines -7504 to -7509
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are replaced by calling getSCEVCheckBlock() and getMemCheckBlock() instead, so can emitSCEVChecks() and emitMemRuntimeChecks() be simplified, regarding "Mark the check as used, to prevent it from being removed during cleanup"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep should be done in the latest version, thanks


// Generate the iteration count check for the main loop, *after* the check
// for the epilogue loop, so that the path-length is shorter for the case
// that goes directly through the vector epilogue. The longer-path length for
Expand Down Expand Up @@ -7611,11 +7509,14 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);

if (EPI.SCEVSafetyCheck)
EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
// Adjust the terminators of runtime check blocks and phis using them.
BasicBlock *SCEVCheckBlock = RTChecks.getSCEVChecks().second;
BasicBlock *MemCheckBlock = RTChecks.getMemRuntimeChecks().second;
if (SCEVCheckBlock)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (SCEVCheckBlock)
if (SCEVCheckBlock && !pred_empty(SCEVCheckBlock))

although the latter case is unclear as noted above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't needed any longer

SCEVCheckBlock->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
if (EPI.MemSafetyCheck)
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
if (MemCheckBlock)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (MemCheckBlock)
if (MemCheckBlock && !pred_empty(MemCheckBlock))

although the latter case is unclear as noted above.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't needed any longer

MemCheckBlock->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);

DT->changeImmediateDominator(LoopScalarPreHeader,
Expand All @@ -7642,10 +7543,10 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
}))
continue;
Phi->removeIncomingValue(EPI.EpilogueIterationCountCheck);
if (EPI.SCEVSafetyCheck)
Phi->removeIncomingValue(EPI.SCEVSafetyCheck);
if (EPI.MemSafetyCheck)
Phi->removeIncomingValue(EPI.MemSafetyCheck);
if (SCEVCheckBlock)
Phi->removeIncomingValue(SCEVCheckBlock);
if (MemCheckBlock)
Phi->removeIncomingValue(MemCheckBlock);
}

replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
Expand Down Expand Up @@ -9380,6 +9281,43 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
VPlanTransforms::runPass(VPlanTransforms::clearReductionWrapFlags, *Plan);
}

void LoopVectorizationPlanner::attachRuntimeChecks(
VPlan &Plan, GeneratedRTChecks &RTChecks, bool HasBranchWeights) const {
const auto &[SCEVCheckCond, SCEVCheckBlock] = RTChecks.getSCEVChecks();
if (SCEVCheckBlock) {
assert((!CM.OptForSize ||
CM.Hints->getForce() == LoopVectorizeHints::FK_Enabled) &&
"Cannot SCEV check stride or overflow when optimizing for size");
Comment on lines +9288 to +9290
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: to be consistent with MemCheckBlock below - is there some remark worth emitting, i.e., that "Code-size may be reduced by not forcing vectorization" (plus perhaps by adding a no-wrap assume and/or assume for possible stride values(?))

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, would be good to make consisten, as follow-up?

VPlanTransforms::attachCheckBlock(Plan, SCEVCheckCond, SCEVCheckBlock,
HasBranchWeights);
}
const auto &[MemCheckCond, MemCheckBlock] = RTChecks.getMemRuntimeChecks();
if (MemCheckBlock) {
// VPlan-native path does not do any analysis for runtime checks
// currently.
Comment on lines +9296 to +9297
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: to be consistent with SCEVCheckBlock above - comment and assert regarding native or innermost below also relevant there?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, would be good to make consisten, as follow-up?

assert((!EnableVPlanNativePath || OrigLoop->isInnermost()) &&
"Runtime checks are not supported for outer loops yet");

if (CM.OptForSize) {
assert(
CM.Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
"Cannot emit memory checks when optimizing for size, unless forced "
"to vectorize.");
ORE->emit([&]() {
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationCodeSize",
OrigLoop->getStartLoc(),
OrigLoop->getHeader())
<< "Code-size may be reduced by not forcing "
"vectorization, or by source-code modifications "
"eliminating the need for runtime checks "
"(e.g., adding 'restrict').";
});
}
Comment on lines +9301 to +9315
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: should all this be under ifndef NDEBUG?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so, as the remarks also need to get emitted when building w/o assertions.

VPlanTransforms::attachCheckBlock(Plan, MemCheckCond, MemCheckBlock,
HasBranchWeights);
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks

}

void VPDerivedIVRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "VPDerivedIVRecipe being replicated.");

Expand Down Expand Up @@ -9501,10 +9439,7 @@ static bool processLoopInVPlanNativePath(
VPlan &BestPlan = LVP.getPlanFor(VF.Width);

{
bool AddBranchWeights =
hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(),
AddBranchWeights, CM.CostKind);
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: why build Checks in native - just to ensure they're empty?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably to avoid special casing code .

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps worth a comment, to clarify usage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can do separately, thanks

InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
VF.Width, 1, &CM, BFI, PSI, Checks, BestPlan);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
Expand Down Expand Up @@ -10142,10 +10077,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (ORE->allowExtraAnalysis(LV_NAME))
LVP.emitInvalidCostRemarks(ORE);

bool AddBranchWeights =
hasBranchWeightMD(*L->getLoopLatch()->getTerminator());
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(),
AddBranchWeights, CM.CostKind);
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Independent: should Checks be defined, create()'d (and added to VPlan) somewhere else? Perhaps earlier during LVP.plan() or LVP.computeBestVF() above, or later before Checks.create() and isOutsideLoopWorkProfitable() below?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Current placement is to serve all users (needs to be passed to ILV at the moment).

Once connected to the VPlan, isOutsideLoopWorkProfitable could just get the cost from VPlan, but currently epilogue ILV still needs to retrieve the check blocks; perhaps they could also be fetched from the VPlan, but probably better done separately?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, worth leaving a TODO.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, that should already be ensured by the current placement and its users?

if (LVP.hasPlanWithVF(VF.Width)) {
// Select the interleave count.
IC = CM.selectInterleaveCount(LVP.getPlanFor(VF.Width), VF.Width, VF.Cost);
Expand Down
Loading