Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
9aba342
Precommit tests
lukel97 Jul 30, 2025
ea2b861
[RISCV] Handle recurrences in RISCVVLOptimizer
lukel97 Feb 7, 2025
9f24fe7
Link to talk in header comment
lukel97 Jul 30, 2025
9b61df6
Fix test name
lukel97 Aug 22, 2025
a78cc60
Merge branch 'main' of github.com:llvm/llvm-project into vloptimizer/…
lukel97 Sep 2, 2025
257ed3c
Reword a comment to be more clear
lukel97 Sep 3, 2025
4d80e45
Merge branch 'main' of github.com:llvm/llvm-project into vloptimizer/…
lukel97 Sep 4, 2025
5465920
Merge branch 'main' of github.com:llvm/llvm-project into vloptimizer/…
lukel97 Sep 4, 2025
a31269c
Merge branch 'main' of github.com:llvm/llvm-project into vloptimizer/…
lukel97 Sep 5, 2025
97a12b1
Merge branch 'main' of github.com:llvm/llvm-project into vloptimizer/…
lukel97 Sep 5, 2025
8f75db7
Make vector_uses just a static function
lukel97 Sep 5, 2025
d550870
clang-format
lukel97 Sep 5, 2025
dc0ca0e
Remove debug code
lukel97 Sep 6, 2025
1dd36a4
Merge branch 'main' into vloptimizer/dataflow-analysis
lukel97 Sep 10, 2025
ebce546
Add vleff recurrence test case
lukel97 Sep 11, 2025
cadf393
Move max into DemandedVL
lukel97 Sep 12, 2025
0ef2baf
Move isVirtualVec into virtual_vec_uses method, avoid pointer chasing
lukel97 Sep 12, 2025
1068f3a
Rename Worklist in checkUsers to avoid shadowing
lukel97 Sep 12, 2025
62635d5
Merge branch 'main' into vloptimizer/dataflow-analysis
lukel97 Sep 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 101 additions & 53 deletions llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,19 @@
// instructions are inserted.
//
// The purpose of this optimization is to make the VL argument, for instructions
// that have a VL argument, as small as possible. This is implemented by
// visiting each instruction in reverse order and checking that if it has a VL
// argument, whether the VL can be reduced.
// that have a VL argument, as small as possible.
//
// This is split into a sparse dataflow analysis where we first determine what
// VL is demanded of each instruction by its users, and then afterwards try to
// reduce the VL of each instruction whose demanded VL is less than its VL
// operand.
//
// The analysis is explained in more detail in the 2025 EuroLLVM Developers'
// Meeting talk "Accidental Dataflow Analysis: Extending the RISC-V VL
// Optimizer", which is available on YouTube at
// https://www.youtube.com/watch?v=Mfb5fRSdJAc
//
// The slides for the talk are available at
// https://llvm.org/devmtg/2025-04/slides/technical_talk/lau_accidental_dataflow.pdf
//
//===---------------------------------------------------------------------===//

Expand All @@ -30,6 +40,27 @@ using namespace llvm;

namespace {

/// Wrapper around MachineOperand that defaults to immediate 0.
struct DemandedVL {
MachineOperand VL;
DemandedVL() : VL(MachineOperand::CreateImm(0)) {}
DemandedVL(MachineOperand VL) : VL(VL) {}
static DemandedVL vlmax() {
return DemandedVL(MachineOperand::CreateImm(RISCV::VLMaxSentinel));
}
bool operator!=(const DemandedVL &Other) const {
return !VL.isIdenticalTo(Other.VL);
}
};

/// Returns whichever of \p LHS and \p RHS is known to hold the larger VL. If
/// RISCV::isVLKnownLE cannot order the two in either direction, falls back to
/// DemandedVL::vlmax().
///
/// NOTE(review): a reviewer asked whether this should move into the DemandedVL
/// class so that it has to be called as DemandedVL::max.
static DemandedVL max(const DemandedVL &LHS, const DemandedVL &RHS) {
  if (RISCV::isVLKnownLE(LHS.VL, RHS.VL))
    return RHS;
  if (RISCV::isVLKnownLE(RHS.VL, LHS.VL))
    return LHS;
  // Neither ordering is known.
  return DemandedVL::vlmax();
}

class RISCVVLOptimizer : public MachineFunctionPass {
const MachineRegisterInfo *MRI;
const MachineDominatorTree *MDT;
Expand All @@ -51,17 +82,17 @@ class RISCVVLOptimizer : public MachineFunctionPass {
StringRef getPassName() const override { return PASS_NAME; }

private:
std::optional<MachineOperand>
getMinimumVLForUser(const MachineOperand &UserOp) const;
/// Returns the largest common VL MachineOperand that may be used to optimize
/// MI. Returns std::nullopt if it failed to find a suitable VL.
std::optional<MachineOperand> checkUsers(const MachineInstr &MI) const;
DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const;
/// Returns true if the users of \p MI have compatible EEWs and SEWs.
bool checkUsers(const MachineInstr &MI) const;
bool tryReduceVL(MachineInstr &MI) const;
bool isCandidate(const MachineInstr &MI) const;
void transfer(const MachineInstr &MI);

/// For a given instruction, records what elements of it are demanded by
/// downstream users.
DenseMap<const MachineInstr *, std::optional<MachineOperand>> DemandedVLs;
DenseMap<const MachineInstr *, DemandedVL> DemandedVLs;
SetVector<const MachineInstr *> Worklist;
};

/// Represents the EMUL and EEW of a MachineOperand.
Expand Down Expand Up @@ -813,6 +844,7 @@ static std::optional<OperandInfo> getOperandInfo(const MachineOperand &MO) {
const MachineInstr &MI = *MO.getParent();
const RISCVVPseudosTable::PseudoInfo *RVV =
RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
MI.dump();
assert(RVV && "Could not find MI in PseudoTable");

std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO);
Expand Down Expand Up @@ -847,10 +879,15 @@ static std::optional<OperandInfo> getOperandInfo(const MachineOperand &MO) {
return OperandInfo(getEMULEqualsEEWDivSEWTimesLMUL(*Log2EEW, MI), *Log2EEW);
}

static bool isTupleInsertInstr(const MachineInstr &MI);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forward-declared here to avoid code motion in the diff; will remove in a follow-up commit.


/// Return true if this optimization should consider MI for VL reduction. This
/// white-list approach simplifies this optimization for instructions that may
/// have more complex semantics with relation to how they use VL.
static bool isSupportedInstr(const MachineInstr &MI) {
if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
return true;

const RISCVVPseudosTable::PseudoInfo *RVV =
RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());

Expand Down Expand Up @@ -1348,21 +1385,24 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
return true;
}

std::optional<MachineOperand>
DemandedVL
RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
const MachineInstr &UserMI = *UserOp.getParent();
const MCInstrDesc &Desc = UserMI.getDesc();

if (UserMI.isPHI() || UserMI.isFullCopy() || isTupleInsertInstr(UserMI))
return DemandedVLs.lookup(&UserMI);

if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
" use VLMAX\n");
return std::nullopt;
return DemandedVL::vlmax();
}

if (RISCVII::readsPastVL(
TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
return std::nullopt;
return DemandedVL::vlmax();
}

unsigned VLOpNum = RISCVII::getVLOpNum(Desc);
Expand All @@ -1376,11 +1416,10 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
if (UserOp.isTied()) {
assert(UserOp.getOperandNo() == UserMI.getNumExplicitDefs() &&
RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc()));
auto DemandedVL = DemandedVLs.lookup(&UserMI);
if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) {
if (!RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp)) {
LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
"instruction with demanded tail\n");
return std::nullopt;
return DemandedVL::vlmax();
}
}

Expand All @@ -1393,11 +1432,8 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {

// If we know the demanded VL of UserMI, then we can reduce the VL it
// requires.
if (auto DemandedVL = DemandedVLs.lookup(&UserMI)) {
assert(isCandidate(UserMI));
if (RISCV::isVLKnownLE(*DemandedVL, VLOp))
return DemandedVL;
}
if (RISCV::isVLKnownLE(DemandedVLs.lookup(&UserMI).VL, VLOp))
return DemandedVLs.lookup(&UserMI);

return VLOp;
}
Expand Down Expand Up @@ -1450,9 +1486,10 @@ static bool isSegmentedStoreInstr(const MachineInstr &MI) {
}
}

std::optional<MachineOperand>
RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
std::optional<MachineOperand> CommonVL;
bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
if (MI.isPHI() || MI.isFullCopy() || isTupleInsertInstr(MI))
return true;

SmallSetVector<MachineOperand *, 8> Worklist;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This name now shadows the new MachineInstr* Worklist in the class. Should we disambiguate them?

SmallPtrSet<const MachineInstr *, 4> PHISeen;
for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
Expand Down Expand Up @@ -1481,7 +1518,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
// whole register group).
if (!isTupleInsertInstr(CandidateMI) &&
!isSegmentedStoreInstr(CandidateMI))
return std::nullopt;
return false;
Worklist.insert(&UseOp);
}
continue;
Expand All @@ -1497,23 +1534,9 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
continue;
}

auto VLOp = getMinimumVLForUser(UserOp);
if (!VLOp)
return std::nullopt;

// Use the largest VL among all the users. If we cannot determine this
// statically, then we cannot optimize the VL.
if (!CommonVL || RISCV::isVLKnownLE(*CommonVL, *VLOp)) {
CommonVL = *VLOp;
LLVM_DEBUG(dbgs() << " User VL is: " << VLOp << "\n");
} else if (!RISCV::isVLKnownLE(*VLOp, *CommonVL)) {
LLVM_DEBUG(dbgs() << " Abort because cannot determine a common VL\n");
return std::nullopt;
}

if (!RISCVII::hasSEWOp(UserMI.getDesc().TSFlags)) {
LLVM_DEBUG(dbgs() << " Abort due to lack of SEW operand\n");
return std::nullopt;
return false;
}

std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp);
Expand All @@ -1522,7 +1545,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
return std::nullopt;
return false;
}

if (!OperandInfo::areCompatible(*ProducerInfo, *ConsumerInfo)) {
Expand All @@ -1531,11 +1554,11 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
<< " Abort due to incompatible information for EMUL or EEW.\n");
LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
LLVM_DEBUG(dbgs() << " ProducerInfo is: " << ProducerInfo << "\n");
return std::nullopt;
return false;
}
}

return CommonVL;
return true;
}

bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
Expand All @@ -1551,9 +1574,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
return false;
}

auto CommonVL = DemandedVLs.lookup(&MI);
if (!CommonVL)
return false;
auto *CommonVL = &DemandedVLs.at(&MI).VL;

assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
"Expected VL to be an Imm or virtual Reg");
Expand All @@ -1564,7 +1585,7 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
!MDT->dominates(VLMI, &MI))
CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
CommonVL = &VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
}

if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
Expand Down Expand Up @@ -1599,6 +1620,30 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
return true;
}

/// Returns true if \p MO is a register operand whose register is physical.
static bool isPhysical(const MachineOperand &MO) {
  if (!MO.isReg())
    return false;
  return MO.getReg().isPhysical();
}

static bool isVirtualVec(const MachineOperand &MO) {
return MO.isReg() && MO.getReg().isVirtual() &&
RISCVRegisterInfo::isRVVRegClass(
MO.getParent()->getMF()->getRegInfo().getRegClass(MO.getReg()));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels like a lot of pointer chasing to do on every operand.

Can we make this a lambda in the one function that calls it and capture MRI from the RISCVVLOptimizer class?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I originally had a helper method in the RISCVVLOptimizer class that captured MRI in ea2b861, I've added it back in 0ef2baf

}

/// Propagates what \p MI demands to the instructions that define its virtual
/// vector register operands, and queues any such definition whose recorded
/// demanded VL changed as a result.
void RISCVVLOptimizer::transfer(const MachineInstr &MI) {
  // If MI is unsupported, its users fail checkUsers, or it defines a physical
  // register, record VLMAX as the VL demanded of MI.
  const bool NeedsVLMax = !isSupportedInstr(MI) || !checkUsers(MI) ||
                          any_of(MI.defs(), isPhysical);
  if (NeedsVLMax)
    DemandedVLs[&MI] = DemandedVL::vlmax();

  for (const MachineOperand &Use : make_filter_range(MI.uses(), isVirtualVec)) {
    const MachineInstr *DefMI = MRI->getVRegDef(Use.getReg());
    // Subscripting default-constructs the entry (immediate 0) if DefMI has not
    // been seen yet.
    const DemandedVL Before = DemandedVLs[DefMI];
    const DemandedVL After = max(Before, getMinimumVLForUser(Use));
    DemandedVLs[DefMI] = After;
    // Only revisit the definition when its demanded VL actually moved.
    if (After != Before)
      Worklist.insert(DefMI);
  }
}

bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
Expand All @@ -1614,15 +1659,18 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {

assert(DemandedVLs.empty());

// For each instruction that defines a vector, compute what VL its
// downstream users demand.
// For each instruction that defines a vector, propagate the VL it
// uses to its inputs.
for (MachineBasicBlock *MBB : post_order(&MF)) {
assert(MDT->isReachableFromEntry(MBB));
for (MachineInstr &MI : reverse(*MBB)) {
if (!isCandidate(MI))
continue;
DemandedVLs.insert({&MI, checkUsers(MI)});
}
for (MachineInstr &MI : reverse(*MBB))
Worklist.insert(&MI);
}

while (!Worklist.empty()) {
const MachineInstr *MI = Worklist.front();
Worklist.remove(MI);
transfer(*MI);
}

// Then go through and see if we can reduce the VL of any instructions to
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/reproducer-pr146855.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ target triple = "riscv64-unknown-linux-gnu"
define i32 @_ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_(<vscale x 4 x i32> %wide.load, <vscale x 4 x i1> %0, <vscale x 4 x i1> %1, <vscale x 4 x i1> %2, <vscale x 4 x i1> %3) #0 {
; CHECK-LABEL: _ZN4Mesh12rezone_countESt6vectorIiSaIiEERiS3_:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vsetivli zero, 0, e32, m2, ta, ma
; CHECK-NEXT: vmv1r.v v8, v0
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vmv.v.i v14, 0
; CHECK-NEXT: .LBB0_1: # %vector.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu
; CHECK-NEXT: vsetivli zero, 0, e32, m2, ta, mu
; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: slli a0, a0, 2
; CHECK-NEXT: vmv2r.v v16, v10
Expand Down
52 changes: 52 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -238,3 +238,55 @@ define void @segmented_store_insert_subreg(<vscale x 4 x float> %v0, <vscale x 4
call void @llvm.riscv.vsseg3(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %t2, ptr %p, iXLen %vl, iXLen 5)
ret void
}

; %phi is a loop-carried vector value: it starts as zeroinitializer and is fed
; back from %x. Besides feeding %phi, %x is only used after the loop, by a vse
; whose last operand is %vl. The CHECK lines expect a single vsetvli ahead of
; the loop and no vsetvli inside it.
define void @recurrence(<vscale x 4 x i32> %v, ptr %p, iXLen %n, iXLen %vl) {
; CHECK-LABEL: recurrence:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 0
; CHECK-NEXT: .LBB16_1: # %loop
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: vadd.vv v10, v10, v8
; CHECK-NEXT: bnez a1, .LBB16_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: vse32.v v10, (a0)
; CHECK-NEXT: ret
entry:
br label %loop
loop:
%iv = phi iXLen [ 0, %entry ], [ %iv.next, %loop ]
%phi = phi <vscale x 4 x i32> [ zeroinitializer, %entry ], [ %x, %loop ]
%x = add <vscale x 4 x i32> %phi, %v
%iv.next = add iXLen %iv, 1
%done = icmp eq iXLen %iv.next, %n
br i1 %done, label %exit, label %loop
exit:
call void @llvm.riscv.vse(<vscale x 4 x i32> %x, ptr %p, iXLen %vl)
ret void
}

; %a is created with a last (VL) operand of -1 and is used in two different
; blocks: by %b, whose last operand is 1, and by %c, whose last operand is 2.
; The CHECK lines expect %a's vadd.vi to run under `vsetivli zero, 2`, with
; %foo switching to `vsetivli zero, 1` and %bar needing no further vsetivli.
define <vscale x 4 x i32> @join(<vscale x 4 x i32> %v, i1 %cond, iXLen %vl) {
; CHECK-LABEL: join:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: andi a0, a0, 1
; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: beqz a0, .LBB17_2
; CHECK-NEXT: # %bb.1: # %foo
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT: vadd.vi v8, v8, 1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB17_2: # %bar
; CHECK-NEXT: vadd.vi v8, v8, 2
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, iXLen 1, iXLen -1)
br i1 %cond, label %foo, label %bar
foo:
%b = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 1, iXLen 1)
ret <vscale x 4 x i32> %b
bar:
%c = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen 2)
ret <vscale x 4 x i32> %c
}
Loading
Loading