Skip to content

Commit

Permalink
Revert "AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis" (
Browse files Browse the repository at this point in the history
  • Loading branch information
petar-avramovic authored Jan 24, 2024
1 parent 9dddb3d commit c46109d
Show file tree
Hide file tree
Showing 21 changed files with 259 additions and 824 deletions.
11 changes: 0 additions & 11 deletions llvm/include/llvm/CodeGen/MachineRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,17 +752,6 @@ class MachineRegisterInfo {
Register createVirtualRegister(const TargetRegisterClass *RegClass,
StringRef Name = "");

/// All available attributes a virtual register can have.
///
/// NOTE(review): RCOrRB is a non-owning pointer, so whoever fills this struct
/// in must keep the pointed-to RegClassOrRegBank valid for as long as the
/// struct is used — TODO confirm no caller points it at storage that can be
/// reallocated while the attributes are still in use.
struct RegisterAttributes {
// Register class or register bank, referenced by pointer rather than copied.
const RegClassOrRegBank *RCOrRB;
// Type to give the register.
LLT Ty;
};

/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register attributes.
Register createVirtualRegister(RegisterAttributes RegAttr,
StringRef Name = "");

/// Create and return a new virtual register in the function with the same
/// attributes as the given register.
Register cloneVirtualRegister(Register VReg, StringRef Name = "");
Expand Down
19 changes: 0 additions & 19 deletions llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,25 +32,6 @@ MachineUniformityInfo computeMachineUniformityInfo(
MachineFunction &F, const MachineCycleInfo &cycleInfo,
const MachineDomTree &domTree, bool HasBranchDivergence);

/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
///
/// The pass owns a single MachineUniformityInfo object and exposes it through
/// the getUniformityInfo() accessors below.
class MachineUniformityAnalysisPass : public MachineFunctionPass {
// The uniformity info object handed out by getUniformityInfo().
MachineUniformityInfo UI;

public:
static char ID;

MachineUniformityAnalysisPass();

/// Mutable access to the uniformity info stored in this pass.
MachineUniformityInfo &getUniformityInfo() { return UI; }
/// Read-only access to the uniformity info stored in this pass.
const MachineUniformityInfo &getUniformityInfo() const { return UI; }

// Overrides of the MachineFunctionPass hooks; only declared here.
bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
void print(raw_ostream &OS, const Module *M = nullptr) const override;

// TODO: verify analysis
};

} // namespace llvm

#endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H
11 changes: 0 additions & 11 deletions llvm/lib/CodeGen/MachineRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,17 +167,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
return Reg;
}

/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register attributes.
///
/// \param RegAttr register class or bank (passed by pointer) together with
///                the type to assign to the new register.
/// \param Name    optional name for the new register.
/// \returns the newly created virtual register.
Register MachineRegisterInfo::createVirtualRegister(RegisterAttributes RegAttr,
                                                    StringRef Name) {
  // RegAttr.RCOrRB is a non-owning pointer and may refer to an entry of
  // VRegInfo (for example when it was taken from getRegClassOrRegBank()).
  // Creating the new register can grow VRegInfo and move its storage, which
  // would leave that pointer dangling, so copy the value out first.
  const RegClassOrRegBank RCOrRB = *RegAttr.RCOrRB;

  Register Reg = createIncompleteVirtualRegister(Name);
  VRegInfo[Reg].first = RCOrRB;
  setType(Reg, RegAttr.Ty);
  noteNewVirtualRegister(Reg);
  return Reg;
}

Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
StringRef Name) {
Register Reg = createIncompleteVirtualRegister(Name);
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,25 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(

namespace {

/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
///
/// The pass owns a single MachineUniformityInfo object and exposes it through
/// the getUniformityInfo() accessors below.
class MachineUniformityAnalysisPass : public MachineFunctionPass {
// The uniformity info object handed out by getUniformityInfo().
MachineUniformityInfo UI;

public:
static char ID;

MachineUniformityAnalysisPass();

/// Mutable access to the uniformity info stored in this pass.
MachineUniformityInfo &getUniformityInfo() { return UI; }
/// Read-only access to the uniformity info stored in this pass.
const MachineUniformityInfo &getUniformityInfo() const { return UI; }

// Overrides of the MachineFunctionPass hooks; only declared here.
bool runOnMachineFunction(MachineFunction &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
void print(raw_ostream &OS, const Module *M = nullptr) const override;

// TODO: verify analysis
};

class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
public:
static char ID;
Expand Down
145 changes: 1 addition & 144 deletions llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,7 @@
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "SILowerI1Copies.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"

Expand All @@ -46,146 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {

/// Declare what this pass preserves and which analyses it needs.
///
/// The pass reports that it preserves the CFG and requires the machine
/// dominator tree, the machine post-dominator tree and the machine uniformity
/// analysis, then forwards to the MachineFunctionPass implementation.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineUniformityAnalysisPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};

/// PhiLoweringHelper implementation used by the GlobalISel divergence
/// lowering pass. It keeps a pointer to the machine uniformity info and a
/// MachineIRBuilder in addition to what the base class provides.
class DivergenceLoweringHelper : public PhiLoweringHelper {
public:
/// \p MF, \p DT and \p PDT are forwarded to PhiLoweringHelper; \p MUI is
/// stored for later divergence queries.
DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
MachinePostDominatorTree *PDT,
MachineUniformityInfo *MUI);

private:
// Uniformity info used to decide which phis are divergent (not owned).
MachineUniformityInfo *MUI = nullptr;
// Builder used to insert the copies and lane-mask instructions.
MachineIRBuilder B;
// Create a lane-mask register and a COPY of Reg into it; returns the new
// register.
Register buildRegCopyToLaneMask(Register Reg);

public:
// Overrides of the PhiLoweringHelper hooks.
void markAsLaneMask(Register DstReg) const override;
void getCandidatesForLowering(
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
void collectIncomingValuesFromPhi(
const MachineInstr *MI,
SmallVectorImpl<Incoming> &Incomings) const override;
void replaceDstReg(Register NewReg, Register OldReg,
MachineBasicBlock *MBB) override;
void buildMergeLaneMasks(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register DstReg, Register PrevReg,
Register CurReg) override;
void constrainAsLaneMask(Incoming &In) override;
};

// Forward the function and dominator trees to PhiLoweringHelper, remember the
// uniformity info pointer, and construct the instruction builder for *MF.
DivergenceLoweringHelper::DivergenceLoweringHelper(
MachineFunction *MF, MachineDominatorTree *DT,
MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
: PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}

// Give an s1 register the register class returned by ST->getBoolRC():
// _(s1) -> SReg_32/64(s1)
void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
  assert(MRI->getType(DstReg) == LLT::scalar(1));

  // No register class yet: assign the boolean class directly.
  if (!MRI->getRegClassOrNull(DstReg)) {
    MRI->setRegClass(DstReg, ST->getBoolRC());
    return;
  }

  // A class is already set, so it must be constrainable to the boolean class.
  if (!MRI->constrainRegClass(DstReg, ST->getBoolRC()))
    llvm_unreachable("Failed to constrain register class");
}

// Append to Vreg1Phis every phi in the function whose result has type s1 and
// is reported as divergent by the uniformity info.
void DivergenceLoweringHelper::getCandidatesForLowering(
    SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
  const LLT BoolTy = LLT::scalar(1);

  for (MachineBasicBlock &Block : *MF) {
    for (MachineInstr &Phi : Block.phis()) {
      Register Def = Phi.getOperand(0).getReg();
      // Only s1 results are of interest.
      if (!(MRI->getType(Def) == BoolTy))
        continue;
      // Uniform phis are left alone.
      if (!MUI->isDivergent(Def))
        continue;
      Vreg1Phis.push_back(&Phi);
    }
  }
}

void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
Incomings.emplace_back(MI->getOperand(i).getReg(),
MI->getOperand(i + 1).getMBB(), Register());
}
}

// Define OldReg as a COPY of NewReg, inserted at the first non-phi position
// of MBB.
void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
                                             MachineBasicBlock *MBB) {
  auto InsertPt = MBB->getFirstNonPHI();
  const auto &CopyDesc = TII->get(AMDGPU::COPY);
  BuildMI(*MBB, InsertPt, {}, CopyDesc, OldReg).addReg(NewReg);
}

// Create a fresh lane mask register and copy Reg into it. The COPY is placed
// right after the instruction that defines Reg, moved past any phis and
// labels that follow it.
Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
  Register MaskCopy = createLaneMaskReg(MRI, LaneMaskRegAttrs);

  MachineInstr *DefMI = MRI->getVRegDef(Reg);
  MachineBasicBlock *DefMBB = DefMI->getParent();
  auto AfterDef = std::next(DefMI->getIterator());

  B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(AfterDef));
  B.buildCopy(MaskCopy, Reg);
  return MaskCopy;
}

// Merge two lane masks into DstReg. Input, before the merge:
//
// bb.previous
// %PrevReg = ...
//
// bb.current
// %CurReg = ...
//
// %DstReg - not defined
//
// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
//
// bb.previous
// %PrevReg = ...
// %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
//
// bb.current
// %CurReg = ...
// %CurRegCopy:sreg_32(s1) = COPY %CurReg
// ...
// %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes to 0
// %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
// %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
//
// DstReg = for active lanes, the bit in PrevReg is replaced with the bit from
// CurReg; for inactive lanes the bit from PrevReg is kept.
//
// The three merge instructions are inserted in MBB before I. DL is not used
// in this implementation.
void DivergenceLoweringHelper::buildMergeLaneMasks(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
Register DstReg, Register PrevReg, Register CurReg) {
// DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
// TODO: check if inputs are constants or results of a compare.

// Copies of both inputs into lane mask registers; each copy is inserted next
// to the definition of its source register, not at I.
Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
// Temporaries for the two masked halves of the merge.
Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);

B.setInsertPt(MBB, I);
B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
}

// Does nothing: the incoming value is left untouched.
void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {}

} // End anonymous namespace.

// Pass registration for AMDGPUGlobalISelDivergenceLowering under DEBUG_TYPE,
// listing the machine dominator tree, machine post-dominator tree and machine
// uniformity analysis as pass dependencies.
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
"AMDGPU GlobalISel divergence lowering", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
"AMDGPU GlobalISel divergence lowering", false, false)

Expand All @@ -200,12 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {

bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
MachineFunction &MF) {
MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
MachineUniformityInfo &MUI =
getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();

DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);

return Helper.lowerPhis();
return false;
}
5 changes: 2 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
const Register DefReg = I.getOperand(0).getReg();
const LLT DefTy = MRI->getType(DefReg);

if (DefTy == LLT::scalar(1)) {
if (!AllowRiskySelect) {
LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
Expand Down Expand Up @@ -3553,6 +3552,8 @@ bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
}

bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);

if (!I.isPreISelOpcode()) {
if (I.isCopy())
Expand Down Expand Up @@ -3695,8 +3696,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectWaveAddress(I);
case AMDGPU::G_STACKRESTORE:
return selectStackRestore(I);
case AMDGPU::G_PHI:
return selectPHI(I);
default:
return selectImpl(I, *CoverageInfo);
}
Expand Down
30 changes: 15 additions & 15 deletions llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@

using namespace llvm;

static Register
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
static Register insertUndefLaneMask(MachineBasicBlock *MBB,
MachineRegisterInfo *MRI,
Register LaneMaskRegAttrs);

namespace {

Expand Down Expand Up @@ -78,7 +78,7 @@ class Vreg1LoweringHelper : public PhiLoweringHelper {
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register DstReg, Register PrevReg,
Register CurReg) override;
void constrainAsLaneMask(Incoming &In) override;
void constrainIncomingRegisterTakenAsIs(Incoming &In) override;

bool lowerCopiesFromI1();
bool lowerCopiesToI1();
Expand Down Expand Up @@ -304,8 +304,7 @@ class LoopFinder {
/// blocks, so that the SSA updater doesn't have to search all the way to the
/// function entry.
void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
MachineRegisterInfo &MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs,
MachineRegisterInfo &MRI, Register LaneMaskRegAttrs,
ArrayRef<Incoming> Incomings = {}) {
assert(LoopLevel < CommonDominators.size());

Expand Down Expand Up @@ -412,15 +411,14 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
return new SILowerI1Copies();
}

Register llvm::createLaneMaskReg(
MachineRegisterInfo *MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
return MRI->createVirtualRegister(LaneMaskRegAttrs);
Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
Register LaneMaskRegAttrs) {
return MRI->cloneVirtualRegister(LaneMaskRegAttrs);
}

static Register
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
static Register insertUndefLaneMask(MachineBasicBlock *MBB,
MachineRegisterInfo *MRI,
Register LaneMaskRegAttrs) {
MachineFunction &MF = *MBB->getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
Expand Down Expand Up @@ -621,7 +619,7 @@ bool PhiLoweringHelper::lowerPhis() {
for (auto &Incoming : Incomings) {
MachineBasicBlock &IMBB = *Incoming.Block;
if (PIA.isSource(IMBB)) {
constrainAsLaneMask(Incoming);
constrainIncomingRegisterTakenAsIs(Incoming);
SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
} else {
Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
Expand Down Expand Up @@ -913,4 +911,6 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
}
}

void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {}
void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
return;
}
11 changes: 4 additions & 7 deletions llvm/lib/Target/AMDGPU/SILowerI1Copies.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,7 @@ struct Incoming {
: Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
};

Register
createLaneMaskReg(MachineRegisterInfo *MRI,
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
Register createLaneMaskReg(MachineRegisterInfo *MRI, Register LaneMaskRegAttrs);

class PhiLoweringHelper {
public:
Expand All @@ -49,7 +47,7 @@ class PhiLoweringHelper {
MachineRegisterInfo *MRI = nullptr;
const GCNSubtarget *ST = nullptr;
const SIInstrInfo *TII = nullptr;
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs;
Register LaneMaskRegAttrs;

#ifndef NDEBUG
DenseSet<Register> PhiRegisters;
Expand All @@ -70,8 +68,7 @@ class PhiLoweringHelper {
getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;

void initializeLaneMaskRegisterAttributes(Register LaneMask) {
LaneMaskRegAttrs.RCOrRB = &MRI->getRegClassOrRegBank(LaneMask);
LaneMaskRegAttrs.Ty = MRI->getType(LaneMask);
LaneMaskRegAttrs = LaneMask;
}

bool isLaneMaskReg(Register Reg) const {
Expand All @@ -94,7 +91,7 @@ class PhiLoweringHelper {
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register DstReg,
Register PrevReg, Register CurReg) = 0;
virtual void constrainAsLaneMask(Incoming &In) = 0;
virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
};

} // end namespace llvm
Loading

0 comments on commit c46109d

Please sign in to comment.