Skip to content

Commit bb64a63

Browse files
cdevadasronlieb
authored andcommitted
[AMDGPU] Split vgpr regalloc pipeline (llvm#93526)
Allocating wwm-registers and regular VGPR operands together imposes many challenges in the way the registers are reused during allocation. There are times when regalloc reuses the registers of regular VGPR operations for wwm-operations in a small range, unintentionally clobbering their inactive lanes and causing correctness issues that are hard to trace. This patch splits the VGPR allocation pipeline further to allocate wwm-registers first and the regular VGPR operands in a separate pipeline. The split ensures that the physical registers used for wwm allocations won't take part in the next allocation pipeline, avoiding any such clobbering. Change-Id: Ib2c5b9b53944bf78709465a9d1786d129434ce40
1 parent 68919c9 commit bb64a63

File tree

88 files changed

+10423
-10878
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+10423
-10878
lines changed

llvm/include/llvm/CodeGen/MachineRegisterInfo.h

+2
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ class MachineRegisterInfo {
184184
TheDelegate->MRI_NoteCloneVirtualRegister(NewReg, SrcReg);
185185
}
186186

187+
/// Returns a const reference to the MachineFunction that MF points to.
const MachineFunction &getMF() const { return *MF; }
188+
187189
//===--------------------------------------------------------------------===//
188190
// Function State
189191
//===--------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPU.h

+4
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
5757
ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
5858
FunctionPass *createAMDGPUCodeGenPreparePass();
5959
FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
60+
FunctionPass *createAMDGPUReserveWWMRegsPass();
6061
FunctionPass *createAMDGPURewriteOutArgumentsPass();
6162
ModulePass *
6263
createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
@@ -154,6 +155,9 @@ struct AMDGPULowerBufferFatPointersPass
154155
const TargetMachine &TM;
155156
};
156157

158+
void initializeAMDGPUReserveWWMRegsPass(PassRegistry &);
159+
extern char &AMDGPUReserveWWMRegsID;
160+
157161
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
158162
extern char &AMDGPURewriteOutArgumentsID;
159163

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
//===-- AMDGPUReserveWWMRegs.cpp - Add WWM Regs to reserved regs list -----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This pass should be invoked at the end of wwm-regalloc pipeline.
11+
/// It identifies the WWM regs allocated during this pipeline and adds
12+
/// them to the list of reserved registers so that they won't be available for
13+
/// regular VGPR allocation in the subsequent regalloc pipeline.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#include "AMDGPU.h"
18+
#include "GCNSubtarget.h"
19+
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20+
#include "SIMachineFunctionInfo.h"
21+
#include "llvm/CodeGen/LiveIntervals.h"
22+
#include "llvm/CodeGen/MachineFunctionPass.h"
23+
#include "llvm/CodeGen/VirtRegMap.h"
24+
#include "llvm/InitializePasses.h"
25+
26+
using namespace llvm;
27+
28+
#define DEBUG_TYPE "amdgpu-reserve-wwm-regs"
29+
30+
namespace {
31+
32+
class AMDGPUReserveWWMRegs : public MachineFunctionPass {
33+
public:
34+
static char ID;
35+
36+
AMDGPUReserveWWMRegs() : MachineFunctionPass(ID) {
37+
initializeAMDGPUReserveWWMRegsPass(*PassRegistry::getPassRegistry());
38+
}
39+
40+
bool runOnMachineFunction(MachineFunction &MF) override;
41+
42+
StringRef getPassName() const override {
43+
return "AMDGPU Reserve WWM Registers";
44+
}
45+
46+
void getAnalysisUsage(AnalysisUsage &AU) const override {
47+
AU.setPreservesAll();
48+
MachineFunctionPass::getAnalysisUsage(AU);
49+
}
50+
};
51+
52+
} // End anonymous namespace.
53+
54+
INITIALIZE_PASS(AMDGPUReserveWWMRegs, DEBUG_TYPE,
55+
"AMDGPU Reserve WWM Registers", false, false)
56+
57+
char AMDGPUReserveWWMRegs::ID = 0;
58+
59+
char &llvm::AMDGPUReserveWWMRegsID = AMDGPUReserveWWMRegs::ID;
60+
61+
bool AMDGPUReserveWWMRegs::runOnMachineFunction(MachineFunction &MF) {
62+
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
63+
64+
bool Changed = false;
65+
for (MachineBasicBlock &MBB : MF) {
66+
for (MachineInstr &MI : MBB) {
67+
unsigned Opc = MI.getOpcode();
68+
if (Opc != AMDGPU::SI_SPILL_S32_TO_VGPR &&
69+
Opc != AMDGPU::SI_RESTORE_S32_FROM_VGPR)
70+
continue;
71+
72+
Register Reg = Opc == AMDGPU::SI_SPILL_S32_TO_VGPR
73+
? MI.getOperand(0).getReg()
74+
: MI.getOperand(1).getReg();
75+
76+
assert(Reg.isPhysical() &&
77+
"All WWM registers should have been allocated by now.");
78+
79+
MFI->reserveWWMRegister(Reg);
80+
Changed |= true;
81+
}
82+
}
83+
84+
// The renamable flag can't be set for reserved registers. Reset the flag for
85+
// MOs involving wwm-regs as they will be reserved during vgpr-regalloc
86+
// pipeline.
87+
const MachineRegisterInfo &MRI = MF.getRegInfo();
88+
for (Register Reg : MFI->getWWMReservedRegs()) {
89+
for (MachineOperand &MO : MRI.reg_operands(Reg))
90+
MO.setIsRenamable(false);
91+
}
92+
93+
// Now clear the NonWWMRegMask earlier set during wwm-regalloc.
94+
MFI->clearNonWWMRegAllocMask();
95+
96+
return Changed;
97+
}

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+90-4
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
108108
: RegisterRegAllocBase(N, D, C) {}
109109
};
110110

111+
/// Registry node type for the register allocators selectable through the
/// -wwm-regalloc option. It only forwards its arguments to
/// RegisterRegAllocBase.
class WWMRegisterRegAlloc : public RegisterRegAllocBase<WWMRegisterRegAlloc> {
public:
  WWMRegisterRegAlloc(const char *Name, const char *Description,
                      FunctionPassCtor Ctor)
      : RegisterRegAllocBase(Name, Description, Ctor) {}
};
116+
111117
static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
112118
const MachineRegisterInfo &MRI,
113119
const Register Reg) {
@@ -122,13 +128,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
122128
return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
123129
}
124130

125-
/// -{sgpr|vgpr}-regalloc=... command line option.
131+
static bool onlyAllocateWWMRegs(const TargetRegisterInfo &TRI,
132+
const MachineRegisterInfo &MRI,
133+
const Register Reg) {
134+
const SIMachineFunctionInfo *MFI =
135+
MRI.getMF().getInfo<SIMachineFunctionInfo>();
136+
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
137+
return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC) &&
138+
MFI->checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
139+
}
140+
141+
/// -{sgpr|wwm|vgpr}-regalloc=... command line option.
126142
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
127143

128144
/// A dummy default pass factory indicates whether the register allocator is
129145
/// overridden on the command line.
130146
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
131147
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
148+
static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
132149

133150
static SGPRRegisterRegAlloc
134151
defaultSGPRRegAlloc("default",
@@ -145,6 +162,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
145162
VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
146163
cl::desc("Register allocator to use for VGPRs"));
147164

165+
static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
166+
RegisterPassParser<WWMRegisterRegAlloc>>
167+
WWMRegAlloc("wwm-regalloc", cl::Hidden,
168+
cl::init(&useDefaultRegisterAllocator),
169+
cl::desc("Register allocator to use for WWM registers"));
148170

149171
static void initializeDefaultSGPRRegisterAllocatorOnce() {
150172
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
@@ -164,6 +186,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
164186
}
165187
}
166188

189+
static void initializeDefaultWWMRegisterAllocatorOnce() {
190+
RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault();
191+
192+
if (!Ctor) {
193+
Ctor = WWMRegAlloc;
194+
WWMRegisterRegAlloc::setDefault(WWMRegAlloc);
195+
}
196+
}
197+
167198
static FunctionPass *createBasicSGPRRegisterAllocator() {
168199
return createBasicRegisterAllocator(onlyAllocateSGPRs);
169200
}
@@ -188,6 +219,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
188219
return createFastRegisterAllocator(onlyAllocateVGPRs, true);
189220
}
190221

222+
/// Creates the basic register allocator filtered by onlyAllocateWWMRegs.
static FunctionPass *createBasicWWMRegisterAllocator() {
  FunctionPass *Allocator = createBasicRegisterAllocator(onlyAllocateWWMRegs);
  return Allocator;
}
225+
226+
/// Creates the greedy register allocator filtered by onlyAllocateWWMRegs.
static FunctionPass *createGreedyWWMRegisterAllocator() {
  FunctionPass *Allocator = createGreedyRegisterAllocator(onlyAllocateWWMRegs);
  return Allocator;
}
229+
230+
/// Creates the fast register allocator filtered by onlyAllocateWWMRegs.
/// Note that the second argument is `false` here, whereas
/// createFastVGPRRegisterAllocator passes `true`.
static FunctionPass *createFastWWMRegisterAllocator() {
  FunctionPass *Allocator =
      createFastRegisterAllocator(onlyAllocateWWMRegs, false);
  return Allocator;
}
233+
191234
static SGPRRegisterRegAlloc basicRegAllocSGPR(
192235
"basic", "basic register allocator", createBasicSGPRRegisterAllocator);
193236
static SGPRRegisterRegAlloc greedyRegAllocSGPR(
@@ -204,6 +247,15 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
204247

205248
static VGPRRegisterRegAlloc fastRegAllocVGPR(
206249
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
250+
251+
static WWMRegisterRegAlloc basicRegAllocWWMReg("basic",
252+
"basic register allocator",
253+
createBasicWWMRegisterAllocator);
254+
static WWMRegisterRegAlloc
255+
greedyRegAllocWWMReg("greedy", "greedy register allocator",
256+
createGreedyWWMRegisterAllocator);
257+
static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator",
258+
createFastWWMRegisterAllocator);
207259
} // anonymous namespace
208260

209261
static cl::opt<bool>
@@ -440,6 +492,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
440492
initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
441493
initializeAMDGPULowerModuleLDSLegacyPass(*PR);
442494
initializeAMDGPULowerBufferFatPointersPass(*PR);
495+
initializeAMDGPUReserveWWMRegsPass(*PR);
443496
initializeAMDGPURewriteOutArgumentsPass(*PR);
444497
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
445498
initializeAMDGPUUnifyMetadataPass(*PR);
@@ -1021,6 +1074,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
10211074

10221075
FunctionPass *createSGPRAllocPass(bool Optimized);
10231076
FunctionPass *createVGPRAllocPass(bool Optimized);
1077+
FunctionPass *createWWMRegAllocPass(bool Optimized);
10241078
FunctionPass *createRegAllocPass(bool Optimized) override;
10251079

10261080
bool addRegAssignAndRewriteFast() override;
@@ -1417,7 +1471,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
14171471
}
14181472

14191473
bool GCNPassConfig::addPreRewrite() {
1420-
addPass(&SILowerWWMCopiesID);
14211474
if (EnableRegReassign)
14221475
addPass(&GCNNSAReassignID);
14231476
return true;
@@ -1453,12 +1506,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
14531506
return createFastVGPRRegisterAllocator();
14541507
}
14551508

1509+
/// Creates the register allocator pass used for WWM registers.
///
/// If the WWMRegisterRegAlloc default differs from the
/// useDefaultRegisterAllocator placeholder, that constructor is invoked.
/// Otherwise the greedy allocator is used when \p Optimized is true and the
/// fast allocator when it is false.
FunctionPass *GCNPassConfig::createWWMRegAllocPass(bool Optimized) {
  // Set up the global default exactly once.
  llvm::call_once(InitializeDefaultWWMRegisterAllocatorFlag,
                  initializeDefaultWWMRegisterAllocatorOnce);

  const RegisterRegAlloc::FunctionPassCtor SelectedCtor =
      WWMRegisterRegAlloc::getDefault();

  // No explicit choice was made: pick an allocator from the optimization
  // setting.
  if (SelectedCtor == useDefaultRegisterAllocator)
    return Optimized ? createGreedyWWMRegisterAllocator()
                     : createFastWWMRegisterAllocator();

  return SelectedCtor();
}
1523+
14561524
FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
14571525
llvm_unreachable("should not be used");
14581526
}
14591527

14601528
static const char RegAllocOptNotSupportedMessage[] =
1461-
"-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1529+
"-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
1530+
"and -vgpr-regalloc";
14621531

14631532
bool GCNPassConfig::addRegAssignAndRewriteFast() {
14641533
if (!usingDefaultRegAlloc())
@@ -1470,11 +1539,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
14701539

14711540
// Equivalent of PEI for SGPRs.
14721541
addPass(&SILowerSGPRSpillsLegacyID);
1542+
1543+
// To Allocate wwm registers used in whole quad mode operations (for shaders).
14731544
addPass(&SIPreAllocateWWMRegsID);
14741545

1475-
addPass(createVGPRAllocPass(false));
1546+
// For allocating other wwm register operands.
1547+
addPass(createWWMRegAllocPass(false));
14761548

14771549
addPass(&SILowerWWMCopiesID);
1550+
addPass(&AMDGPUReserveWWMRegsID);
1551+
1552+
// For allocating regular VGPRs.
1553+
addPass(createVGPRAllocPass(false));
1554+
14781555
return true;
14791556
}
14801557

@@ -1494,8 +1571,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14941571

14951572
// Equivalent of PEI for SGPRs.
14961573
addPass(&SILowerSGPRSpillsLegacyID);
1574+
1575+
// To Allocate wwm registers used in whole quad mode operations (for shaders).
14971576
addPass(&SIPreAllocateWWMRegsID);
14981577

1578+
// For allocating other whole wave mode registers.
1579+
addPass(createWWMRegAllocPass(true));
1580+
addPass(&SILowerWWMCopiesID);
1581+
addPass(createVirtRegRewriter(false));
1582+
addPass(&AMDGPUReserveWWMRegsID);
1583+
1584+
// For allocating regular VGPRs.
14991585
addPass(createVGPRAllocPass(true));
15001586

15011587
addPreRewrite();

llvm/lib/Target/AMDGPU/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ add_llvm_target(AMDGPUCodeGen
9595
AMDGPURegBankSelect.cpp
9696
AMDGPURegisterBankInfo.cpp
9797
AMDGPURemoveIncompatibleFunctions.cpp
98+
AMDGPUReserveWWMRegs.cpp
9899
AMDGPUResourceUsageAnalysis.cpp
99100
AMDGPURewriteOutArguments.cpp
100101
AMDGPURewriteUndefForPHI.cpp

0 commit comments

Comments
 (0)