Skip to content

Commit 5e8c62b

Browse files
committed
[AMDGPU] Split VGPR regalloc pipeline
Allocating wwm-registers and regular VGPR operands together imposes many challenges in the way the registers are reused during allocation. There are times when regalloc reuses the registers of regular VGPR operations for wwm-operations in a small range, unintentionally clobbering their inactive lanes and causing correctness issues that are hard to trace. This patch splits the VGPR allocation pipeline further to allocate wwm-registers first and the regular VGPR operands in a separate pipeline. The split ensures that the physical registers used for wwm allocations won't take part in the next allocation pipeline, avoiding any such clobbering.
1 parent 0c1500e commit 5e8c62b

File tree

86 files changed

+10411
-10907
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

86 files changed

+10411
-10907
lines changed

llvm/include/llvm/CodeGen/MachineRegisterInfo.h

+2
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ class MachineRegisterInfo {
184184
TheDelegate->MRI_NoteCloneVirtualRegister(NewReg, SrcReg);
185185
}
186186

187+
/// Accessor returning a const reference to the MachineFunction that the MF
/// member points to.
const MachineFunction &getMF() const { return *MF; }
188+
187189
//===--------------------------------------------------------------------===//
188190
// Function State
189191
//===--------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPU.h

+4
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
5757
ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
5858
FunctionPass *createAMDGPUCodeGenPreparePass();
5959
FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
60+
// NOTE(review): no definition of this factory is visible in the newly added
// AMDGPUReserveWWMRegs.cpp, and the pass pipeline adds the pass through
// AMDGPUReserveWWMRegsID instead — confirm it is defined elsewhere or drop it.
FunctionPass *createAMDGPUReserveWWMRegsPass();
6061
FunctionPass *createAMDGPURewriteOutArgumentsPass();
6162
ModulePass *
6263
createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
@@ -154,6 +155,9 @@ struct AMDGPULowerBufferFatPointersPass
154155
const TargetMachine &TM;
155156
};
156157

158+
void initializeAMDGPUReserveWWMRegsPass(PassRegistry &);
159+
extern char &AMDGPUReserveWWMRegsID;
160+
157161
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
158162
extern char &AMDGPURewriteOutArgumentsID;
159163

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
//===-- AMDGPUReserveWWMRegs.cpp - Add WWM Regs to reserved regs list -----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This pass should be invoked at the end of wwm-regalloc pipeline.
11+
/// It identifies the WWM regs allocated during this pipeline and adds
12+
/// them to the list of reserved registers so that they won't be available for
13+
/// per-thread VGPR allocation in the subsequent regalloc pipeline.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#include "AMDGPU.h"
18+
#include "GCNSubtarget.h"
19+
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
20+
#include "SIMachineFunctionInfo.h"
21+
#include "llvm/CodeGen/MachineFunctionPass.h"
22+
#include "llvm/CodeGen/VirtRegMap.h"
23+
#include "llvm/InitializePasses.h"
24+
25+
using namespace llvm;
26+
27+
#define DEBUG_TYPE "amdgpu-reserve-wwm-regs"
28+
29+
namespace {
30+
31+
/// Machine function pass that adds the WWM registers found in a function to
/// the reserved WWM register list kept in SIMachineFunctionInfo. The work is
/// done in runOnMachineFunction(), defined out of line.
class AMDGPUReserveWWMRegs : public MachineFunctionPass {
32+
public:
33+
// Pass identification; its address is exported as AMDGPUReserveWWMRegsID.
static char ID;
34+
35+
// Registers this pass with the global PassRegistry on construction.
AMDGPUReserveWWMRegs() : MachineFunctionPass(ID) {
36+
initializeAMDGPUReserveWWMRegsPass(*PassRegistry::getPassRegistry());
37+
}
38+
39+
bool runOnMachineFunction(MachineFunction &MF) override;
40+
41+
// Human-readable pass name.
StringRef getPassName() const override {
42+
return "AMDGPU Reserve WWM Registers";
43+
}
44+
45+
// Declares that every analysis is preserved, then defers to the base class
// for its standard requirements.
void getAnalysisUsage(AnalysisUsage &AU) const override {
46+
AU.setPreservesAll();
47+
MachineFunctionPass::getAnalysisUsage(AU);
48+
}
49+
};
50+
51+
} // End anonymous namespace.
52+
53+
// Registers the pass under the DEBUG_TYPE string ("amdgpu-reserve-wwm-regs").
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags — confirm against INITIALIZE_PASS.
INITIALIZE_PASS(AMDGPUReserveWWMRegs, DEBUG_TYPE,
54+
"AMDGPU Reserve WWM Registers", false, false)
55+
56+
// Storage for the pass ID declared in the class.
char AMDGPUReserveWWMRegs::ID = 0;
57+
58+
// Externally visible handle to the pass ID (declared in AMDGPU.h).
char &llvm::AMDGPUReserveWWMRegsID = AMDGPUReserveWWMRegs::ID;
59+
60+
/// Walks every instruction of \p MF and, for each SI_SPILL_S32_TO_VGPR /
/// SI_RESTORE_S32_FROM_VGPR, reserves the register it names as a WWM register
/// in SIMachineFunctionInfo. Afterwards it clears the renamable flag on every
/// operand that uses a reserved WWM register and clears the non-WWM
/// allocation mask.
///
/// \returns true if at least one spill/restore pseudo was found.
/// NOTE(review): the renamable-flag reset below also edits operands but does
/// not set Changed — confirm this is intentional given setPreservesAll().
bool AMDGPUReserveWWMRegs::runOnMachineFunction(MachineFunction &MF) {
61+
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
62+
63+
bool Changed = false;
64+
for (MachineBasicBlock &MBB : MF) {
65+
for (MachineInstr &MI : MBB) {
66+
// Only the SGPR-to-VGPR spill and restore pseudos are of interest.
unsigned Opc = MI.getOpcode();
67+
if (Opc != AMDGPU::SI_SPILL_S32_TO_VGPR &&
68+
Opc != AMDGPU::SI_RESTORE_S32_FROM_VGPR)
69+
continue;
70+
71+
// The register of interest is operand 0 of the spill and operand 1 of the
// restore (presumably the VGPR carrying the spilled lanes — verify against
// the instruction definitions).
Register Reg = Opc == AMDGPU::SI_SPILL_S32_TO_VGPR
72+
? MI.getOperand(0).getReg()
73+
: MI.getOperand(1).getReg();
74+
75+
assert(Reg.isPhysical() &&
76+
"All WWM registers should have been allocated by now.");
77+
78+
MFI->reserveWWMRegister(Reg);
79+
Changed |= true;
80+
}
81+
}
82+
83+
// The renamable flag can't be set for reserved registers. Reset the flag for
84+
// MOs involving wwm-regs as they will be reserved during vgpr-regalloc
85+
// pipeline.
86+
const MachineRegisterInfo &MRI = MF.getRegInfo();
87+
// Iterates over everything in getWWMReservedRegs(), which may include
// registers reserved before this pass ran, not only those added above.
for (Register Reg : MFI->getWWMReservedRegs()) {
88+
for (MachineOperand &MO : MRI.reg_operands(Reg))
89+
MO.setIsRenamable(false);
90+
}
91+
92+
// Now clear the NonWWMRegMask earlier set during wwm-regalloc.
93+
MFI->clearNonWWMRegAllocMask();
94+
95+
return Changed;
96+
}

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+89-4
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
105105
: RegisterRegAllocBase(N, D, C) {}
106106
};
107107

108+
/// Registry node type for register allocators selectable for WWM registers.
/// It is the parser type behind the -wwm-regalloc option and the type of the
/// static "basic"/"greedy"/"fast" WWM registrations in this file.
class WWMRegisterRegAlloc : public RegisterRegAllocBase<WWMRegisterRegAlloc> {
109+
public:
110+
// N, D and C are forwarded unchanged to the base class; the registrations in
// this file pass an option name, a description and a factory function.
WWMRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
111+
: RegisterRegAllocBase(N, D, C) {}
112+
};
113+
108114
static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
109115
const MachineRegisterInfo &MRI,
110116
const Register Reg) {
@@ -119,13 +125,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
119125
return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC);
120126
}
121127

122-
/// -{sgpr|vgpr}-regalloc=... command line option.
128+
/// Filter predicate handed to the WWM register allocator factories.
/// \returns true when the register class of \p Reg is not an SGPR class and
/// \p Reg carries the WWM_REG flag in SIMachineFunctionInfo.
static bool onlyAllocateWWMRegs(const TargetRegisterInfo &TRI,
129+
const MachineRegisterInfo &MRI,
130+
const Register Reg) {
131+
// The per-function info is reached through MRI's owning MachineFunction.
const SIMachineFunctionInfo *MFI =
132+
MRI.getMF().getInfo<SIMachineFunctionInfo>();
133+
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
134+
return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC) &&
135+
MFI->checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
136+
}
137+
138+
/// -{sgpr|wwm|vgpr}-regalloc=... command line option.
123139
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
124140

125141
/// A dummy default pass factory indicates whether the register allocator is
126142
/// overridden on the command line.
127143
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
128144
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
145+
static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
129146

130147
static SGPRRegisterRegAlloc
131148
defaultSGPRRegAlloc("default",
@@ -142,6 +159,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
142159
VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
143160
cl::desc("Register allocator to use for VGPRs"));
144161

162+
// Hidden -wwm-regalloc=<name> option. Its value is a pass factory parsed from
// the WWMRegisterRegAlloc registry; it defaults to the
// useDefaultRegisterAllocator placeholder.
static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
163+
RegisterPassParser<WWMRegisterRegAlloc>>
164+
WWMRegAlloc("wwm-regalloc", cl::Hidden,
165+
cl::init(&useDefaultRegisterAllocator),
166+
cl::desc("Register allocator to use for WWM registers"));
145167

146168
static void initializeDefaultSGPRRegisterAllocatorOnce() {
147169
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
@@ -161,6 +183,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
161183
}
162184
}
163185

186+
/// If no default WWM allocator factory has been recorded yet, records the
/// current value of the -wwm-regalloc option as the default. Invoked through
/// llvm::call_once from createWWMRegAllocPass().
static void initializeDefaultWWMRegisterAllocatorOnce() {
187+
RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault();
188+
189+
if (!Ctor) {
190+
// NOTE(review): this store to the local Ctor is never read afterwards in
// this function; it looks redundant — confirm before removing.
Ctor = WWMRegAlloc;
191+
WWMRegisterRegAlloc::setDefault(WWMRegAlloc);
192+
}
193+
}
194+
164195
static FunctionPass *createBasicSGPRRegisterAllocator() {
165196
return createBasicRegisterAllocator(onlyAllocateSGPRs);
166197
}
@@ -185,6 +216,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
185216
return createFastRegisterAllocator(onlyAllocateVGPRs, true);
186217
}
187218

219+
/// Factory for the basic register allocator, passing onlyAllocateWWMRegs as
/// its register filter.
static FunctionPass *createBasicWWMRegisterAllocator() {
220+
return createBasicRegisterAllocator(onlyAllocateWWMRegs);
221+
}
222+
223+
/// Factory for the greedy register allocator, passing onlyAllocateWWMRegs as
/// its register filter.
static FunctionPass *createGreedyWWMRegisterAllocator() {
224+
return createGreedyRegisterAllocator(onlyAllocateWWMRegs);
225+
}
226+
227+
/// Factory for the fast register allocator, passing onlyAllocateWWMRegs as
/// its register filter.
/// NOTE(review): the second argument is false here while the VGPR variant
/// passes true; presumably it controls whether virtual registers are cleared
/// after allocation — confirm against createFastRegisterAllocator.
static FunctionPass *createFastWWMRegisterAllocator() {
228+
return createFastRegisterAllocator(onlyAllocateWWMRegs, false);
229+
}
230+
188231
static SGPRRegisterRegAlloc basicRegAllocSGPR(
189232
"basic", "basic register allocator", createBasicSGPRRegisterAllocator);
190233
static SGPRRegisterRegAlloc greedyRegAllocSGPR(
@@ -201,6 +244,14 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
201244

202245
static VGPRRegisterRegAlloc fastRegAllocVGPR(
203246
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
247+
// Static registrations that make "basic", "greedy" and "fast" available as
// values of -wwm-regalloc, each bound to the matching WWM-filtered factory.
static WWMRegisterRegAlloc basicRegAllocWWMReg("basic",
248+
"basic register allocator",
249+
createBasicWWMRegisterAllocator);
250+
static WWMRegisterRegAlloc
251+
greedyRegAllocWWMReg("greedy", "greedy register allocator",
252+
createGreedyWWMRegisterAllocator);
253+
static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator",
254+
createFastWWMRegisterAllocator);
204255
} // anonymous namespace
205256

206257
static cl::opt<bool>
@@ -443,6 +494,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
443494
initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
444495
initializeAMDGPULowerModuleLDSLegacyPass(*PR);
445496
initializeAMDGPULowerBufferFatPointersPass(*PR);
497+
initializeAMDGPUReserveWWMRegsPass(*PR);
446498
initializeAMDGPURewriteOutArgumentsPass(*PR);
447499
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
448500
initializeAMDGPUUnifyMetadataPass(*PR);
@@ -994,6 +1046,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
9941046

9951047
FunctionPass *createSGPRAllocPass(bool Optimized);
9961048
FunctionPass *createVGPRAllocPass(bool Optimized);
1049+
FunctionPass *createWWMRegAllocPass(bool Optimized);
9971050
FunctionPass *createRegAllocPass(bool Optimized) override;
9981051

9991052
bool addRegAssignAndRewriteFast() override;
@@ -1387,7 +1440,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
13871440
}
13881441

13891442
bool GCNPassConfig::addPreRewrite() {
1390-
addPass(&SILowerWWMCopiesID);
13911443
if (EnableRegReassign)
13921444
addPass(&GCNNSAReassignID);
13931445
return true;
@@ -1423,12 +1475,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
14231475
return createFastVGPRRegisterAllocator();
14241476
}
14251477

1478+
/// Creates the register allocator pass used for WWM registers.
/// \param Optimized selects the greedy allocator when true and the fast
/// allocator when false, unless a factory other than the
/// useDefaultRegisterAllocator placeholder is the registered default.
/// \returns the newly created allocator pass.
FunctionPass *GCNPassConfig::createWWMRegAllocPass(bool Optimized) {
1479+
// Initialize the global default.
1480+
llvm::call_once(InitializeDefaultWWMRegisterAllocatorFlag,
1481+
initializeDefaultWWMRegisterAllocatorOnce);
1482+
1483+
// A default other than the placeholder takes precedence over Optimized.
RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault();
1484+
if (Ctor != useDefaultRegisterAllocator)
1485+
return Ctor();
1486+
1487+
if (Optimized)
1488+
return createGreedyWWMRegisterAllocator();
1489+
1490+
return createFastWWMRegisterAllocator();
1491+
}
1492+
14261493
FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
14271494
llvm_unreachable("should not be used");
14281495
}
14291496

14301497
static const char RegAllocOptNotSupportedMessage[] =
1431-
"-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1498+
"-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
1499+
"and -vgpr-regalloc";
14321500

14331501
bool GCNPassConfig::addRegAssignAndRewriteFast() {
14341502
if (!usingDefaultRegAlloc())
@@ -1440,11 +1508,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
14401508

14411509
// Equivalent of PEI for SGPRs.
14421510
addPass(&SILowerSGPRSpillsID);
1511+
1512+
// To allocate wwm registers used in whole quad mode operations (for shaders).
14431513
addPass(&SIPreAllocateWWMRegsID);
14441514

1445-
addPass(createVGPRAllocPass(false));
1515+
// For allocating other wwm register operands.
1516+
addPass(createWWMRegAllocPass(false));
14461517

14471518
addPass(&SILowerWWMCopiesID);
1519+
addPass(&AMDGPUReserveWWMRegsID);
1520+
1521+
// For allocating per-thread VGPRs.
1522+
addPass(createVGPRAllocPass(false));
1523+
14481524
return true;
14491525
}
14501526

@@ -1464,8 +1540,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14641540

14651541
// Equivalent of PEI for SGPRs.
14661542
addPass(&SILowerSGPRSpillsID);
1543+
1544+
// To allocate wwm registers used in whole quad mode operations (for shaders).
14671545
addPass(&SIPreAllocateWWMRegsID);
14681546

1547+
// For allocating other whole wave mode registers.
1548+
addPass(createWWMRegAllocPass(true));
1549+
addPass(&SILowerWWMCopiesID);
1550+
addPass(createVirtRegRewriter(false));
1551+
addPass(&AMDGPUReserveWWMRegsID);
1552+
1553+
// For allocating per-thread VGPRs.
14691554
addPass(createVGPRAllocPass(true));
14701555

14711556
addPreRewrite();

llvm/lib/Target/AMDGPU/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ add_llvm_target(AMDGPUCodeGen
9595
AMDGPURegBankSelect.cpp
9696
AMDGPURegisterBankInfo.cpp
9797
AMDGPURemoveIncompatibleFunctions.cpp
98+
AMDGPUReserveWWMRegs.cpp
9899
AMDGPUResourceUsageAnalysis.cpp
99100
AMDGPURewriteOutArguments.cpp
100101
AMDGPURewriteUndefForPHI.cpp

0 commit comments

Comments
 (0)