@@ -108,6 +108,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
108
108
: RegisterRegAllocBase(N, D, C) {}
109
109
};
110
110
111
+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
112
+ public:
113
+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
114
+ : RegisterRegAllocBase(N, D, C) {}
115
+ };
116
+
111
117
static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
112
118
const MachineRegisterInfo &MRI,
113
119
const Register Reg) {
@@ -122,13 +128,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
122
128
return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
123
129
}
124
130
125
- // / -{sgpr|vgpr}-regalloc=... command line option.
131
+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
132
+ const MachineRegisterInfo &MRI,
133
+ const Register Reg) {
134
+ const SIMachineFunctionInfo *MFI =
135
+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
136
+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
137
+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
138
+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
139
+ }
140
+
141
+ /// -{sgpr|wwm|vgpr}-regalloc=... command line option.
126
142
static FunctionPass *useDefaultRegisterAllocator() {
  // Never builds a pass; its address serves as the initial value of the
  // per-class regalloc options and is compared against the selected ctor.
  return nullptr;
}
127
143
128
144
/// A dummy default pass factory indicates whether the register allocator is
129
145
/// overridden on the command line.
130
146
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
131
147
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
148
+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
132
149
133
150
static SGPRRegisterRegAlloc
134
151
defaultSGPRRegAlloc (" default" ,
@@ -145,6 +162,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
145
162
VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
146
163
cl::desc (" Register allocator to use for VGPRs" ));
147
164
165
+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
166
+ RegisterPassParser<WWMRegisterRegAlloc>>
167
+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
168
+ cl::init (&useDefaultRegisterAllocator),
169
+ cl::desc(" Register allocator to use for WWM registers" ));
148
170
149
171
static void initializeDefaultSGPRRegisterAllocatorOnce () {
150
172
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -164,6 +186,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
164
186
}
165
187
}
166
188
189
+ static void initializeDefaultWWMRegisterAllocatorOnce () {
190
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
191
+
192
+ if (!Ctor) {
193
+ Ctor = WWMRegAlloc;
194
+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
195
+ }
196
+ }
197
+
167
198
/// Builds the basic register allocator with the SGPR-only register filter.
static FunctionPass *createBasicSGPRRegisterAllocator() {
  FunctionPass *const Allocator =
      createBasicRegisterAllocator(onlyAllocateSGPRs);
  return Allocator;
}
@@ -188,6 +219,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
188
219
return createFastRegisterAllocator (onlyAllocateVGPRs, true );
189
220
}
190
221
222
+ static FunctionPass *createBasicWWMRegisterAllocator () {
223
+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
224
+ }
225
+
226
+ static FunctionPass *createGreedyWWMRegisterAllocator () {
227
+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
228
+ }
229
+
230
+ static FunctionPass *createFastWWMRegisterAllocator () {
231
+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
232
+ }
233
+
191
234
static SGPRRegisterRegAlloc basicRegAllocSGPR (
192
235
" basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
193
236
static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -204,6 +247,14 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
204
247
205
248
static VGPRRegisterRegAlloc fastRegAllocVGPR (
206
249
" fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
250
+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
251
+ " basic register allocator" ,
252
+ createBasicWWMRegisterAllocator);
253
+ static WWMRegisterRegAlloc
254
+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
255
+ createGreedyWWMRegisterAllocator);
256
+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
257
+ createFastWWMRegisterAllocator);
207
258
} // anonymous namespace
208
259
209
260
static cl::opt<bool >
@@ -440,6 +491,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
440
491
initializeAMDGPURemoveIncompatibleFunctionsPass (*PR);
441
492
initializeAMDGPULowerModuleLDSLegacyPass (*PR);
442
493
initializeAMDGPULowerBufferFatPointersPass (*PR);
494
+ initializeAMDGPUReserveWWMRegsPass (*PR);
443
495
initializeAMDGPURewriteOutArgumentsPass (*PR);
444
496
initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
445
497
initializeAMDGPUUnifyMetadataPass (*PR);
@@ -990,6 +1042,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
990
1042
991
1043
FunctionPass *createSGPRAllocPass (bool Optimized);
992
1044
FunctionPass *createVGPRAllocPass (bool Optimized);
1045
+ FunctionPass *createWWMRegAllocPass (bool Optimized);
993
1046
FunctionPass *createRegAllocPass (bool Optimized) override ;
994
1047
995
1048
bool addRegAssignAndRewriteFast () override ;
@@ -1383,7 +1436,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
1383
1436
}
1384
1437
1385
1438
bool GCNPassConfig::addPreRewrite () {
1386
- addPass (&SILowerWWMCopiesID);
1387
1439
if (EnableRegReassign)
1388
1440
addPass (&GCNNSAReassignID);
1389
1441
return true ;
@@ -1419,12 +1471,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1419
1471
return createFastVGPRRegisterAllocator ();
1420
1472
}
1421
1473
1474
+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1475
+ // Initialize the global default.
1476
+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1477
+ initializeDefaultWWMRegisterAllocatorOnce);
1478
+
1479
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1480
+ if (Ctor != useDefaultRegisterAllocator)
1481
+ return Ctor ();
1482
+
1483
+ if (Optimized)
1484
+ return createGreedyWWMRegisterAllocator ();
1485
+
1486
+ return createFastWWMRegisterAllocator ();
1487
+ }
1488
+
1422
1489
FunctionPass *GCNPassConfig::createRegAllocPass (bool Optimized) {
1423
1490
llvm_unreachable (" should not be used" );
1424
1491
}
1425
1492
1426
1493
static const char RegAllocOptNotSupportedMessage[] =
1427
- " -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc" ;
1494
+ " -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
1495
+ " and -vgpr-regalloc" ;
1428
1496
1429
1497
bool GCNPassConfig::addRegAssignAndRewriteFast () {
1430
1498
if (!usingDefaultRegAlloc ())
@@ -1436,11 +1504,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
1436
1504
1437
1505
// Equivalent of PEI for SGPRs.
1438
1506
addPass (&SILowerSGPRSpillsLegacyID);
1507
+
1508
+ // To allocate WWM registers used in whole quad mode operations (for shaders).
1439
1509
addPass (&SIPreAllocateWWMRegsID);
1440
1510
1441
- addPass (createVGPRAllocPass (false ));
1511
+ // For allocating other wwm register operands.
1512
+ addPass (createWWMRegAllocPass (false ));
1442
1513
1443
1514
addPass (&SILowerWWMCopiesID);
1515
+ addPass (&AMDGPUReserveWWMRegsID);
1516
+
1517
+ // For allocating per-thread VGPRs.
1518
+ addPass (createVGPRAllocPass (false ));
1519
+
1444
1520
return true ;
1445
1521
}
1446
1522
@@ -1460,8 +1536,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1460
1536
1461
1537
// Equivalent of PEI for SGPRs.
1462
1538
addPass (&SILowerSGPRSpillsLegacyID);
1539
+
1540
+ // To allocate WWM registers used in whole quad mode operations (for shaders).
1463
1541
addPass (&SIPreAllocateWWMRegsID);
1464
1542
1543
+ // For allocating other whole wave mode registers.
1544
+ addPass (createWWMRegAllocPass (true ));
1545
+ addPass (&SILowerWWMCopiesID);
1546
+ addPass (createVirtRegRewriter (false ));
1547
+ addPass (&AMDGPUReserveWWMRegsID);
1548
+
1549
+ // For allocating per-thread VGPRs.
1465
1550
addPass (createVGPRAllocPass (true ));
1466
1551
1467
1552
addPreRewrite ();
0 commit comments