@@ -108,6 +108,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
108
108
: RegisterRegAllocBase(N, D, C) {}
109
109
};
110
110
111
+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
112
+ public:
113
+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
114
+ : RegisterRegAllocBase(N, D, C) {}
115
+ };
116
+
111
117
static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
112
118
const MachineRegisterInfo &MRI,
113
119
const Register Reg) {
@@ -122,13 +128,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
122
128
return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
123
129
}
124
130
125
- // / -{sgpr|vgpr}-regalloc=... command line option.
131
+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
132
+ const MachineRegisterInfo &MRI,
133
+ const Register Reg) {
134
+ const SIMachineFunctionInfo *MFI =
135
+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
136
+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
137
+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
138
+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
139
+ }
140
+
141
+ /// -{sgpr|wwm|vgpr}-regalloc=... command line option.
126
142
/// Placeholder factory that never creates a pass. It is the initial value of
/// the per-register-class regalloc options, and its address is compared
/// against the selected constructor to detect that no allocator was chosen.
static FunctionPass *useDefaultRegisterAllocator() {
  return nullptr;
}
127
143
128
144
/// A dummy default pass factory indicates whether the register allocator is
129
145
/// overridden on the command line.
130
146
// One-shot guards, one per register-class allocator. The WWM flag is passed
// to llvm::call_once before the WWM default allocator is read; the SGPR and
// VGPR flags presumably play the same role for their allocators (their uses
// are outside this excerpt).
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
132
149
133
150
static SGPRRegisterRegAlloc
134
151
defaultSGPRRegAlloc (" default" ,
@@ -145,6 +162,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
145
162
VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
146
163
cl::desc (" Register allocator to use for VGPRs" ));
147
164
165
+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
166
+ RegisterPassParser<WWMRegisterRegAlloc>>
167
+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
168
+ cl::init (&useDefaultRegisterAllocator),
169
+ cl::desc(" Register allocator to use for WWM registers" ));
148
170
149
171
static void initializeDefaultSGPRRegisterAllocatorOnce () {
150
172
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -164,6 +186,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
164
186
}
165
187
}
166
188
189
+ static void initializeDefaultWWMRegisterAllocatorOnce () {
190
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
191
+
192
+ if (!Ctor) {
193
+ Ctor = WWMRegAlloc;
194
+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
195
+ }
196
+ }
197
+
167
198
/// Creates the basic register allocator with the onlyAllocateSGPRs filter.
static FunctionPass *createBasicSGPRRegisterAllocator() {
  FunctionPass *BasicRA = createBasicRegisterAllocator(onlyAllocateSGPRs);
  return BasicRA;
}
@@ -188,6 +219,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
188
219
return createFastRegisterAllocator (onlyAllocateVGPRs, true );
189
220
}
190
221
222
+ static FunctionPass *createBasicWWMRegisterAllocator () {
223
+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
224
+ }
225
+
226
+ static FunctionPass *createGreedyWWMRegisterAllocator () {
227
+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
228
+ }
229
+
230
+ static FunctionPass *createFastWWMRegisterAllocator () {
231
+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
232
+ }
233
+
191
234
static SGPRRegisterRegAlloc basicRegAllocSGPR (
192
235
" basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
193
236
static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -204,6 +247,14 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
204
247
205
248
// Registers the fast allocator as a VGPR allocator under the name "fast".
static VGPRRegisterRegAlloc fastRegAllocVGPR(
    "fast", "fast register allocator", createFastVGPRRegisterAllocator);
250
+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
251
+ " basic register allocator" ,
252
+ createBasicWWMRegisterAllocator);
253
+ static WWMRegisterRegAlloc
254
+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
255
+ createGreedyWWMRegisterAllocator);
256
+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
257
+ createFastWWMRegisterAllocator);
207
258
} // anonymous namespace
208
259
209
260
static cl::opt<bool >
@@ -440,6 +491,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
440
491
initializeAMDGPURemoveIncompatibleFunctionsPass (*PR);
441
492
initializeAMDGPULowerModuleLDSLegacyPass (*PR);
442
493
initializeAMDGPULowerBufferFatPointersPass (*PR);
494
+ initializeAMDGPUReserveWWMRegsPass (*PR);
443
495
initializeAMDGPURewriteOutArgumentsPass (*PR);
444
496
initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
445
497
initializeAMDGPUUnifyMetadataPass (*PR);
@@ -989,6 +1041,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
989
1041
990
1042
FunctionPass *createSGPRAllocPass (bool Optimized);
991
1043
FunctionPass *createVGPRAllocPass (bool Optimized);
1044
+ FunctionPass *createWWMRegAllocPass (bool Optimized);
992
1045
FunctionPass *createRegAllocPass (bool Optimized) override ;
993
1046
994
1047
bool addRegAssignAndRewriteFast () override ;
@@ -1382,7 +1435,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
1382
1435
}
1383
1436
1384
1437
bool GCNPassConfig::addPreRewrite () {
1385
- addPass (&SILowerWWMCopiesID);
1386
1438
if (EnableRegReassign)
1387
1439
addPass (&GCNNSAReassignID);
1388
1440
return true ;
@@ -1418,12 +1470,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1418
1470
return createFastVGPRRegisterAllocator ();
1419
1471
}
1420
1472
1473
+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1474
+ // Initialize the global default.
1475
+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1476
+ initializeDefaultWWMRegisterAllocatorOnce);
1477
+
1478
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1479
+ if (Ctor != useDefaultRegisterAllocator)
1480
+ return Ctor ();
1481
+
1482
+ if (Optimized)
1483
+ return createGreedyWWMRegisterAllocator ();
1484
+
1485
+ return createFastWWMRegisterAllocator ();
1486
+ }
1487
+
1421
1488
/// Override that must never be reached. This excerpt creates its allocators
/// through createWWMRegAllocPass and createVGPRAllocPass instead.
FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
  llvm_unreachable("should not be used");
}
1424
1491
1425
1492
// Message text that points users at the per-register-class options instead
// of the generic -regalloc.
static const char RegAllocOptNotSupportedMessage[] =
    "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
    "and -vgpr-regalloc";
1427
1495
1428
1496
bool GCNPassConfig::addRegAssignAndRewriteFast () {
1429
1497
if (!usingDefaultRegAlloc ())
@@ -1435,11 +1503,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
1435
1503
1436
1504
// Equivalent of PEI for SGPRs.
1437
1505
addPass (&SILowerSGPRSpillsLegacyID);
1506
+
1507
+ // Allocate WWM registers used in whole quad mode operations (for shaders).
1438
1508
addPass (&SIPreAllocateWWMRegsID);
1439
1509
1440
- addPass (createVGPRAllocPass (false ));
1510
+ // For allocating other wwm register operands.
1511
+ addPass (createWWMRegAllocPass (false ));
1441
1512
1442
1513
addPass (&SILowerWWMCopiesID);
1514
+ addPass (&AMDGPUReserveWWMRegsID);
1515
+
1516
+ // For allocating per-thread VGPRs.
1517
+ addPass (createVGPRAllocPass (false ));
1518
+
1443
1519
return true ;
1444
1520
}
1445
1521
@@ -1459,8 +1535,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1459
1535
1460
1536
// Equivalent of PEI for SGPRs.
1461
1537
addPass (&SILowerSGPRSpillsLegacyID);
1538
+
1539
+ // Allocate WWM registers used in whole quad mode operations (for shaders).
1462
1540
addPass (&SIPreAllocateWWMRegsID);
1463
1541
1542
+ // For allocating other whole wave mode registers.
1543
+ addPass (createWWMRegAllocPass (true ));
1544
+ addPass (&SILowerWWMCopiesID);
1545
+ addPass (createVirtRegRewriter (false ));
1546
+ addPass (&AMDGPUReserveWWMRegsID);
1547
+
1548
+ // For allocating per-thread VGPRs.
1464
1549
addPass (createVGPRAllocPass (true ));
1465
1550
1466
1551
addPreRewrite ();
0 commit comments