@@ -105,6 +105,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
105
105
: RegisterRegAllocBase(N, D, C) {}
106
106
};
107
107
108
+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
109
+ public:
110
+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
111
+ : RegisterRegAllocBase(N, D, C) {}
112
+ };
113
+
108
114
static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
109
115
const MachineRegisterInfo &MRI,
110
116
const Register Reg) {
@@ -119,13 +125,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
119
125
return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
120
126
}
121
127
122
- // / -{sgpr|vgpr}-regalloc=... command line option.
128
+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
129
+ const MachineRegisterInfo &MRI,
130
+ const Register Reg) {
131
+ const SIMachineFunctionInfo *MFI =
132
+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
133
+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
134
+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
135
+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
136
+ }
137
+
138
+ // / -{sgpr|wwm|vgpr}-regalloc=... command line option.
123
139
/// Placeholder factory that always yields nullptr. Its address is used as the
/// initial value of the per-class regalloc options and is compared against the
/// registry default to tell whether a specific allocator was selected.
static FunctionPass *useDefaultRegisterAllocator() {
  return nullptr;
}
124
140
125
141
// / A dummy default pass factory indicates whether the register allocator is
126
142
// / overridden on the command line.
127
143
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
128
144
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
145
+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
129
146
130
147
static SGPRRegisterRegAlloc
131
148
defaultSGPRRegAlloc (" default" ,
@@ -142,6 +159,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
142
159
VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
143
160
cl::desc (" Register allocator to use for VGPRs" ));
144
161
162
/// Hidden command-line option holding the allocator factory chosen for WWM
/// registers. It is parsed through RegisterPassParser<WWMRegisterRegAlloc> and
/// starts out as &useDefaultRegisterAllocator.
+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
163
+ RegisterPassParser<WWMRegisterRegAlloc>>
164
+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
165
+ cl::init (&useDefaultRegisterAllocator),
166
+ cl::desc(" Register allocator to use for WWM registers" ));
145
167
146
168
static void initializeDefaultSGPRRegisterAllocatorOnce () {
147
169
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -161,6 +183,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
161
183
}
162
184
}
163
185
186
+ static void initializeDefaultWWMRegisterAllocatorOnce () {
187
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
188
+
189
+ if (!Ctor) {
190
+ Ctor = WWMRegAlloc;
191
+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
192
+ }
193
+ }
194
+
164
195
/// Factory for the basic register allocator, given onlyAllocateSGPRs as its
/// register predicate.
static FunctionPass *createBasicSGPRRegisterAllocator() {
  FunctionPass *Allocator = createBasicRegisterAllocator(onlyAllocateSGPRs);
  return Allocator;
}
@@ -185,6 +216,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
185
216
return createFastRegisterAllocator (onlyAllocateVGPRs, true );
186
217
}
187
218
219
+ static FunctionPass *createBasicWWMRegisterAllocator () {
220
+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
221
+ }
222
+
223
+ static FunctionPass *createGreedyWWMRegisterAllocator () {
224
+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
225
+ }
226
+
227
+ static FunctionPass *createFastWWMRegisterAllocator () {
228
+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
229
+ }
230
+
188
231
static SGPRRegisterRegAlloc basicRegAllocSGPR (
189
232
" basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
190
233
static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -201,6 +244,14 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
201
244
202
245
static VGPRRegisterRegAlloc fastRegAllocVGPR (
203
246
" fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
247
/// WWMRegisterRegAlloc entries pairing a name and description with the basic,
/// greedy and fast WWM allocator factories.
/// NOTE(review): presumably these become the selectable values of the WWM
/// regalloc option via RegisterPassParser -- confirm against the registry.
+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
248
+ " basic register allocator" ,
249
+ createBasicWWMRegisterAllocator);
250
+ static WWMRegisterRegAlloc
251
+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
252
+ createGreedyWWMRegisterAllocator);
253
+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
254
+ createFastWWMRegisterAllocator);
204
255
} // anonymous namespace
205
256
206
257
static cl::opt<bool >
@@ -443,6 +494,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
443
494
initializeAMDGPURemoveIncompatibleFunctionsPass (*PR);
444
495
initializeAMDGPULowerModuleLDSLegacyPass (*PR);
445
496
initializeAMDGPULowerBufferFatPointersPass (*PR);
497
+ initializeAMDGPUReserveWWMRegsPass (*PR);
446
498
initializeAMDGPURewriteOutArgumentsPass (*PR);
447
499
initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
448
500
initializeAMDGPUUnifyMetadataPass (*PR);
@@ -994,6 +1046,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
994
1046
995
1047
FunctionPass *createSGPRAllocPass (bool Optimized);
996
1048
FunctionPass *createVGPRAllocPass (bool Optimized);
1049
+ FunctionPass *createWWMRegAllocPass (bool Optimized);
997
1050
FunctionPass *createRegAllocPass (bool Optimized) override ;
998
1051
999
1052
bool addRegAssignAndRewriteFast () override ;
@@ -1387,7 +1440,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
1387
1440
}
1388
1441
1389
1442
bool GCNPassConfig::addPreRewrite () {
1390
- addPass (&SILowerWWMCopiesID);
1391
1443
if (EnableRegReassign)
1392
1444
addPass (&GCNNSAReassignID);
1393
1445
return true ;
@@ -1423,12 +1475,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1423
1475
return createFastVGPRRegisterAllocator ();
1424
1476
}
1425
1477
1478
+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1479
+ // Initialize the global default.
1480
+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1481
+ initializeDefaultWWMRegisterAllocatorOnce);
1482
+
1483
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1484
+ if (Ctor != useDefaultRegisterAllocator)
1485
+ return Ctor ();
1486
+
1487
+ if (Optimized)
1488
+ return createGreedyWWMRegisterAllocator ();
1489
+
1490
+ return createFastWWMRegisterAllocator ();
1491
+ }
1492
+
1426
1493
/// Override of the single-allocator hook. This configuration creates its
/// allocators through the separate SGPR/WWM/VGPR factory methods, so the body
/// is only an llvm_unreachable and \p Optimized is unused.
FunctionPass *GCNPassConfig::createRegAllocPass (bool Optimized) {
1427
1494
llvm_unreachable (" should not be used" );
1428
1495
}
1429
1496
1430
1497
static const char RegAllocOptNotSupportedMessage[] =
1431
- " -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc" ;
1498
+ " -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
1499
+ " and -vgpr-regalloc" ;
1432
1500
1433
1501
bool GCNPassConfig::addRegAssignAndRewriteFast () {
1434
1502
if (!usingDefaultRegAlloc ())
@@ -1440,11 +1508,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
1440
1508
1441
1509
// Equivalent of PEI for SGPRs.
1442
1510
addPass (&SILowerSGPRSpillsID);
1511
+
1512
+ // To allocate WWM registers used in whole quad mode operations (for shaders).
1443
1513
addPass (&SIPreAllocateWWMRegsID);
1444
1514
1445
- addPass (createVGPRAllocPass (false ));
1515
+ // For allocating other wwm register operands.
1516
+ addPass (createWWMRegAllocPass (false ));
1446
1517
1447
1518
addPass (&SILowerWWMCopiesID);
1519
+ addPass (&AMDGPUReserveWWMRegsID);
1520
+
1521
+ // For allocating per-thread VGPRs.
1522
+ addPass (createVGPRAllocPass (false ));
1523
+
1448
1524
return true ;
1449
1525
}
1450
1526
@@ -1464,8 +1540,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1464
1540
1465
1541
// Equivalent of PEI for SGPRs.
1466
1542
addPass (&SILowerSGPRSpillsID);
1543
+
1544
+ // To allocate WWM registers used in whole quad mode operations (for shaders).
1467
1545
addPass (&SIPreAllocateWWMRegsID);
1468
1546
1547
+ // For allocating other whole wave mode registers.
1548
+ addPass (createWWMRegAllocPass (true ));
1549
+ addPass (&SILowerWWMCopiesID);
1550
+ addPass (createVirtRegRewriter (false ));
1551
+ addPass (&AMDGPUReserveWWMRegsID);
1552
+
1553
+ // For allocating per-thread VGPRs.
1469
1554
addPass (createVGPRAllocPass (true ));
1470
1555
1471
1556
addPreRewrite ();
0 commit comments