@@ -84,6 +84,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
84
84
: RegisterRegAllocBase(N, D, C) {}
85
85
};
86
86
87
+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
88
+ public:
89
+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
90
+ : RegisterRegAllocBase(N, D, C) {}
91
+ };
92
+
87
93
static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
88
94
const MachineRegisterInfo &MRI,
89
95
const Register Reg) {
@@ -98,13 +104,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
98
104
return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
99
105
}
100
106
101
- // / -{sgpr|vgpr}-regalloc=... command line option.
107
+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
108
+ const MachineRegisterInfo &MRI,
109
+ const Register Reg) {
110
+ const SIMachineFunctionInfo *MFI =
111
+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
112
+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
113
+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
114
+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
115
+ }
116
+
117
+ // / -{sgpr|wwm|vgpr}-regalloc=... command line option.
102
118
static FunctionPass *useDefaultRegisterAllocator () { return nullptr ; }
103
119
104
120
// / A dummy default pass factory indicates whether the register allocator is
105
121
// / overridden on the command line.
106
122
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
107
123
static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
124
+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
108
125
109
126
static SGPRRegisterRegAlloc
110
127
defaultSGPRRegAlloc (" default" ,
@@ -121,6 +138,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
121
138
VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
122
139
cl::desc (" Register allocator to use for VGPRs" ));
123
140
141
+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
142
+ RegisterPassParser<WWMRegisterRegAlloc>>
143
+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
144
+ cl::init (&useDefaultRegisterAllocator),
145
+ cl::desc(" Register allocator to use for WWM registers" ));
124
146
125
147
static void initializeDefaultSGPRRegisterAllocatorOnce () {
126
148
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -140,6 +162,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
140
162
}
141
163
}
142
164
165
+ static void initializeDefaultWWMRegisterAllocatorOnce () {
166
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
167
+
168
+ if (!Ctor) {
169
+ Ctor = WWMRegAlloc;
170
+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
171
+ }
172
+ }
173
+
143
174
static FunctionPass *createBasicSGPRRegisterAllocator () {
144
175
return createBasicRegisterAllocator (onlyAllocateSGPRs);
145
176
}
@@ -164,6 +195,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
164
195
return createFastRegisterAllocator (onlyAllocateVGPRs, true );
165
196
}
166
197
198
+ static FunctionPass *createBasicWWMRegisterAllocator () {
199
+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
200
+ }
201
+
202
+ static FunctionPass *createGreedyWWMRegisterAllocator () {
203
+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
204
+ }
205
+
206
+ static FunctionPass *createFastWWMRegisterAllocator () {
207
+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
208
+ }
209
+
167
210
static SGPRRegisterRegAlloc basicRegAllocSGPR (
168
211
" basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
169
212
static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -180,6 +223,15 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
180
223
181
224
static VGPRRegisterRegAlloc fastRegAllocVGPR (
182
225
" fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
226
+
227
+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
228
+ " basic register allocator" ,
229
+ createBasicWWMRegisterAllocator);
230
+ static WWMRegisterRegAlloc
231
+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
232
+ createGreedyWWMRegisterAllocator);
233
+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
234
+ createFastWWMRegisterAllocator);
183
235
} // anonymous namespace
184
236
185
237
static cl::opt<bool >
@@ -437,6 +489,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
437
489
initializeAMDGPUSwLowerLDSLegacyPass (*PR);
438
490
initializeAMDGPULowerModuleLDSLegacyPass (*PR);
439
491
initializeAMDGPULowerBufferFatPointersPass (*PR);
492
+ initializeAMDGPUReserveWWMRegsPass (*PR);
440
493
initializeAMDGPURewriteOutArgumentsPass (*PR);
441
494
initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
442
495
initializeAMDGPUUnifyMetadataPass (*PR);
@@ -995,6 +1048,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
995
1048
996
1049
FunctionPass *createSGPRAllocPass (bool Optimized);
997
1050
FunctionPass *createVGPRAllocPass (bool Optimized);
1051
+ FunctionPass *createWWMRegAllocPass (bool Optimized);
998
1052
FunctionPass *createRegAllocPass (bool Optimized) override ;
999
1053
1000
1054
bool addRegAssignAndRewriteFast () override ;
@@ -1409,7 +1463,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
1409
1463
}
1410
1464
1411
1465
bool GCNPassConfig::addPreRewrite () {
1412
- addPass (&SILowerWWMCopiesID);
1413
1466
if (EnableRegReassign)
1414
1467
addPass (&GCNNSAReassignID);
1415
1468
return true ;
@@ -1445,12 +1498,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1445
1498
return createFastVGPRRegisterAllocator ();
1446
1499
}
1447
1500
1501
+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1502
+ // Initialize the global default.
1503
+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1504
+ initializeDefaultWWMRegisterAllocatorOnce);
1505
+
1506
+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1507
+ if (Ctor != useDefaultRegisterAllocator)
1508
+ return Ctor ();
1509
+
1510
+ if (Optimized)
1511
+ return createGreedyWWMRegisterAllocator ();
1512
+
1513
+ return createFastWWMRegisterAllocator ();
1514
+ }
1515
+
1448
1516
/// Override that must never be called; reaching it hits llvm_unreachable.
/// GCNPassConfig declares the separate createSGPRAllocPass,
/// createVGPRAllocPass and createWWMRegAllocPass factories alongside it.
FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
  llvm_unreachable("should not be used");
}
1451
1519
1452
1520
/// Diagnostic text for the unsupported generic -regalloc option; it points
/// the user at the three per-register-kind options instead. The text starts
/// directly with the option name (no leading whitespace).
static const char RegAllocOptNotSupportedMessage[] =
    "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
    "and -vgpr-regalloc";
1454
1523
1455
1524
bool GCNPassConfig::addRegAssignAndRewriteFast () {
1456
1525
if (!usingDefaultRegAlloc ())
@@ -1462,11 +1531,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
1462
1531
1463
1532
// Equivalent of PEI for SGPRs.
1464
1533
addPass (&SILowerSGPRSpillsID);
1534
+
1535
+ // To allocate wwm registers used in whole quad mode operations (for shaders).
1465
1536
addPass (&SIPreAllocateWWMRegsID);
1466
1537
1467
- addPass (createVGPRAllocPass (false ));
1538
+ // For allocating other wwm register operands.
1539
+ addPass (createWWMRegAllocPass (false ));
1468
1540
1469
1541
addPass (&SILowerWWMCopiesID);
1542
+ addPass (&AMDGPUReserveWWMRegsID);
1543
+
1544
+ // For allocating regular VGPRs.
1545
+ addPass (createVGPRAllocPass (false ));
1546
+
1470
1547
return true ;
1471
1548
}
1472
1549
@@ -1486,8 +1563,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1486
1563
1487
1564
// Equivalent of PEI for SGPRs.
1488
1565
addPass (&SILowerSGPRSpillsID);
1566
+
1567
+ // To allocate wwm registers used in whole quad mode operations (for shaders).
1489
1568
addPass (&SIPreAllocateWWMRegsID);
1490
1569
1570
+ // For allocating other whole wave mode registers.
1571
+ addPass (createWWMRegAllocPass (true ));
1572
+ addPass (&SILowerWWMCopiesID);
1573
+ addPass (createVirtRegRewriter (false ));
1574
+ addPass (&AMDGPUReserveWWMRegsID);
1575
+
1576
+ // For allocating regular VGPRs.
1491
1577
addPass (createVGPRAllocPass (true ));
1492
1578
1493
1579
addPreRewrite ();
0 commit comments