Skip to content

Commit

Permalink
[AMDGPU] Prefer lower total register usage in regions with spilling
Browse files Browse the repository at this point in the history
Change-Id: Ia5c434b0945bdcbc357c5e06c3164118fc91df25
  • Loading branch information
jrbyrnes committed Feb 26, 2024
1 parent 435e75d commit 113052b
Show file tree
Hide file tree
Showing 9 changed files with 707 additions and 226 deletions.
12 changes: 5 additions & 7 deletions llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,9 +409,8 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,

// Sort recorded regions by pressure - highest at the front
void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
llvm::sort(Regions, [this, TargetOcc](const Region *R1, const Region *R2) {
return R2->MaxPressure.less(MF, R1->MaxPressure, TargetOcc);
});
}

Expand Down Expand Up @@ -517,26 +516,25 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
// Minimal Register Strategy

void GCNIterativeScheduler::scheduleMinReg(bool force) {
const auto &ST = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const auto TgtOcc = MFI->getOccupancy();
sortRegionsByPressure(TgtOcc);

auto MaxPressure = Regions.front()->MaxPressure;
for (auto *R : Regions) {
if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
if (!force && R->MaxPressure.less(MF, MaxPressure, TgtOcc))
break;

BuildDAG DAG(*R, *this);
const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);

const auto RP = getSchedulePressure(*R, MinSchedule);
LLVM_DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
LLVM_DEBUG(if (R->MaxPressure.less(MF, RP, TgtOcc)) {
dbgs() << "\nWarning: Pressure becomes worse after minreg!";
printSchedRP(dbgs(), R->MaxPressure, RP);
});

if (!force && MaxPressure.less(ST, RP, TgtOcc))
if (!force && MaxPressure.less(MF, RP, TgtOcc))
break;

scheduleRegion(*R, MinSchedule, RP);
Expand Down
96 changes: 92 additions & 4 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,10 @@ void GCNRegPressure::inc(unsigned Reg,
}
}

bool GCNRegPressure::less(const GCNSubtarget &ST,
const GCNRegPressure& O,
bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

const auto SGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
const auto VGPROcc =
Expand All @@ -104,18 +105,103 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,

const auto Occ = std::min(SGPROcc, VGPROcc);
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);

// Give first precedence to the better occupancy.
if (Occ != OtherOcc)
return Occ > OtherOcc;

unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);

// SGPR excess pressure conditions
unsigned ExcessSGPR = std::max(static_cast<int>(getSGPRNum() - MaxSGPRs), 0);
unsigned OtherExcessSGPR =
std::max(static_cast<int>(O.getSGPRNum() - MaxSGPRs), 0);

auto WaveSize = ST.getWavefrontSize();
// The number of virtual VGPRs required to handle excess SGPR
unsigned VGPRForSGPRSpills = (ExcessSGPR + (WaveSize - 1)) / WaveSize;
unsigned OtherVGPRForSGPRSpills =
(OtherExcessSGPR + (WaveSize - 1)) / WaveSize;

unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();

// Unified excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessVGPR =
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) +
VGPRForSGPRSpills - MaxVGPRs),
0);
unsigned OtherExcessVGPR =
std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
OtherVGPRForSGPRSpills - MaxVGPRs),
0);
// Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
// spills
unsigned ExcessArchVGPR = std::max(
static_cast<int>(getVGPRNum(false) + VGPRForSGPRSpills - MaxArchVGPRs),
0);
unsigned OtherExcessArchVGPR =
std::max(static_cast<int>(O.getVGPRNum(false) + OtherVGPRForSGPRSpills -
MaxArchVGPRs),
0);
// AGPR excess pressure conditions
unsigned ExcessAGPR = std::max(
static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
: (getAGPRNum() - MaxVGPRs)),
0);
unsigned OtherExcessAGPR = std::max(
static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
: (O.getAGPRNum() - MaxVGPRs)),
0);

bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
bool OtherExcessRP = OtherExcessSGPR || OtherExcessVGPR ||
OtherExcessArchVGPR || OtherExcessAGPR;

// Give second precedence to the pressure that requires fewer spills to fit
// within the register limits.
if (ExcessRP || OtherExcessRP) {
// The difference in excess VGPR pressure, after including VGPRs used for
// SGPR spills
int VGPRDiff = ((OtherExcessVGPR + OtherExcessArchVGPR + OtherExcessAGPR) -
(ExcessVGPR + ExcessArchVGPR + ExcessAGPR));

int SGPRDiff = OtherExcessSGPR - ExcessSGPR;

if (VGPRDiff != 0)
return VGPRDiff > 0;
if (SGPRDiff != 0) {
unsigned PureExcessVGPR =
std::max(static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
0) +
std::max(static_cast<int>(getVGPRNum(false) - MaxArchVGPRs), 0);
unsigned OtherPureExcessVGPR =
std::max(
static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) - MaxVGPRs),
0) +
std::max(static_cast<int>(O.getVGPRNum(false) - MaxArchVGPRs), 0);

// If we have a special case where there is a tie in excess VGPR, but one
// of the pressures has VGPR usage from SGPR spills, prefer the pressure
// with SGPR spills.
if (PureExcessVGPR != OtherPureExcessVGPR)
return SGPRDiff < 0;
// If both pressures have the same excess pressure before and after
// accounting for SGPR spills, prefer fewer SGPR spills.
return SGPRDiff > 0;
}
}

bool SGPRImportant = SGPROcc < VGPROcc;
const bool OtherSGPRImportant = OtherSGPROcc < OtherVGPROcc;

// if both pressures disagree on what is more important compare vgprs
// If both pressures disagree on what is more important compare vgprs.
if (SGPRImportant != OtherSGPRImportant) {
SGPRImportant = false;
}

// compare large regs pressure
// Give third precedence to lower register tuple pressure.
bool SGPRFirst = SGPRImportant;
for (int I = 2; I > 0; --I, SGPRFirst = !SGPRFirst) {
if (SGPRFirst) {
Expand All @@ -130,6 +216,8 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
return VW < OtherVW;
}
}

// Give final precedence to lower general RP.
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
(getVGPRNum(ST.hasGFX90AInsts()) <
O.getVGPRNum(ST.hasGFX90AInsts()));
Expand Down
16 changes: 14 additions & 2 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,20 @@ struct GCNRegPressure {
return getOccupancy(ST) > O.getOccupancy(ST);
}

bool less(const GCNSubtarget &ST, const GCNRegPressure& O,
unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
/// Compares \p this GCNRegPressure to \p O, returning true if \p this is
/// less. Since GCNRegPressure contains different types of pressures, and due
/// to target-specific peculiarities (e.g. we care about occupancy rather than
/// raw register usage), we determine if \p this GCNRegPressure is less than
/// \p O based on the following tiered comparisons (in order of
/// precedence):
/// 1. Better occupancy
/// 2. Less spilling (first preference to VGPR spills, then to SGPR spills)
/// 3. Less tuple register pressure (first preference to VGPR tuples if we
/// determine that SGPR pressure is not important)
/// 4. Less raw register pressure (first preference to VGPRs if we
/// determine that SGPR pressure is not important)
bool less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;

bool operator==(const GCNRegPressure &O) const {
return std::equal(&Value[0], &Value[TOTAL_KINDS], O.Value);
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,7 @@ void GCNSchedStage::checkScheduling() {

unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);

if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
PressureAfter.getAGPRNum() > MaxVGPRs ||
PressureAfter.getSGPRNum() > MaxSGPRs) {
Expand Down Expand Up @@ -1199,9 +1200,8 @@ bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
}

bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
if (WavesAfter <= MFI.getMinWavesPerEU() &&
!PressureAfter.less(ST, PressureBefore) &&
isRegionWithExcessRP()) {
if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
!PressureAfter.less(MF, PressureBefore)) {
LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
return true;
}
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1382,6 +1382,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
}

/// Queries AMDGPU::IsaInfo for the addressable number of architectural VGPRs
/// of this subtarget.
///
/// \returns the value reported by IsaInfo::getAddressableNumArchVGPRs.
unsigned getAddressableNumArchVGPRs() const {
  const unsigned NumArchVGPRs =
      AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
  return NumArchVGPRs;
}

/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1107,10 +1107,12 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
return IsWave32 ? 1024 : 512;
}

// Returns the addressable number of architectural VGPRs. The result is the
// constant 256; \p STI is not consulted.
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) {
  constexpr unsigned AddressableArchVGPRs = 256;
  return AddressableArchVGPRs;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
if (STI->getFeatureBits().test(FeatureGFX90AInsts))
return 512;
return 256;
return getAddressableNumArchVGPRs(STI);
}

unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,10 @@ unsigned getVGPREncodingGranule(
/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of architectural VGPRs for a given subtarget \p
/// STI.
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

Expand Down
Loading

0 comments on commit 113052b

Please sign in to comment.