Skip to content

Commit

Permalink
Vulkan: Automatically merge render passes to the same target when pos…
Browse files Browse the repository at this point in the history
…sible.

Should speed things up a bit on mobile in some games, like GoW, that do
stupid things. Currently only enabled in GoW, but the plan is to enable this
globally, as it should be quite cheap when nothing is detected.
  • Loading branch information
hrydgard committed Aug 13, 2019
1 parent aea4200 commit e8a101e
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 1 deletion.
1 change: 1 addition & 0 deletions Core/Compatibility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) {
CheckSetting(iniFile, gameID, "DisableAccurateDepth", &flags_.DisableAccurateDepth);
CheckSetting(iniFile, gameID, "MGS2AcidHack", &flags_.MGS2AcidHack);
CheckSetting(iniFile, gameID, "SonicRivalsHack", &flags_.SonicRivalsHack);
CheckSetting(iniFile, gameID, "RenderPassMerge", &flags_.RenderPassMerge);
CheckSetting(iniFile, gameID, "BlockTransferAllowCreateFB", &flags_.BlockTransferAllowCreateFB);
CheckSetting(iniFile, gameID, "YugiohSaveFix", &flags_.YugiohSaveFix);
CheckSetting(iniFile, gameID, "ForceUMDDelay", &flags_.ForceUMDDelay);
Expand Down
1 change: 1 addition & 0 deletions Core/Compatibility.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ struct CompatFlags {
bool DisableAccurateDepth;
bool MGS2AcidHack;
bool SonicRivalsHack;
bool RenderPassMerge;
bool BlockTransferAllowCreateFB;
bool YugiohSaveFix;
bool ForceUMDDelay;
Expand Down
3 changes: 3 additions & 0 deletions GPU/Vulkan/GPU_Vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,9 @@ void GPU_Vulkan::InitDeviceObjects() {
hacks |= QUEUE_HACK_MGS2_ACID;
if (PSP_CoreParameter().compat.flags().SonicRivalsHack)
hacks |= QUEUE_HACK_SONIC;
if (PSP_CoreParameter().compat.flags().RenderPassMerge)
hacks |= QUEUE_HACK_RENDERPASS_MERGE;

if (hacks) {
rm->GetQueueRunner()->EnableHacks(hacks);
}
Expand Down
36 changes: 36 additions & 0 deletions assets/compat.ini
Original file line number Diff line number Diff line change
Expand Up @@ -633,3 +633,39 @@ NPUH10047 = true
ULAS42214 = true
ULJS19054 = true
NPJH50184 = true

# Games for which the Vulkan backend merges render passes to the same target.
[RenderPassMerge]
# NOTE: the next two IDs are repeated further down in this section
# (UCJS10114 under Ghost of Sparta, UCKS45084 under Chains of Olympus).
UCJS10114 = true
UCKS45084 = true
# God of War: Ghost of Sparta
UCUS98737 = true
UCAS40323 = true
NPHG00092 = true
NPEG00044 = true
NPEG00045 = true
NPJG00120 = true
NPUG80508 = true
UCJS10114 = true
UCES01401 = true
UCES01473 = true
# God of War: Ghost of Sparta (demo)
NPEG90035 = true
NPUG70125 = true
NPJG90095 = true
# God of War: Chains of Olympus
UCAS40198 = true
UCUS98653 = true
UCES00842 = true
ULJM05438 = true
ULJM05348 = true
UCKS45084 = true
NPUG80325 = true
NPEG00023 = true
NPHG00027 = true
NPHG00028 = true
NPJH50170 = true
UCET00844 = true
# God of War: Chains of Olympus (demo)
UCUS98705 = true
UCED00971 = true
UCUS98713 = true
63 changes: 63 additions & 0 deletions ext/native/thin3d/VulkanQueueRunner.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <map>
#include "DataFormat.h"
#include "VulkanQueueRunner.h"
#include "VulkanRenderManager.h"
Expand Down Expand Up @@ -438,6 +439,9 @@ void VulkanQueueRunner::RunSteps(VkCommandBuffer cmd, std::vector<VKRStep *> &st
if (hacksEnabled_ & QUEUE_HACK_SONIC) {
ApplySonicHack(steps);
}
if (hacksEnabled_ & QUEUE_HACK_RENDERPASS_MERGE) {
ApplyRenderPassMerge(steps);
}
}

for (size_t i = 0; i < steps.size(); i++) {
Expand Down Expand Up @@ -695,6 +699,65 @@ void VulkanQueueRunner::ApplySonicHack(std::vector<VKRStep *> &steps) {
}
}

// Merges multiple render passes that target the same framebuffer into the first such pass.
//
// Ideally, this should be cheap enough to be applied to all games. At least on mobile, it's pretty
// much a guaranteed neutral or win in terms of GPU power. However, dependency calculation really
// must be perfect!
//
// For every RENDER step whose framebuffer is rendered to more than once in `steps`, later RENDER
// steps to the same framebuffer have their pre-transitions and commands appended to the first
// step and are then marked RENDER_SKIP. The forward scan for a given framebuffer stops as soon as:
//  - a COPY or BLIT step reads from it,
//  - a RENDER step has it in its dependency set, or
//  - a RENDER step to it depends on a framebuffer that was rendered to by a step we scanned past
//    (merging would move its draws ahead of the pass that produces what it reads).
void VulkanQueueRunner::ApplyRenderPassMerge(std::vector<VKRStep *> &steps) {
	// First let's count how many times each framebuffer is rendered to.
	// If it's more than one, let's do our best to merge them. This can help God of War quite a bit.
	std::map<VKRFramebuffer *, int> counts;
	for (VKRStep *step : steps) {
		if (step->stepType == VKRStepType::RENDER) {
			counts[step->render.framebuffer]++;
		}
	}

	// Now, let's go through the steps. If we find one that is rendered to more than once,
	// we'll scan forward and slurp up any rendering that can be merged across.
	for (size_t i = 0; i < steps.size(); i++) {
		VKRStep *dst = steps[i];
		if (dst->stepType != VKRStepType::RENDER || counts[dst->render.framebuffer] <= 1) {
			continue;
		}

		VKRFramebuffer *fb = dst->render.framebuffer;
		// Render targets of the passes we have scanned past without merging. A later pass that
		// reads any of these cannot be moved in front of them.
		std::vector<VKRFramebuffer *> touched;

		bool scanning = true;
		for (size_t j = i + 1; scanning && j < steps.size(); j++) {
			VKRStep *src = steps[j];
			// If any other passes are reading from this framebuffer as-is, we cancel the scan.
			// NOTE(review): COPY/BLIT steps are only checked as readers of fb. If they can also
			// write to fb (or to something a later pass samples), merging across them needs the
			// same treatment as the render-target tracking below - confirm.
			switch (src->stepType) {
			case VKRStepType::COPY:
				if (src->copy.src == fb) {
					scanning = false;
				}
				break;
			case VKRStepType::BLIT:
				if (src->blit.src == fb) {
					scanning = false;
				}
				break;
			case VKRStepType::RENDER:
			{
				if (src->dependencies.contains(fb)) {
					scanning = false;
					break;
				}
				if (src->render.framebuffer != fb) {
					// Unrelated pass; remember what it wrote and keep going.
					touched.push_back(src->render.framebuffer);
					break;
				}
				// Same target. Only safe to hoist if it doesn't read anything that was
				// produced between the destination step and here.
				bool readsTouched = false;
				for (VKRFramebuffer *written : touched) {
					if (src->dependencies.contains(written)) {
						readsTouched = true;
						break;
					}
				}
				if (readsTouched) {
					// Can't merge this one, and later passes to fb must stay ordered after it.
					scanning = false;
					break;
				}
				// ok. Slurp up any pretransitions and all the commands, and kill the step.
				dst->preTransitions.insert(dst->preTransitions.end(), src->preTransitions.begin(), src->preTransitions.end());
				dst->commands.insert(dst->commands.end(), src->commands.begin(), src->commands.end());
				src->stepType = VKRStepType::RENDER_SKIP;
				break;
			}
			default:
				// Other step types neither block nor take part in merging.
				break;
			}
		}
	}
}

void VulkanQueueRunner::LogSteps(const std::vector<VKRStep *> &steps) {
ILOG("=======================================");
for (size_t i = 0; i < steps.size(); i++) {
Expand Down
56 changes: 56 additions & 0 deletions ext/native/thin3d/VulkanQueueRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,58 @@ struct VKRImage;
// Bit flags for game-specific queue hacks. They are OR-ed together into a single mask.
enum {
	QUEUE_HACK_MGS2_ACID = 1 << 0,
	QUEUE_HACK_SONIC = 1 << 1,
	// Killzone PR = 4 (1 << 2).
	QUEUE_HACK_RENDERPASS_MERGE = 1 << 3,
};

// Insert-only small-set implementation. Performs no allocation unless MaxFastSize is exceeded.
//
// The first MaxFastSize distinct values are stored in an inline array; any further values spill
// into a heap-allocated std::vector. Both insert() and contains() are linear scans that compare
// with operator==. T must be default-constructible and copy-assignable.
//
// The set owns its overflow vector, so copying makes a deep copy and moving transfers ownership.
template <class T, int MaxFastSize>
struct TinySet {
	TinySet() = default;
	~TinySet() { delete slowLookup_; }

	// Deep copy: the overflow vector is duplicated so the two sets never share (and later
	// double-delete) the same allocation.
	TinySet(const TinySet &other) : fastCount(other.fastCount) {
		for (int i = 0; i < fastCount; i++)
			fastLookup_[i] = other.fastLookup_[i];
		if (other.slowLookup_)
			slowLookup_ = new std::vector<T>(*other.slowLookup_);
	}
	TinySet &operator=(const TinySet &other) {
		if (this != &other) {
			// Duplicate the overflow first so a failed allocation leaves *this untouched.
			std::vector<T> *slowCopy = other.slowLookup_ ? new std::vector<T>(*other.slowLookup_) : nullptr;
			delete slowLookup_;
			slowLookup_ = slowCopy;
			fastCount = other.fastCount;
			for (int i = 0; i < fastCount; i++)
				fastLookup_[i] = other.fastLookup_[i];
		}
		return *this;
	}

	// Move: steals the overflow vector and leaves `other` empty.
	TinySet(TinySet &&other) : fastCount(other.fastCount), slowLookup_(other.slowLookup_) {
		for (int i = 0; i < fastCount; i++)
			fastLookup_[i] = other.fastLookup_[i];
		other.fastCount = 0;
		other.slowLookup_ = nullptr;
	}
	TinySet &operator=(TinySet &&other) {
		if (this != &other) {
			delete slowLookup_;
			slowLookup_ = other.slowLookup_;
			fastCount = other.fastCount;
			for (int i = 0; i < fastCount; i++)
				fastLookup_[i] = other.fastLookup_[i];
			other.fastCount = 0;
			other.slowLookup_ = nullptr;
		}
		return *this;
	}

	// Adds t to the set; does nothing if an equal value is already present.
	inline void insert(T t) {
		// Fast linear scan.
		for (int i = 0; i < fastCount; i++) {
			if (fastLookup_[i] == t)
				return;  // We already have it.
		}
		// Fast insertion
		if (fastCount < MaxFastSize) {
			fastLookup_[fastCount++] = t;
			return;
		}
		// Fall back to slow path.
		insertSlow(t);
	}

	// Returns true if a value equal to t has been inserted.
	bool contains(T t) const {
		for (int i = 0; i < fastCount; i++) {
			if (fastLookup_[i] == t)
				return true;
		}
		if (slowLookup_) {
			for (const T &x : *slowLookup_) {
				if (x == t)
					return true;
			}
		}
		return false;
	}

private:
	// Overflow path: lazily allocates the vector, then appends t unless it is already there.
	void insertSlow(T t) {
		if (!slowLookup_) {
			slowLookup_ = new std::vector<T>();
		} else {
			for (size_t i = 0; i < slowLookup_->size(); i++) {
				if ((*slowLookup_)[i] == t)
					return;
			}
		}
		slowLookup_->push_back(t);
	}

	T fastLookup_[MaxFastSize];
	int fastCount = 0;  // Number of valid entries in fastLookup_.
	std::vector<T> *slowLookup_ = nullptr;  // Owned; null until the inline array overflows.
};

enum class VKRRenderCommand : uint8_t {
Expand Down Expand Up @@ -109,9 +161,12 @@ struct TransitionRequest {

struct VKRStep {
VKRStep(VKRStepType _type) : stepType(_type) {}
~VKRStep() {}

VKRStepType stepType;
std::vector<VkRenderData> commands;
std::vector<TransitionRequest> preTransitions;
TinySet<VKRFramebuffer *, 8> dependencies;
union {
struct {
VKRFramebuffer *framebuffer;
Expand Down Expand Up @@ -240,6 +295,7 @@ class VulkanQueueRunner {

void ApplyMGSHack(std::vector<VKRStep *> &steps);
void ApplySonicHack(std::vector<VKRStep *> &steps);
void ApplyRenderPassMerge(std::vector<VKRStep *> &steps);

static void SetupTransitionToTransferSrc(VKRImage &img, VkImageMemoryBarrier &barrier, VkPipelineStageFlags &stage, VkImageAspectFlags aspect);
static void SetupTransitionToTransferDst(VKRImage &img, VkImageMemoryBarrier &barrier, VkPipelineStageFlags &stage, VkImageAspectFlags aspect);
Expand Down
6 changes: 5 additions & 1 deletion ext/native/thin3d/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,8 @@ void VulkanRenderManager::BlitFramebuffer(VKRFramebuffer *src, VkRect2D srcRect,
}

VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, int aspectBit, int attachment) {
// Mark the dependency and return the image.
_dbg_assert_(G3D, curRenderStep_ != nullptr);
// Mark the dependency, check for required transitions, and return the image.

for (int i = (int)steps_.size() - 1; i >= 0; i--) {
if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == fb) {
Expand All @@ -813,6 +814,9 @@ VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, in
}
}

// Track dependencies fully.
curRenderStep_->dependencies.insert(fb);

if (!curRenderStep_->preTransitions.empty() &&
curRenderStep_->preTransitions.back().fb == fb &&
curRenderStep_->preTransitions.back().targetLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
Expand Down

0 comments on commit e8a101e

Please sign in to comment.