Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vulkan: Use depth clamping, where available #10454

Merged
merged 5 commits into from
Dec 27, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions GPU/Common/GPUStateUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,9 @@ float DepthSliceFactor() {
if (gstate_c.Supports(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT)) {
return DEPTH_SLICE_FACTOR_16BIT;
}
if (gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) {
return 1.0f;
}
return DEPTH_SLICE_FACTOR_HIGH;
}

Expand Down Expand Up @@ -681,6 +684,7 @@ void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, flo
// So, we apply the depth range as minz/maxz, and transform for the viewport.
float vpZScale = gstate.getViewportZScale();
float vpZCenter = gstate.getViewportZCenter();
// TODO: This clips the entire draw if minz > maxz.
float minz = gstate.getDepthRangeMin();
float maxz = gstate.getDepthRangeMax();

Expand Down
3 changes: 1 addition & 2 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_CLIPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE },

// Z clip
{ GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_VIEWPORTSCISSOR_STATE },
Expand Down Expand Up @@ -1739,7 +1739,6 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
const int end = 12 * 8 - (op & 0x7F);
int i = 0;

// TODO: Validate what should happen when explicitly setting num to 96 or higher.
bool fastLoad = !debugRecording_ && end > 0;
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
fastLoad = false;
Expand Down
122 changes: 90 additions & 32 deletions GPU/GPUState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ alignas(16) GPUgstate gstate;
// Let's align this one too for good measure.
alignas(16) GPUStateCache gstate_c;

// For save state compatibility.
static int savedContextVersion = 1;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be set to 1 somewhere when a game is started, right? Otherwise it could leak over to the second game you run in a session..

Should really rearchitect the whole emu to avoid all these globals... oh well.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, yes, thanks.

Once you have one global, it's kinda hard to avoid more.

-[Unknown]


struct CmdRange {
u8 start;
u8 end;
Expand Down Expand Up @@ -77,6 +80,25 @@ static const CmdRange contextCmdRanges[] = {
// Skip: {0xFA, 0xFF},
};

// Emits one matrix into a saved context as a run of command words.
//
// The first word written is numcmd placed in the top 8 bits (with a zero
// payload).  It is followed by sz words, one per matrix element, each holding
// datacmd in the top 8 bits OR'd with the result of toFloat24() for that element.
//
// cmds:    destination; must have room for sz + 1 words.
// mtx:     source values, sz entries.
// numcmd:  command id for the leading "matrix number" word.
// datacmd: command id used for every element word.
// Returns the write position immediately after the last word emitted.
static u32_le *SaveMatrix(u32_le *cmds, const float *mtx, int sz, int numcmd, int datacmd) {
	u32_le *out = cmds;

	// Leading word: the matrix number command with no payload.
	*out = numcmd << 24;
	++out;

	int idx = 0;
	while (idx < sz) {
		*out = (datacmd << 24) | toFloat24(mtx[idx]);
		++out;
		++idx;
	}

	return out;
}

// Reads one matrix back from a saved context laid out as SaveMatrix writes it.
//
// The leading word (the matrix number command) is ignored; each of the
// following sz words is decoded with getFloat24() and stored into mtx.
//
// cmds: source; must contain at least sz + 1 words.
// mtx:  destination, receives sz entries.
// Returns the read position immediately after the last word consumed.
static const u32_le *LoadMatrix(const u32_le *cmds, float *mtx, int sz) {
	// Step over the leading number/reset word; its value is not needed here.
	const u32_le *in = cmds + 1;

	int idx = 0;
	while (idx < sz) {
		mtx[idx] = getFloat24(*in);
		++in;
		++idx;
	}

	return in;
}

void GPUgstate::Reset() {
memset(gstate.cmdmem, 0, sizeof(gstate.cmdmem));
for (int i = 0; i < 256; i++) {
Expand Down Expand Up @@ -105,22 +127,37 @@ void GPUgstate::Save(u32_le *ptr) {
}
}

if (Memory::IsValidAddress(getClutAddress()))
*cmds++ = loadclut;

// Seems like it actually writes commands to load the matrices and then reset the counts.
*cmds++ = boneMatrixNumber;
*cmds++ = worldmtxnum;
*cmds++ = viewmtxnum;
*cmds++ = projmtxnum;
*cmds++ = texmtxnum;

u8 *matrices = (u8 *)cmds;
memcpy(matrices, boneMatrix, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
memcpy(matrices, worldMatrix, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
memcpy(matrices, viewMatrix, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
memcpy(matrices, projMatrix, sizeof(projMatrix)); matrices += sizeof(projMatrix);
memcpy(matrices, tgenMatrix, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
if (savedContextVersion == 0) {
if (Memory::IsValidAddress(getClutAddress()))
*cmds++ = loadclut;

// Seems like it actually writes commands to load the matrices and then reset the counts.
*cmds++ = boneMatrixNumber;
*cmds++ = worldmtxnum;
*cmds++ = viewmtxnum;
*cmds++ = projmtxnum;
*cmds++ = texmtxnum;

u8 *matrices = (u8 *)cmds;
memcpy(matrices, boneMatrix, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
memcpy(matrices, worldMatrix, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
memcpy(matrices, viewMatrix, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
memcpy(matrices, projMatrix, sizeof(projMatrix)); matrices += sizeof(projMatrix);
memcpy(matrices, tgenMatrix, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
} else {
cmds = SaveMatrix(cmds, boneMatrix, ARRAY_SIZE(boneMatrix), GE_CMD_BONEMATRIXNUMBER, GE_CMD_BONEMATRIXDATA);
cmds = SaveMatrix(cmds, worldMatrix, ARRAY_SIZE(worldMatrix), GE_CMD_WORLDMATRIXNUMBER, GE_CMD_WORLDMATRIXDATA);
cmds = SaveMatrix(cmds, viewMatrix, ARRAY_SIZE(viewMatrix), GE_CMD_VIEWMATRIXNUMBER, GE_CMD_VIEWMATRIXDATA);
cmds = SaveMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
cmds = SaveMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);

*cmds++ = boneMatrixNumber;
*cmds++ = worldmtxnum;
*cmds++ = viewmtxnum;
*cmds++ = projmtxnum;
*cmds++ = texmtxnum;
*cmds++ = GE_CMD_END << 24;
}
}

void GPUgstate::FastLoadBoneMatrix(u32 addr) {
Expand Down Expand Up @@ -165,27 +202,41 @@ void GPUgstate::Restore(u32_le *ptr) {
gstate_c.offsetAddr = ptr[7];

// Command values start 17 ints in.
u32_le *cmds = ptr + 17;
const u32_le *cmds = ptr + 17;
for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) {
for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) {
cmdmem[n] = *cmds++;
}
}

if (Memory::IsValidAddress(getClutAddress()))
loadclut = *cmds++;
boneMatrixNumber = *cmds++;
worldmtxnum = *cmds++;
viewmtxnum = *cmds++;
projmtxnum = *cmds++;
texmtxnum = *cmds++;

u8 *matrices = (u8 *)cmds;
memcpy(boneMatrix, matrices, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
memcpy(worldMatrix, matrices, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
memcpy(viewMatrix, matrices, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
memcpy(projMatrix, matrices, sizeof(projMatrix)); matrices += sizeof(projMatrix);
memcpy(tgenMatrix, matrices, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
if (savedContextVersion == 0) {
if (Memory::IsValidAddress(getClutAddress()))
loadclut = *cmds++;
boneMatrixNumber = *cmds++;
worldmtxnum = *cmds++;
viewmtxnum = *cmds++;
projmtxnum = *cmds++;
texmtxnum = *cmds++;

u8 *matrices = (u8 *)cmds;
memcpy(boneMatrix, matrices, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
memcpy(worldMatrix, matrices, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
memcpy(viewMatrix, matrices, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
memcpy(projMatrix, matrices, sizeof(projMatrix)); matrices += sizeof(projMatrix);
memcpy(tgenMatrix, matrices, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
} else {
cmds = LoadMatrix(cmds, boneMatrix, ARRAY_SIZE(boneMatrix));
cmds = LoadMatrix(cmds, worldMatrix, ARRAY_SIZE(worldMatrix));
cmds = LoadMatrix(cmds, viewMatrix, ARRAY_SIZE(viewMatrix));
cmds = LoadMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix));
cmds = LoadMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix));

boneMatrixNumber = *cmds++;
worldmtxnum = *cmds++;
viewmtxnum = *cmds++;
projmtxnum = *cmds++;
texmtxnum = *cmds++;
}
}

bool vertTypeIsSkinningEnabled(u32 vertType) {
Expand Down Expand Up @@ -217,7 +268,7 @@ void GPUStateCache::Reset() {
}

void GPUStateCache::DoState(PointerWrap &p) {
auto s = p.Section("GPUStateCache", 0, 4);
auto s = p.Section("GPUStateCache", 0, 5);
if (!s) {
// Old state, this was not versioned.
GPUStateCache_v0 old;
Expand All @@ -231,6 +282,8 @@ void GPUStateCache::DoState(PointerWrap &p) {
vertexFullAlpha = old.vertexFullAlpha;
skipDrawReason = old.skipDrawReason;
uv = old.uv;

savedContextVersion = 0;
} else {
p.Do(vertexAddr);
p.Do(indexAddr);
Expand Down Expand Up @@ -290,4 +343,9 @@ void GPUStateCache::DoState(PointerWrap &p) {
p.Do(curRTHeight);

// curRTBufferWidth, curRTBufferHeight, and curRTOffsetX don't need to be saved.
if (s < 5) {
savedContextVersion = 0;
} else {
p.Do(savedContextVersion);
}
}
1 change: 1 addition & 0 deletions GPU/GPUState.h
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ enum {
GPU_SUPPORTS_VERTEX_TEXTURE_FETCH = FLAG_BIT(11),
GPU_SUPPORTS_TEXTURE_FLOAT = FLAG_BIT(12),
GPU_SUPPORTS_16BIT_FORMATS = FLAG_BIT(13),
GPU_SUPPORTS_DEPTH_CLAMP = FLAG_BIT(14),
GPU_SUPPORTS_LARGE_VIEWPORTS = FLAG_BIT(16),
GPU_SUPPORTS_ACCURATE_DEPTH = FLAG_BIT(17),
GPU_SUPPORTS_VAO = FLAG_BIT(18),
Expand Down
4 changes: 2 additions & 2 deletions GPU/Software/Clipper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ void ProcessLine(VertexData& v0, VertexData& v1)
return;
}

if (mask && (gstate.clipEnable & 0x1)) {
if (mask && gstate.isClippingEnabled()) {
// discard if any vertex is outside the near clipping plane
if (mask & CLIP_NEG_Z_BIT)
return;
Expand Down Expand Up @@ -303,7 +303,7 @@ void ProcessTriangle(VertexData& v0, VertexData& v1, VertexData& v2)
mask |= CalcClipMask(v1.clippos);
mask |= CalcClipMask(v2.clippos);

if (mask && (gstate.clipEnable & 0x1)) {
if (mask && gstate.isClippingEnabled()) {
// discard if any vertex is outside the near clipping plane
if (mask & CLIP_NEG_Z_BIT)
return;
Expand Down
4 changes: 2 additions & 2 deletions GPU/Software/TransformUnit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ static inline ScreenCoords ClipToScreenInternal(const ClipCoords& coords, bool *
float y = coords.y * yScale / coords.w + yCenter;
float z = coords.z * zScale / coords.w + zCenter;

// Is this really right?
if (gstate.clipEnable & 0x1) {
// This matches hardware tests - depth is clamped when this flag is on.
if (gstate.isClippingEnabled()) {
if (z < 0.f)
z = 0.f;
if (z > 65535.f)
Expand Down
14 changes: 13 additions & 1 deletion GPU/Vulkan/FragmentShaderGeneratorVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,8 +456,20 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
}

if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
const double scale = DepthSliceFactor() * 65535.0;

WRITE(p, " highp float z = gl_FragCoord.z;\n");
WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n");
if (gstate_c.Supports(GPU_SUPPORTS_ACCURATE_DEPTH)) {
// We center the depth with an offset, but only its fraction matters.
// When (DepthSliceFactor() - 1) is odd, it will be 0.5, otherwise 0.
if (((int)(DepthSliceFactor() - 1.0f) & 1) == 1) {
WRITE(p, " z = (floor((z * %f) - (1.0 / 2.0)) + (1.0 / 2.0)) * (1.0 / %f);\n", scale, scale);
} else {
WRITE(p, " z = floor(z * %f) * (1.0 / %f);\n", scale, scale);
}
} else {
WRITE(p, " z = (1.0/65535.0) * floor(z * 65535.0);\n");
}
WRITE(p, " gl_FragDepth = z;\n");
}

Expand Down
3 changes: 3 additions & 0 deletions GPU/Vulkan/GPU_Vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ void GPU_Vulkan::CheckGPUFeatures() {
if (vulkan_->GetFeaturesEnabled().wideLines) {
features |= GPU_SUPPORTS_WIDE_LINES;
}
if (vulkan_->GetFeaturesEnabled().depthClamp) {
features |= GPU_SUPPORTS_DEPTH_CLAMP;
}
if (vulkan_->GetFeaturesEnabled().dualSrcBlend) {
switch (vulkan_->GetPhysicalDeviceProperties().vendorID) {
case VULKAN_VENDOR_NVIDIA:
Expand Down
2 changes: 1 addition & 1 deletion GPU/Vulkan/PipelineManagerVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ static VulkanPipeline *CreateVulkanPipeline(VkDevice device, VkPipelineCache pip
rs.lineWidth = lineWidth;
rs.rasterizerDiscardEnable = false;
rs.polygonMode = VK_POLYGON_MODE_FILL;
rs.depthClampEnable = false;
rs.depthClampEnable = key.depthClampEnable;

VkPipelineMultisampleStateCreateInfo ms = { VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO };
ms.pSampleMask = nullptr;
Expand Down
3 changes: 3 additions & 0 deletions GPU/Vulkan/StateMappingVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,13 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
if (gstate_c.IsDirty(DIRTY_RASTER_STATE)) {
if (gstate.isModeClear()) {
key.cullMode = VK_CULL_MODE_NONE;
// TODO: Or does it always clamp?
key.depthClampEnable = false;
} else {
// Set cull
bool wantCull = !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES && gstate.isCullEnabled();
key.cullMode = wantCull ? (gstate.getCullMode() ? VK_CULL_MODE_FRONT_BIT : VK_CULL_MODE_BACK_BIT) : VK_CULL_MODE_NONE;
key.depthClampEnable = gstate.isClippingEnabled() && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP);
}
}

Expand Down
5 changes: 3 additions & 2 deletions GPU/Vulkan/StateMappingVulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ struct VulkanDynamicState {
// Let's pack this tight using bitfields.
// If an enable flag is set to 0, all the data fields for that section should
// also be set to 0.
// ~54 bits.
// ~64 bits.
// Can't use enums unfortunately, they end up signed and breaking values above half their ranges.
struct VulkanPipelineRasterStateKey {
// Blend
Expand All @@ -37,6 +37,7 @@ struct VulkanPipelineRasterStateKey {
unsigned int colorWriteMask : 4;

// Depth/Stencil
unsigned int depthClampEnable : 1;
unsigned int depthTestEnable : 1;
unsigned int depthWriteEnable : 1;
unsigned int depthCompareOp : 3; // VkCompareOp
Expand All @@ -57,4 +58,4 @@ struct VulkanPipelineRasterStateKey {
size_t size = sizeof(VulkanPipelineRasterStateKey);
return memcmp(this, &other, size) < 0;
}
};
};
1 change: 1 addition & 0 deletions headless/Headless.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,7 @@ int main(int argc, const char* argv[])
g_Config.bVertexDecoderJit = true;
g_Config.bBlockTransferGPU = true;
g_Config.iSplineBezierQuality = 2;
g_Config.bHighQualityDepth = true;

#ifdef _WIN32
InitSysDirectories();
Expand Down