// Patch overview: adds a prefilterNormal compute pass that writes a 5-level mip chain of encoded normals (texNormal),
// and makes gi.cs.hlsl read its normals from that texture (srcNormal, t9) instead of srcNormalRoughness.
// The cross-eye sample handling in CalculateGI is additionally wrapped in #if defined(VR).
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl index ebc8b08956..3649d2c780 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl @@ -45,6 +45,7 @@ Texture2D srcPrevAo : register(t5); // maybe half-res Texture2D srcPrevY : register(t6); // maybe half-res Texture2D srcPrevCoCg : register(t7); // maybe half-res Texture2D srcPrevGISpecular : register(t8); // maybe half-res +Texture2D srcNormal : register(t9); RWTexture2D outAo : register(u0); RWTexture2D outY : register(u1); @@ -206,14 +207,16 @@ void CalculateGI( float SZ = srcWorkingDepth.SampleLevel(samplerPointClamp, sampleUV * frameScale, mipLevel); // Reconstruct sample in current eye's viewspace for correct horizon angles. + float3 samplePos = ScreenToViewPosition(sampleScreenPos, SZ, sampleEyeIndex); // For cross-eye samples, reject if the depth differs too much from the // center pixel -- the other eye may see a different surface due to occlusion. 
- float3 samplePos = ScreenToViewPosition(sampleScreenPos, SZ, sampleEyeIndex); +#if defined(VR) if (sampleEyeIndex != eyeIndex) { if (abs(SZ - viewspaceZ) > viewspaceZ * 0.1) continue; samplePos = FrameBuffer::WorldToView(FrameBuffer::ViewToWorld(samplePos, true, sampleEyeIndex), true, eyeIndex); } +#endif float3 sampleDelta = samplePos - pixCenterPos; float3 sampleHorizonVec = normalize(sampleDelta); @@ -263,7 +266,7 @@ void CalculateGI( float giBoost = 4.0 * Math::PI * (1 + GIDistanceCompensation * smoothstep(0, GICompensationMaxDist, s * EffectRadius)); // IL - float3 normalSample = GBuffer::DecodeNormal(srcNormalRoughness.SampleLevel(samplerPointClamp, sampleUV * frameScale, 0).xy); + float3 normalSample = GBuffer::DecodeNormal(srcNormal.SampleLevel(samplerPointClamp, sampleUV * OUT_FRAME_SCALE, mipLevelRadiance)); if (dot(samplePos, normalSample) > 0) normalSample = -normalSample; float frontBackMult = -dot(normalSample, sampleHorizonVec); @@ -345,7 +348,7 @@ void CalculateGI( float viewspaceZ = READ_DEPTH(srcWorkingDepth, pxCoord); - float2 normalSample = FULLRES_LOAD(srcNormalRoughness, pxCoord, uv * frameScale, samplerLinearClamp).xy; + float2 normalSample = FULLRES_LOAD(srcNormal, pxCoord, uv * OUT_FRAME_SCALE, samplerLinearClamp); float3 viewspaceNormal = GBuffer::DecodeNormal(normalSample); half2 encodedWorldNormal = GBuffer::EncodeNormal(ViewToWorldVector(viewspaceNormal, FrameBuffer::CameraViewInverse[eyeIndex])); diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterNormal.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterNormal.cs.hlsl new file mode 100644 index 0000000000..98b2b40bba --- /dev/null +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterNormal.cs.hlsl @@ -0,0 +1,88 @@ +#include "Common/GBuffer.hlsli" +#include "ScreenSpaceGI/common.hlsli" + +Texture2D srcNormalRoughness : register(t0); + +RWTexture2D outNormal0 : register(u0); +RWTexture2D outNormal1 : register(u1); +RWTexture2D outNormal2 : 
register(u2); +RWTexture2D outNormal3 : register(u3); +RWTexture2D outNormal4 : register(u4); + +float2 NormalMIPFilter(float2 enc0, float2 enc1, float2 enc2, float2 enc3) +{ + float3 avg = GBuffer::DecodeNormal(enc0) + GBuffer::DecodeNormal(enc1) + GBuffer::DecodeNormal(enc2) + GBuffer::DecodeNormal(enc3); + return GBuffer::EncodeNormal(normalize(avg)); +} + +groupshared float2 g_scratchNormal[8][8]; +[numthreads(8, 8, 1)] void main(uint2 dispatchThreadID : SV_DispatchThreadID, uint2 groupThreadID : SV_GroupThreadID) { + const float2 frameScale = FrameDim * RcpTexDim; + + // MIP 0: pass-through copy of the 2x2 quad of encoded normals gathered from srcNormalRoughness (red/green channels) + const uint2 baseCoord = dispatchThreadID; + const uint2 pixCoord = baseCoord * 2; + const float2 uv = (pixCoord + .5) * RCP_OUT_FRAME_DIM; + + float4 nr0 = srcNormalRoughness.GatherRed(samplerPointClamp, uv * frameScale); + float4 nr1 = srcNormalRoughness.GatherGreen(samplerPointClamp, uv * frameScale); + + float2 normal0 = float2(nr0.w, nr1.w); + float2 normal1 = float2(nr0.z, nr1.z); + float2 normal2 = float2(nr0.x, nr1.x); + float2 normal3 = float2(nr0.y, nr1.y); + + outNormal0[pixCoord + uint2(0, 0)] = normal0; + outNormal0[pixCoord + uint2(1, 0)] = normal1; + outNormal0[pixCoord + uint2(0, 1)] = normal2; + outNormal0[pixCoord + uint2(1, 1)] = normal3; + + // MIP 1: filtered normal of this thread's own quad; also parked in groupshared memory for the coarser mips + float2 nm1 = NormalMIPFilter(normal0, normal1, normal2, normal3); + outNormal1[baseCoord] = nm1; + g_scratchNormal[groupThreadID.x][groupThreadID.y] = nm1; + + GroupMemoryBarrierWithGroupSync(); + + // MIP 2: threads with even x and y filter the 2x2 neighbourhood of MIP 1 results + [branch] if (all((groupThreadID.xy % 2) == 0)) + { + float2 inTL = g_scratchNormal[groupThreadID.x + 0][groupThreadID.y + 0]; + float2 inTR = g_scratchNormal[groupThreadID.x + 1][groupThreadID.y + 0]; + float2 inBL = g_scratchNormal[groupThreadID.x + 
0][groupThreadID.y + 1]; + float2 inBR = g_scratchNormal[groupThreadID.x + 1][groupThreadID.y + 1]; + + float2 nm2 = NormalMIPFilter(inTL, inTR, inBL, inBR); + outNormal2[baseCoord / 2] = nm2; + g_scratchNormal[groupThreadID.x][groupThreadID.y] = nm2; + } + + 
GroupMemoryBarrierWithGroupSync(); + + // MIP 3: threads at multiples of 4 filter the four MIP 2 results stored 2 slots apart + [branch] if (all((groupThreadID.xy % 4) == 0)) + { + float2 inTL = g_scratchNormal[groupThreadID.x + 0][groupThreadID.y + 0]; + float2 inTR = g_scratchNormal[groupThreadID.x + 2][groupThreadID.y + 0]; + float2 inBL = g_scratchNormal[groupThreadID.x + 0][groupThreadID.y + 2]; + float2 inBR = g_scratchNormal[groupThreadID.x + 2][groupThreadID.y + 2]; + + float2 nm3 = NormalMIPFilter(inTL, inTR, inBL, inBR); + outNormal3[baseCoord / 4] = nm3; + g_scratchNormal[groupThreadID.x][groupThreadID.y] = nm3; + } + + GroupMemoryBarrierWithGroupSync(); + + // MIP 4: only thread (0, 0) of the 8x8 group passes; it filters the four MIP 3 results stored 4 slots apart + [branch] if (all((groupThreadID.xy % 8) == 0)) + { + float2 inTL = g_scratchNormal[groupThreadID.x + 0][groupThreadID.y + 0]; + float2 inTR = g_scratchNormal[groupThreadID.x + 4][groupThreadID.y + 0]; + float2 inBL = g_scratchNormal[groupThreadID.x + 0][groupThreadID.y + 4]; + float2 inBR = g_scratchNormal[groupThreadID.x + 4][groupThreadID.y + 4]; + + float2 nm4 = NormalMIPFilter(inTL, inTR, inBL, inBR); + outNormal4[baseCoord / 8] = nm4; + } +} diff --git a/src/Features/ScreenSpaceGI.cpp b/src/Features/ScreenSpaceGI.cpp index bd8ac299dd..7a1f4ac0b0 100644 --- a/src/Features/ScreenSpaceGI.cpp +++ b/src/Features/ScreenSpaceGI.cpp @@ -454,6 +454,16 @@ void ScreenSpaceGI::SetupResources() } } + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R8G8_UNORM; + { + texNormal = eastl::make_unique(texDesc); + texNormal->CreateSRV(srvDesc); + for (uint i = 0; i < 5; ++i) { + uavDesc.Texture2D.MipSlice = i; + DX::ThrowIfFailed(device->CreateUnorderedAccessView(texNormal->resource.get(), &uavDesc, uavNormal[i].put())); + } + } + uavDesc.Texture2D.MipSlice = 0; texDesc.MipLevels = srvDesc.Texture2D.MipLevels = 1; srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; @@ 
-569,7 +579,7 @@ void ScreenSpaceGI::SetupResources() void ScreenSpaceGI::ClearShaderCache() { static const std::vector*> shaderPtrs = { - &prefilterDepthsCompute, 
&prefilterRadianceCompute, &radianceDisoccCompute, &giCompute, &blurCompute, &stereoSyncCompute, &upsampleCompute + &prefilterDepthsCompute, &prefilterRadianceCompute, &prefilterNormalCompute, &radianceDisoccCompute, &giCompute, &blurCompute, &stereoSyncCompute, &upsampleCompute }; for (auto shader : shaderPtrs) @@ -591,6 +601,7 @@ void ScreenSpaceGI::CompileComputeShaders() shaderInfos = { { &prefilterDepthsCompute, "prefilterDepths.cs.hlsl", { { "LINEAR_FILTER", "" } } }, { &prefilterRadianceCompute, "prefilterRadiance.cs.hlsl", {} }, + { &prefilterNormalCompute, "prefilterNormal.cs.hlsl", {} }, { &radianceDisoccCompute, "radianceDisocc.cs.hlsl", {} }, { &giCompute, "gi.cs.hlsl", {} }, { &blurCompute, "blur.cs.hlsl", {} }, @@ -625,7 +636,7 @@ void ScreenSpaceGI::CompileComputeShaders() bool ScreenSpaceGI::ShadersOK() { - return texNoise && prefilterDepthsCompute && prefilterRadianceCompute && radianceDisoccCompute && giCompute && blurCompute && upsampleCompute; + return texNoise && prefilterDepthsCompute && prefilterRadianceCompute && prefilterNormalCompute && radianceDisoccCompute && giCompute && blurCompute && upsampleCompute; } void ScreenSpaceGI::UpdateSB() @@ -820,6 +831,24 @@ void ScreenSpaceGI::DrawSSGI() lastFrameAccumTexIdx = !lastFrameAccumTexIdx; } + // Prefilter normals: build the 5 mips of texNormal from the normal/roughness RT; each 8x8 thread group covers 16x16 pixels, hence the round-up-to-16 dispatch size + { + TracyD3D11Zone(globals::state->tracyCtx, "SSGI - Prefilter Normals"); + + resetViews(); + srvs.at(0) = rts[globals::deferred->normalRoughnessRT].SRV; + uavs.at(0) = uavNormal[0].get(); + uavs.at(1) = uavNormal[1].get(); + uavs.at(2) = uavNormal[2].get(); + uavs.at(3) = uavNormal[3].get(); + uavs.at(4) = uavNormal[4].get(); + + context->CSSetShaderResources(0, 1, srvs.data()); + context->CSSetUnorderedAccessViews(0, 5, uavs.data(), nullptr); + context->CSSetShader(prefilterNormalCompute.get(), nullptr, 0); + context->Dispatch((internalRes[0] + 
15u) >> 4, (internalRes[1] + 15u) >> 4, 1); + } + // GI { TracyD3D11Zone(globals::state->tracyCtx, "SSGI - GI"); @@ -834,6 +863,7 @@ void 
ScreenSpaceGI::DrawSSGI() srvs.at(6) = texIlY[inputGITexIdx]->srv.get(); srvs.at(7) = texIlCoCg[inputGITexIdx]->srv.get(); srvs.at(8) = texGiSpecular[inputAoTexIdx]->srv.get(); + srvs.at(9) = texNormal->srv.get(); uavs.at(0) = texAo[!inputAoTexIdx]->uav.get(); uavs.at(1) = texIlY[!inputGITexIdx]->uav.get(); diff --git a/src/Features/ScreenSpaceGI.h b/src/Features/ScreenSpaceGI.h index ed5147b80b..45c0c1a03e 100644 --- a/src/Features/ScreenSpaceGI.h +++ b/src/Features/ScreenSpaceGI.h @@ -139,6 +139,8 @@ struct ScreenSpaceGI : Feature eastl::unique_ptr texRadiance = nullptr; eastl::unique_ptr texRadianceTemp = nullptr; winrt::com_ptr uavRadiance[5] = { nullptr }; + eastl::unique_ptr texNormal = nullptr; + winrt::com_ptr uavNormal[5] = { nullptr }; eastl::unique_ptr texAccumFrames[2] = { nullptr }; eastl::unique_ptr texAo[2] = { nullptr }; eastl::unique_ptr texIlY[2] = { nullptr }; @@ -161,6 +163,7 @@ struct ScreenSpaceGI : Feature winrt::com_ptr prefilterDepthsCompute = nullptr; winrt::com_ptr prefilterRadianceCompute = nullptr; + winrt::com_ptr prefilterNormalCompute = nullptr; winrt::com_ptr radianceDisoccCompute = nullptr; winrt::com_ptr giCompute = nullptr; winrt::com_ptr blurCompute = nullptr;