From 31e050d7ca8148cf2b7ff26ed8baaec8b5b29697 Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sat, 18 Apr 2026 18:23:33 +0100 Subject: [PATCH 01/17] perf: use ps for deferred --- package/Shaders/DeferredCompositePS.hlsl | 355 +++++++++++++++++++++++ package/Shaders/DeferredCompositeVS.hlsl | 14 + src/Deferred.cpp | 232 ++++++++------- src/Deferred.h | 17 +- src/Features/VRStereoOptimizations.h | 2 +- src/Utils/D3DStateBackup.h | 153 ++++++++++ 6 files changed, 663 insertions(+), 110 deletions(-) create mode 100644 package/Shaders/DeferredCompositePS.hlsl create mode 100644 package/Shaders/DeferredCompositeVS.hlsl create mode 100644 src/Utils/D3DStateBackup.h diff --git a/package/Shaders/DeferredCompositePS.hlsl b/package/Shaders/DeferredCompositePS.hlsl new file mode 100644 index 0000000000..a7a84a82c1 --- /dev/null +++ b/package/Shaders/DeferredCompositePS.hlsl @@ -0,0 +1,355 @@ + +#include "Common/BRDF.hlsli" +#include "Common/Color.hlsli" +#include "Common/FrameBuffer.hlsli" +#include "Common/GBuffer.hlsli" +#include "Common/MotionBlur.hlsli" +#include "Common/Shading.hlsli" +#include "Common/SharedData.hlsli" +#include "Common/Spherical Harmonics/SphericalHarmonics.hlsli" +#include "Common/VR.hlsli" + +Texture2D SpecularTexture : register(t0); +Texture2D AlbedoTexture : register(t1); +Texture2D NormalRoughnessTexture : register(t2); +Texture2D MasksTexture : register(t3); + +Texture2D DepthTexture : register(t4); + +Texture2D MainInputTexture : register(t17); +Texture2D MotionVectorsInputTexture : register(t18); + +#if defined(VR_STEREO_OPT) +# include "VRStereoOptimizations/modes.hlsli" +Texture2D StereoOptModeTexture : register(t16); +#endif + +#if defined(DYNAMIC_CUBEMAPS) +Texture2D ReflectanceTexture : register(t5); +TextureCube EnvTexture : register(t6); +TextureCube EnvReflectionsTexture : register(t7); + +SamplerState LinearSampler : register(s0); +#endif + +#if defined(SKYLIGHTING) +# include "Skylighting/Skylighting.hlsli" + +Texture3D SkylightingProbeArray : register(t8); +Texture2DArray stbn_vec3_2Dx1D_128x128x64 : register(t9); + +#endif + +#if defined(SSGI) +Texture2D SsgiAoTexture : register(t10); +Texture2D SsgiYTexture : register(t11); +Texture2D SsgiCoCgTexture : register(t12); +Texture2D SsgiSpecularTexture : register(t13); + +void SampleSSGI(uint2 pixCoord, float3 normalWS, out float ao, out float3 il) +{ + ao = 1 - SsgiAoTexture[pixCoord]; + float4 ssgiIlYSh = SsgiYTexture[pixCoord]; + // without ZH hallucination + // float ssgiIlY = SphericalHarmonics::FuncProductIntegral(ssgiIlYSh, SphericalHarmonics::EvaluateCosineLobe(normalWS)); + float ssgiIlY = SphericalHarmonics::SHHallucinateZH3Irradiance(ssgiIlYSh, normalWS); + float2 ssgiIlCoCg = SsgiCoCgTexture[pixCoord]; + il = max(0, Color::YCoCgToRGB(float3(ssgiIlY, ssgiIlCoCg))); +} + +void SampleSSGISpecular(uint2 pixCoord, sh2 lobe, inout float ao, out float3 il, in float3 normal, in float3 view, in float roughness) +{ + float NdotV = dot(normal, view); + float alpha = roughness * roughness; + ao = SpecularOcclusion(saturate(NdotV), alpha, ao); + + float4 ssgiIlYSh = SsgiYTexture[pixCoord]; + float ssgiIlY = SphericalHarmonics::FuncProductIntegral(ssgiIlYSh, lobe); + float2 ssgiIlCoCg = SsgiCoCgTexture[pixCoord].xy; + + // pi to compensate for the /pi in specularLobe + // i don't think there really should be a 1/PI but without it the specular is too strong + // reflectance being ambient reflectance doesn't help either + il = max(0, Color::YCoCgToRGB(float3(ssgiIlY, ssgiIlCoCg / Math::PI))); + + // HQ spec + float4 hq_spec = SsgiSpecularTexture[pixCoord]; + ao *= 1 - hq_spec.a; + il += hq_spec.rgb; +} +#endif + +#if defined(IBL) +# if !defined(DYNAMIC_CUBEMAPS) +# undef IBL +# else +# define IBL_DEFERRED +# include "IBL/IBL.hlsli" +# endif +#endif + +struct PS_INPUT +{ + float4 Position : SV_Position; + float2 TexCoord : TEXCOORD0; +}; + +struct PS_OUTPUT +{ + float4 Main : SV_Target0; + float4 NormalTAAMaskSpecularMask : SV_Target1; + float4 MotionVectors : SV_Target2; +}; + +PS_OUTPUT main(PS_INPUT input) +{ + uint2 pixCoord = uint2(input.Position.xy); + + float2 uv = float2(pixCoord + 0.5) * SharedData::BufferDim.zw; + uv *= FrameBuffer::DynamicResolutionParams2.xy; // adjust for dynamic res + + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint mode = StereoOptModeTexture[uint2(pixCoord)] & 0x0F; + if (mode == MODE_MAIN) { // stencil-culled in Eye 1, filled by ReprojectionCS + PS_OUTPUT discardOutput; + discardOutput.Main = MainInputTexture[pixCoord]; + discardOutput.NormalTAAMaskSpecularMask = float4(0, 0, 0, 0); + discardOutput.MotionVectors = float4(MotionVectorsInputTexture[pixCoord], 0, 0); + return discardOutput; + } + } +#endif + + uv = Stereo::ConvertFromStereoUV(uv, eyeIndex); + + float3 normalGlossiness = NormalRoughnessTexture[pixCoord]; + float3 normalVS = GBuffer::DecodeNormal(normalGlossiness.xy); + + float3 diffuseColor = MainInputTexture[pixCoord].xyz; + float3 specularColor = SpecularTexture[pixCoord]; + float3 albedo = AlbedoTexture[pixCoord]; + + float depth = DepthTexture[pixCoord]; + float4 positionWS = float4(2 * float2(uv.x, -uv.y + 1) - 1, depth, 1); + positionWS = mul(FrameBuffer::CameraViewProjInverse[eyeIndex], positionWS); + positionWS.xyz = positionWS.xyz / positionWS.w; + + float2 motionVectorsOutput = MotionVectorsInputTexture[pixCoord]; + if (depth == 1.0) + motionVectorsOutput = MotionBlur::GetSSMotionVector(positionWS, positionWS, eyeIndex); + + float glossiness = normalGlossiness.z; + + float3 linDiffuseColor = Color::IrradianceToLinear(diffuseColor); + float3 normalWS = normalize(mul(FrameBuffer::CameraViewInverse[eyeIndex], float4(normalVS, 0)).xyz); + +#if defined(SSGI) + + float ssgiAo; + float3 ssgiIl; + SampleSSGI(pixCoord, normalWS, ssgiAo, ssgiIl); + + float3 linAlbedo = Color::IrradianceToLinear(albedo / Color::PBRLightingScale); + float3 multiBounceSSGIAo = MultiBounceAO(linAlbedo, ssgiAo); + + float3 directionalAmbientColor = 0; + +# if defined(IBL) + if (SharedData::iblSettings.EnableIBL) { + float3 vanillaDALC = Color::Ambient(max(0, SharedData::GetAmbient(normalWS))); + +# if defined(SKYLIGHTING) +# if defined(VR) + float3 positionMS = positionWS.xyz + FrameBuffer::CameraPosAdjust[eyeIndex].xyz - FrameBuffer::CameraPosAdjust[0].xyz; +# else + float3 positionMS = positionWS.xyz; +# endif + sh2 skylightingSH = Skylighting::sample(SharedData::skylightingSettings, SkylightingProbeArray, stbn_vec3_2Dx1D_128x128x64, pixCoord, positionMS.xyz, normalWS); + float skylightingDiffuse = SphericalHarmonics::FuncProductIntegral(skylightingSH, SphericalHarmonics::EvaluateCosineLobe(normalWS)) / Math::PI; + skylightingDiffuse = saturate(skylightingDiffuse); + skylightingDiffuse = Skylighting::mixDiffuse(SharedData::skylightingSettings, skylightingDiffuse); + directionalAmbientColor = ImageBasedLighting::GetDiffuseIBLOccluded(vanillaDALC, -normalWS, skylightingDiffuse) * albedo; +# else + directionalAmbientColor = ImageBasedLighting::GetDiffuseIBL(vanillaDALC, -normalWS) * albedo; +# endif + + directionalAmbientColor = Color::RGBToYCoCg(directionalAmbientColor); + directionalAmbientColor.x = MasksTexture[pixCoord].z; + directionalAmbientColor = Color::YCoCgToRGB(directionalAmbientColor); + directionalAmbientColor = max(0, directionalAmbientColor); + } else +# endif + { + directionalAmbientColor = Color::Ambient(max(0, SharedData::GetAmbient(normalWS))); + directionalAmbientColor *= albedo; + + directionalAmbientColor = Color::RGBToYCoCg(directionalAmbientColor); + directionalAmbientColor.x = MasksTexture[pixCoord].z; + directionalAmbientColor = Color::YCoCgToRGB(directionalAmbientColor); + directionalAmbientColor = max(0, directionalAmbientColor); + } + + { + float maxScale = 1.0; + if (directionalAmbientColor.x > 0.0) + maxScale = min(maxScale, diffuseColor.x / directionalAmbientColor.x); + if (directionalAmbientColor.y > 0.0) + maxScale = min(maxScale, diffuseColor.y / directionalAmbientColor.y); + if (directionalAmbientColor.z > 0.0) + maxScale = min(maxScale, diffuseColor.z / directionalAmbientColor.z); + directionalAmbientColor *= maxScale; + + diffuseColor = max(0.0, diffuseColor - directionalAmbientColor); + linDiffuseColor = Color::IrradianceToLinear(diffuseColor); + linDiffuseColor *= sqrt(multiBounceSSGIAo); + diffuseColor = Color::IrradianceToGamma(linDiffuseColor); + diffuseColor += Color::IrradianceToGamma(Color::IrradianceToLinear(directionalAmbientColor) * multiBounceSSGIAo); + linDiffuseColor = Color::IrradianceToLinear(diffuseColor); + } + + linDiffuseColor += ssgiIl * linAlbedo; +#endif + + float3 color = linDiffuseColor + specularColor; + +#if defined(DYNAMIC_CUBEMAPS) + + float3 reflectance = ReflectanceTexture[pixCoord]; + + if (any(reflectance > 0.0)) { + float3 V = -normalize(positionWS.xyz); + float3 R = reflect(-V, normalWS); + + float roughness = 1.0 - glossiness; + float level = roughness * 7.0; + + sh2 specularLobe = SphericalHarmonics::FauxSpecularLobe(normalWS, V, roughness); + + float3 finalIrradiance = 0; + + float directionalAmbientColorSpecular = Color::RGBToLuminance(Color::Ambient(max(0, SharedData::GetAmbient(R)))) * Color::ReflectionNormalisationScale; + +# if defined(SKYLIGHTING) +# if defined(VR) + float3 positionMS = positionWS.xyz + FrameBuffer::CameraPosAdjust[eyeIndex].xyz - FrameBuffer::CameraPosAdjust[0].xyz; +# else + float3 positionMS = positionWS.xyz; +# endif + + sh2 skylighting = Skylighting::sample(SharedData::skylightingSettings, SkylightingProbeArray, stbn_vec3_2Dx1D_128x128x64, pixCoord, positionMS.xyz, R); + + float skylightingSpecular = SphericalHarmonics::FuncProductIntegral(skylighting, specularLobe); + skylightingSpecular = saturate(skylightingSpecular); + skylightingSpecular = Skylighting::mixSpecular(SharedData::skylightingSettings, skylightingSpecular); +# endif + +# if defined(IBL) + if (SharedData::iblSettings.EnableIBL) { + float3 envSample = EnvTexture.SampleLevel(LinearSampler, R, level); + float3 fullSample = EnvReflectionsTexture.SampleLevel(LinearSampler, R, level); + float3 envSpecular, skySpecular; + + if (SharedData::iblSettings.DALCMode >= 2) { + // Mode 2/3: DALC-normalized env scaled by DALCAmount + sky overlay + float envLum = Color::RGBToLuminance(EnvTexture.SampleLevel(LinearSampler, R, 15)); + envSpecular = Color::IrradianceToLinear((envSample / max(envLum, 0.001)) * directionalAmbientColorSpecular) * SharedData::iblSettings.DALCAmount; + skySpecular = Color::IrradianceToLinear(max(0, fullSample - envSample)) * SharedData::iblSettings.SkyIBLScale; +# if defined(SKYLIGHTING) + envSpecular *= (SharedData::iblSettings.DALCMode == 3) ? skylightingSpecular : 1.0; + skySpecular *= skylightingSpecular; +# elif defined(INTERIOR) + skySpecular = 0; +# endif + } else { + // Mode 0/1: IBL ratio-based + float3 ratio = ImageBasedLighting::GetIBLRatio(); + envSpecular = Color::IrradianceToLinear(envSample * ratio) * SharedData::iblSettings.EnvIBLScale; + skySpecular = Color::IrradianceToLinear(max(0, fullSample - envSample)) * SharedData::iblSettings.SkyIBLScale; +# if defined(SKYLIGHTING) + skySpecular *= skylightingSpecular; +# elif defined(INTERIOR) + skySpecular = 0; +# endif + } + + finalIrradiance = envSpecular + skySpecular; + } else +# endif + { + // Fallback without IBL: normalize-by-luminance with DALC +# if defined(INTERIOR) + float3 specularIrradiance = EnvTexture.SampleLevel(LinearSampler, R, level); + float specularIrradianceLuminance = Color::RGBToLuminance(EnvTexture.SampleLevel(LinearSampler, R, 15)); + specularIrradiance = (specularIrradiance / max(specularIrradianceLuminance, 0.001)) * directionalAmbientColorSpecular; + finalIrradiance = Color::IrradianceToLinear(specularIrradiance); +# elif defined(SKYLIGHTING) + float3 specularIrradianceReflections = 0.0; + if (skylightingSpecular > 0.0) { + specularIrradianceReflections = EnvReflectionsTexture.SampleLevel(LinearSampler, R, level); + float lum = Color::RGBToLuminance(EnvReflectionsTexture.SampleLevel(LinearSampler, R, 15)); + specularIrradianceReflections = (specularIrradianceReflections / max(lum, 0.001)) * directionalAmbientColorSpecular; + specularIrradianceReflections = Color::IrradianceToLinear(specularIrradianceReflections); + } + float3 specularIrradiance = 0.0; + if (skylightingSpecular < 1.0) { + specularIrradiance = EnvTexture.SampleLevel(LinearSampler, R, level); + float lum = Color::RGBToLuminance(EnvTexture.SampleLevel(LinearSampler, R, 15)); + float dalcScaled = Color::IrradianceToGamma(Color::IrradianceToLinear(directionalAmbientColorSpecular) * skylightingSpecular); + specularIrradiance = (specularIrradiance / max(lum, 0.001)) * dalcScaled; + specularIrradiance = Color::IrradianceToLinear(specularIrradiance); + } + finalIrradiance = lerp(specularIrradiance, specularIrradianceReflections, skylightingSpecular); +# else + float3 specularIrradiance = EnvReflectionsTexture.SampleLevel(LinearSampler, R, level); + float specularIrradianceLuminance = Color::RGBToLuminance(EnvReflectionsTexture.SampleLevel(LinearSampler, R, 15)); + specularIrradiance = (specularIrradiance / max(specularIrradianceLuminance, 0.001)) * directionalAmbientColorSpecular; + finalIrradiance = Color::IrradianceToLinear(specularIrradiance); +# endif + } + +# if defined(SSGI) + float3 ssgiIlSpecular; + SampleSSGISpecular(pixCoord, specularLobe, ssgiAo, ssgiIlSpecular, normalWS, V, roughness); + + finalIrradiance = (finalIrradiance * ssgiAo); + + ssgiIlSpecular = Color::RGBToYCoCg(ssgiIlSpecular); + ssgiIlSpecular = max(0, Color::YCoCgToRGB(float3(ssgiIlSpecular.x, lerp(ssgiIlSpecular.yz, Color::RGBToYCoCg(finalIrradiance).yz, 0.5)))); + + finalIrradiance += ssgiIlSpecular; +# endif + + color += reflectance * finalIrradiance; + } + +#endif + + color = Color::IrradianceToGamma(color); + +#if defined(DEBUG) + +# if defined(VR) + uv.x += (eyeIndex ? 0.1 : -0.1); +# endif // VR + + if (uv.x < 0.5 && uv.y < 0.5) { + color = color; + } else if (uv.x < 0.5) { + color = albedo; + } else if (uv.y < 0.5) { + color = normalVS; + } else { + color = glossiness; + } + +#endif + + PS_OUTPUT output; + output.Main = float4(color, 1.0); + output.NormalTAAMaskSpecularMask = float4(GBuffer::EncodeNormalVanilla(normalVS), 0.0, 0.0); + output.MotionVectors = float4(motionVectorsOutput, 0.0, 0.0); + return output; +} diff --git a/package/Shaders/DeferredCompositeVS.hlsl b/package/Shaders/DeferredCompositeVS.hlsl new file mode 100644 index 0000000000..517654f57d --- /dev/null +++ b/package/Shaders/DeferredCompositeVS.hlsl @@ -0,0 +1,14 @@ +struct VS_OUTPUT +{ + float4 Position : SV_Position; + float2 TexCoord : TEXCOORD0; +}; + +VS_OUTPUT main(uint vertexID : SV_VertexID) +{ + VS_OUTPUT output; + float2 uv = float2((vertexID << 1) & 2, vertexID & 2); + output.Position = float4(uv * float2(2, -2) + float2(-1, 1), 0, 1); + output.TexCoord = uv; + return output; +} diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 8f5433de4d..8d09be9d79 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -17,6 +17,7 @@ #include "Features/WeatherEditor.h" #include "Hooks.h" +#include "Utils/D3DStateBackup.h" struct DepthStates { @@ -133,25 +134,49 @@ void Deferred::SetupResources() } { - D3D11_TEXTURE2D_DESC texDesc; - auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; - mainTex.texture->GetDesc(&texDesc); + auto device = globals::d3d::device; - texDesc.Format = DXGI_FORMAT_R11G11B10_FLOAT; - texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + D3D11_TEXTURE2D_DESC mainTexDesc{}; + renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN].texture->GetDesc(&mainTexDesc); - D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { - .Format = texDesc.Format, + D3D11_TEXTURE2D_DESC copyDesc = mainTexDesc; + copyDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + copyDesc.MiscFlags = 0; + mainCopy = std::make_unique(copyDesc); + mainCopy->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = copyDesc.Format, .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, - .Texture2D = { - .MostDetailedMip = 0, - .MipLevels = 1 } - }; - D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = { - .Format = texDesc.Format, - .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, - .Texture2D = { .MipSlice = 0 } - }; + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + + D3D11_TEXTURE2D_DESC mvTexDesc{}; + renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMOTION_VECTOR].texture->GetDesc(&mvTexDesc); + + D3D11_TEXTURE2D_DESC mvCopyDesc = mvTexDesc; + mvCopyDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + mvCopyDesc.MiscFlags = 0; + motionVectorsCopy = std::make_unique(mvCopyDesc); + motionVectorsCopy->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = mvCopyDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + + D3D11_BLEND_DESC blendDesc{}; + blendDesc.IndependentBlendEnable = FALSE; + blendDesc.RenderTarget[0].BlendEnable = FALSE; + blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, compositeBlendState.put())); + + D3D11_DEPTH_STENCIL_DESC dsDesc{}; + dsDesc.DepthEnable = FALSE; + dsDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; + dsDesc.StencilEnable = FALSE; + DX::ThrowIfFailed(device->CreateDepthStencilState(&dsDesc, compositeDepthStencilState.put())); + + D3D11_RASTERIZER_DESC rsDesc{}; + rsDesc.FillMode = D3D11_FILL_SOLID; + rsDesc.CullMode = D3D11_CULL_NONE; + rsDesc.DepthClipEnable = FALSE; + DX::ThrowIfFailed(device->CreateRasterizerState(&rsDesc, compositeRasterizerState.put())); } } @@ -273,9 +298,12 @@ void Deferred::StartDeferred() vrBuffer = *VRValues.get(); } if (vrBuffer) { + context->PSSetConstantBuffers(12, 1, buffers); + context->PSSetConstantBuffers(13, 1, &vrBuffer); context->CSSetConstantBuffers(12, 1, buffers); context->CSSetConstantBuffers(13, 1, &vrBuffer); } else { + context->PSSetConstantBuffers(12, 1, buffers); context->CSSetConstantBuffers(12, 1, buffers); } } @@ -299,22 +327,6 @@ void Deferred::DeferredPasses() auto renderer = globals::game::renderer; auto context = globals::d3d::context; - { - ID3D11Buffer* buffers[1] = { *globals::game::perFrame }; - ID3D11Buffer* vrBuffer = nullptr; - - if (REL::Module::IsVR()) { - static REL::Relocation VRValues{ REL::Offset(0x3180688) }; - vrBuffer = *VRValues.get(); - } - if (vrBuffer) { - context->CSSetConstantBuffers(12, 1, buffers); - context->CSSetConstantBuffers(13, 1, &vrBuffer); - } else { - context->CSSetConstantBuffers(12, 1, buffers); - } - } - auto specular = renderer->GetRuntimeData().renderTargets[SPECULAR]; auto albedo = renderer->GetRuntimeData().renderTargets[ALBEDO]; auto normalRoughness = renderer->GetRuntimeData().renderTargets[NORMALROUGHNESS]; @@ -337,8 +349,6 @@ void Deferred::DeferredPasses() auto [ssgi_ao, ssgi_y, ssgi_cocg, ssgi_gi_spec] = ssgi.GetOutputTextures(); bool ssgi_hq_spec = ssgi.settings.EnableExperimentalSpecularGI; - auto dispatchCount = Util::GetScreenDispatchCount(true); - auto& sss = globals::features::subsurfaceScattering; if (sss.loaded) sss.DrawSSS(); @@ -353,7 +363,27 @@ void Deferred::DeferredPasses() { TracyD3D11Zone(globals::state->tracyCtx, "Deferred Composite"); - ID3D11ShaderResourceView* srvs[16]{ + Util::D3DStateBackup stateBackup; + stateBackup.Backup(context); + + context->CopyResource(mainCopy->resource.get(), main.texture); + context->CopyResource(motionVectorsCopy->resource.get(), motionVectors.texture); + + // Constant buffers + { + ID3D11Buffer* buffers[1] = { *globals::game::perFrame }; + context->PSSetConstantBuffers(12, 1, buffers); + + if (REL::Module::IsVR()) { + static REL::Relocation VRValues{ REL::Offset(0x3180688) }; + ID3D11Buffer* vrBuffer = *VRValues.get(); + if (vrBuffer) + context->PSSetConstantBuffers(13, 1, &vrBuffer); + } + } + + // SRVs + ID3D11ShaderResourceView* srvs[19]{ specular.SRV, albedo.SRV, normalRoughness.SRV, @@ -370,32 +400,47 @@ void Deferred::DeferredPasses() ssgi_hq_spec ? ssgi_gi_spec : nullptr, ibl.loaded ? ibl.envIBLTexture->srv.get() : nullptr, ibl.loaded ? ibl.skyIBLTexture->srv.get() : nullptr, + nullptr, + mainCopy->srv.get(), + motionVectorsCopy->srv.get(), }; - if (dynamicCubemaps.loaded) - context->CSSetSamplers(0, 1, &linearSampler); - - context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); - - // Bind VRStereoOptimizations mode texture for Eye 1 skip. - // Bind null when disabled so stale mode data doesn't cause incorrect early-exits - // in DeferredCompositeCS (null SRV reads return 0 = MODE_DISOCCLUDED, all pixels composite normally). auto& vrStereoOpt = globals::features::vr.stereoOpt; bool stereoCullingReady = globals::features::vr.IsStereoOptimizationCullingReady(); - ID3D11ShaderResourceView* modeSRV = stereoCullingReady ? vrStereoOpt.GetModeTextureSRV() : nullptr; - context->CSSetShaderResources(16, 1, &modeSRV); + srvs[16] = stereoCullingReady ? vrStereoOpt.GetModeTextureSRV() : nullptr; - ID3D11UnorderedAccessView* uavs[3]{ main.UAV, normals.UAV, motionVectors.UAV }; - context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); + context->PSSetShaderResources(0, ARRAYSIZE(srvs), srvs); - auto shader = interior ? GetComputeMainCompositeInterior() : GetComputeMainComposite(); - context->CSSetShader(shader, nullptr, 0); - - context->Dispatch(dispatchCount.x, dispatchCount.y, 1); - - // Unbind mode texture SRV - ID3D11ShaderResourceView* nullSRV = nullptr; - context->CSSetShaderResources(16, 1, &nullSRV); + if (dynamicCubemaps.loaded) + context->PSSetSamplers(0, 1, &linearSampler); + + // Render targets + ID3D11RenderTargetView* rtvs[3]{ main.RTV, normals.RTV, motionVectors.RTV }; + context->OMSetRenderTargets(ARRAYSIZE(rtvs), rtvs, nullptr); + context->OMSetBlendState(compositeBlendState.get(), nullptr, 0xFFFFFFFF); + context->OMSetDepthStencilState(compositeDepthStencilState.get(), 0); + + // Viewport + float2 resolution = Util::ConvertToDynamic(globals::state->screenSize); + D3D11_VIEWPORT vp{}; + vp.Width = resolution.x; + vp.Height = resolution.y; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + context->RSSetState(compositeRasterizerState.get()); + + // Shaders and draw + context->VSSetShader(GetCompositeVS(), nullptr, 0); + context->PSSetShader(GetCompositePS(interior), nullptr, 0); + context->GSSetShader(nullptr, nullptr, 0); + + context->IASetInputLayout(nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + context->Draw(3, 0); + + stateBackup.Restore(context); } // VR: Deactivate stencil culling now that geometry rendering is complete. @@ -413,20 +458,6 @@ void Deferred::DeferredPasses() globals::features::vr.DrawStereoBlend(); } - // Clear - { - ID3D11ShaderResourceView* views[16]{ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }; - context->CSSetShaderResources(0, ARRAYSIZE(views), views); - - ID3D11UnorderedAccessView* uavs[3]{ nullptr, nullptr, nullptr }; - context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); - - ID3D11Buffer* buffers[1] = { nullptr }; - context->CSSetConstantBuffers(12, 1, buffers); - - context->CSSetShader(nullptr, nullptr, 0); - } - if (dynamicCubemaps.loaded) dynamicCubemaps.PostDeferred(); } @@ -554,57 +585,48 @@ void Deferred::ResetBlendStates() void Deferred::ClearShaderCache() { - if (mainCompositeCS) { - mainCompositeCS->Release(); - mainCompositeCS = nullptr; + if (compositePS) { + compositePS->Release(); + compositePS = nullptr; + } + if (compositePSInterior) { + compositePSInterior->Release(); + compositePSInterior = nullptr; } - if (mainCompositeInteriorCS) { - mainCompositeInteriorCS->Release(); - mainCompositeInteriorCS = nullptr; + if (compositeVS) { + compositeVS->Release(); + compositeVS = nullptr; } } -ID3D11ComputeShader* Deferred::GetComputeMainComposite() +ID3D11VertexShader* Deferred::GetCompositeVS() { - if (!mainCompositeCS) { - logger::debug("Compiling DeferredCompositeCS"); + if (!compositeVS) { + logger::debug("Compiling DeferredCompositeVS"); std::vector> defines; - - if (globals::features::dynamicCubemaps.loaded) - defines.push_back({ "DYNAMIC_CUBEMAPS", nullptr }); - - if (globals::features::skylighting.loaded) - defines.push_back({ "SKYLIGHTING", nullptr }); - - if (globals::features::screenSpaceGI.loaded) - defines.push_back({ "SSGI", nullptr }); - - if (globals::features::ibl.loaded) - defines.push_back({ "IBL", nullptr }); - - if (REL::Module::IsVR()) - defines.push_back({ "FRAMEBUFFER", nullptr }); - - if (REL::Module::IsVR()) - defines.push_back({ "VR_STEREO_OPT", nullptr }); - - mainCompositeCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); + compositeVS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeVS.hlsl", defines, "vs_5_0")); } - return mainCompositeCS; + return compositeVS; } -ID3D11ComputeShader* Deferred::GetComputeMainCompositeInterior() +ID3D11PixelShader* Deferred::GetCompositePS(bool interior) { - if (!mainCompositeInteriorCS) { - logger::debug("Compiling DeferredCompositeCS INTERIOR"); + auto& cached = interior ? compositePSInterior : compositePS; + if (!cached) { + logger::debug("Compiling DeferredCompositePS {}", interior ? "INTERIOR" : ""); std::vector> defines; - defines.push_back({ "INTERIOR", nullptr }); + + if (interior) + defines.push_back({ "INTERIOR", nullptr }); if (globals::features::dynamicCubemaps.loaded) defines.push_back({ "DYNAMIC_CUBEMAPS", nullptr }); + if (!interior && globals::features::skylighting.loaded) + defines.push_back({ "SKYLIGHTING", nullptr }); + if (globals::features::screenSpaceGI.loaded) defines.push_back({ "SSGI", nullptr }); @@ -617,9 +639,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainCompositeInterior() if (REL::Module::IsVR()) defines.push_back({ "VR_STEREO_OPT", nullptr }); - mainCompositeInteriorCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); + cached = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositePS.hlsl", defines, "ps_5_0")); } - return mainCompositeInteriorCS; + return cached; } void Deferred::Hooks::Main_RenderShadowMaps::thunk() diff --git a/src/Deferred.h b/src/Deferred.h index 3adb455b76..301d1e7b04 100644 --- a/src/Deferred.h +++ b/src/Deferred.h @@ -1,6 +1,7 @@ #pragma once #include "Buffer.h" +#include #define ALBEDO RE::RENDER_TARGETS::kINDIRECT #define SPECULAR RE::RENDER_TARGETS::kINDIRECT_DOWNSCALED @@ -31,16 +32,24 @@ class Deferred void ClearShaderCache(); - ID3D11ComputeShader* GetComputeMainComposite(); - ID3D11ComputeShader* GetComputeMainCompositeInterior(); + ID3D11PixelShader* GetCompositePS(bool interior); + ID3D11VertexShader* GetCompositeVS(); ID3D11BlendState* deferredBlendStates[7][2][13][2]; ID3D11BlendState* forwardBlendStates[7][2][13][2]; RE::RENDER_TARGET forwardRenderTargets[4]; - ID3D11ComputeShader* mainCompositeCS = nullptr; - ID3D11ComputeShader* mainCompositeInteriorCS = nullptr; + ID3D11PixelShader* compositePS = nullptr; + ID3D11PixelShader* compositePSInterior = nullptr; + ID3D11VertexShader* compositeVS = nullptr; + + winrt::com_ptr compositeBlendState; + winrt::com_ptr compositeDepthStencilState; + winrt::com_ptr compositeRasterizerState; + + std::unique_ptr mainCopy; + std::unique_ptr motionVectorsCopy; bool deferredPass = false; diff --git a/src/Features/VRStereoOptimizations.h b/src/Features/VRStereoOptimizations.h index 4f324395ce..2fa3dfcddb 100644 --- a/src/Features/VRStereoOptimizations.h +++ b/src/Features/VRStereoOptimizations.h @@ -171,7 +171,7 @@ struct VRStereoOptimizations /// Deactivate stencil culling (called from Deferred after geometry rendering completes) void DeactivateStencil(); - /// Get mode texture SRV for external consumers (e.g., DeferredCompositeCS Eye 1 skip) + /// Get mode texture SRV for external consumers (e.g., DeferredCompositePS Eye 1 skip) ID3D11ShaderResourceView* GetModeTextureSRV() const { return texPerPixelMode ? texPerPixelMode->srv.get() : nullptr; } /// Get POM offset texture SRV for StereoBlendCS (reads per-pixel parallax depth offset) diff --git a/src/Utils/D3DStateBackup.h b/src/Utils/D3DStateBackup.h new file mode 100644 index 0000000000..b03740c1c7 --- /dev/null +++ b/src/Utils/D3DStateBackup.h @@ -0,0 +1,153 @@ +#pragma once + +#include + +namespace Util +{ + struct D3DStateBackup + { + static constexpr UINT kNumSRVSlots = 20; + static constexpr UINT kNumSamplerSlots = 2; + static constexpr UINT kNumCBSlots = 14; + + ID3D11InputLayout* iaInputLayout = nullptr; + D3D11_PRIMITIVE_TOPOLOGY iaTopology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + + ID3D11VertexShader* vs = nullptr; + ID3D11Buffer* vsCBs[kNumCBSlots] = {}; + + ID3D11GeometryShader* gs = nullptr; + + ID3D11RasterizerState* rsState = nullptr; + UINT rsNumViewports = D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + D3D11_VIEWPORT rsViewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {}; + + ID3D11PixelShader* ps = nullptr; + ID3D11ShaderResourceView* psSRVs[kNumSRVSlots] = {}; + ID3D11SamplerState* psSamplers[kNumSamplerSlots] = {}; + ID3D11Buffer* psCBs[kNumCBSlots] = {}; + + ID3D11RenderTargetView* omRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {}; + ID3D11DepthStencilView* omDSV = nullptr; + ID3D11BlendState* omBlendState = nullptr; + FLOAT omBlendFactor[4] = {}; + UINT omSampleMask = 0; + ID3D11DepthStencilState* omDSState = nullptr; + UINT omStencilRef = 0; + + void Backup(ID3D11DeviceContext* context) + { + context->IAGetInputLayout(&iaInputLayout); + context->IAGetPrimitiveTopology(&iaTopology); + + context->VSGetShader(&vs, nullptr, nullptr); + context->VSGetConstantBuffers(0, kNumCBSlots, vsCBs); + + context->GSGetShader(&gs, nullptr, nullptr); + + context->RSGetState(&rsState); + rsNumViewports = D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + context->RSGetViewports(&rsNumViewports, rsViewports); + + context->PSGetShader(&ps, nullptr, nullptr); + context->PSGetShaderResources(0, kNumSRVSlots, psSRVs); + context->PSGetSamplers(0, kNumSamplerSlots, psSamplers); + context->PSGetConstantBuffers(0, kNumCBSlots, psCBs); + + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, omRTVs, &omDSV); + context->OMGetBlendState(&omBlendState, omBlendFactor, &omSampleMask); + context->OMGetDepthStencilState(&omDSState, &omStencilRef); + } + + void Restore(ID3D11DeviceContext* context) + { + context->IASetInputLayout(iaInputLayout); + context->IASetPrimitiveTopology(iaTopology); + + context->VSSetShader(vs, nullptr, 0); + context->VSSetConstantBuffers(0, kNumCBSlots, vsCBs); + + context->GSSetShader(gs, nullptr, 0); + + context->RSSetState(rsState); + context->RSSetViewports(rsNumViewports, rsViewports); + + context->PSSetShader(ps, nullptr, 0); + context->PSSetShaderResources(0, kNumSRVSlots, psSRVs); + context->PSSetSamplers(0, kNumSamplerSlots, psSamplers); + context->PSSetConstantBuffers(0, kNumCBSlots, psCBs); + + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, omRTVs, omDSV); + context->OMSetBlendState(omBlendState, omBlendFactor, omSampleMask); + context->OMSetDepthStencilState(omDSState, omStencilRef); + + Release(); + } + + void Release() + { + if (iaInputLayout) { + iaInputLayout->Release(); + iaInputLayout = nullptr; + } + if (vs) { + vs->Release(); + vs = nullptr; + } + for (auto& cb : vsCBs) { + if (cb) { + cb->Release(); + cb = nullptr; + } + } + if (gs) { + gs->Release(); + gs = nullptr; + } + if (rsState) { + rsState->Release(); + rsState = nullptr; + } + if (ps) { + ps->Release(); + ps = nullptr; + } + for (auto& srv : psSRVs) { + if (srv) { + srv->Release(); + srv = nullptr; + } + } + for (auto& s : psSamplers) { + if (s) { + s->Release(); + s = nullptr; + } + } + for (auto& cb : psCBs) { + if (cb) { + cb->Release(); + cb = nullptr; + } + } + for (auto& rtv : omRTVs) { + if (rtv) { + rtv->Release(); + rtv = nullptr; + } + } + if (omDSV) { + omDSV->Release(); + omDSV = nullptr; + } + if (omBlendState) { + omBlendState->Release(); + omBlendState = nullptr; + } + if (omDSState) { + omDSState->Release(); + omDSState = nullptr; + } + } + }; +} From d5b8c04a904362626f7117aa4fc767824c490f81 Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:50:10 +0100 Subject: [PATCH 02/17] perf: more optimisations --- package/Shaders/DeferredCompositePS.hlsl | 26 --------- src/Deferred.cpp | 70 +++++++++--------------- src/Deferred.h | 4 +- 3 files changed, 26 insertions(+), 74 deletions(-) diff --git a/package/Shaders/DeferredCompositePS.hlsl b/package/Shaders/DeferredCompositePS.hlsl index a7a84a82c1..04f944684b 100644 --- a/package/Shaders/DeferredCompositePS.hlsl +++ b/package/Shaders/DeferredCompositePS.hlsl @@ -3,7 +3,6 @@ #include "Common/Color.hlsli" #include "Common/FrameBuffer.hlsli" #include "Common/GBuffer.hlsli" -#include "Common/MotionBlur.hlsli" #include "Common/Shading.hlsli" #include "Common/SharedData.hlsli" #include "Common/Spherical Harmonics/SphericalHarmonics.hlsli" @@ -17,12 +16,6 @@ Texture2D MasksTexture : register(t3); Texture2D DepthTexture : register(t4); Texture2D MainInputTexture : register(t17); -Texture2D MotionVectorsInputTexture : register(t18); - -#if defined(VR_STEREO_OPT) -# include "VRStereoOptimizations/modes.hlsli" -Texture2D StereoOptModeTexture : register(t16); -#endif #if defined(DYNAMIC_CUBEMAPS) Texture2D ReflectanceTexture : register(t5); @@ -98,7 +91,6 @@ struct PS_OUTPUT { float4 Main : SV_Target0; float4 NormalTAAMaskSpecularMask : SV_Target1; - float4 MotionVectors : SV_Target2; }; PS_OUTPUT main(PS_INPUT input) @@ -110,19 +102,6 @@ PS_OUTPUT main(PS_INPUT input) uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); -#if defined(VR_STEREO_OPT) - if (eyeIndex == 1) { - uint mode = StereoOptModeTexture[uint2(pixCoord)] & 0x0F; - if (mode == MODE_MAIN) { // stencil-culled in Eye 1, filled by ReprojectionCS - PS_OUTPUT discardOutput; - discardOutput.Main = MainInputTexture[pixCoord]; - discardOutput.NormalTAAMaskSpecularMask = float4(0, 0, 0, 0); - discardOutput.MotionVectors = float4(MotionVectorsInputTexture[pixCoord], 0, 0); - return discardOutput; - } - } -#endif - uv = Stereo::ConvertFromStereoUV(uv, eyeIndex); float3 normalGlossiness = NormalRoughnessTexture[pixCoord]; @@ -137,10 +116,6 @@ PS_OUTPUT main(PS_INPUT input) positionWS = mul(FrameBuffer::CameraViewProjInverse[eyeIndex], positionWS); positionWS.xyz = positionWS.xyz / positionWS.w; - float2 motionVectorsOutput = MotionVectorsInputTexture[pixCoord]; - if (depth == 1.0) - motionVectorsOutput = MotionBlur::GetSSMotionVector(positionWS, positionWS, eyeIndex); - float glossiness = normalGlossiness.z; float3 linDiffuseColor = Color::IrradianceToLinear(diffuseColor); @@ -350,6 +325,5 @@ PS_OUTPUT main(PS_INPUT input) PS_OUTPUT output; output.Main = float4(color, 1.0); output.NormalTAAMaskSpecularMask = float4(GBuffer::EncodeNormalVanilla(normalVS), 0.0, 0.0); - output.MotionVectors = float4(motionVectorsOutput, 0.0, 0.0); return output; } diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 8d09be9d79..2bab17aa1b 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -136,33 +136,7 @@ void Deferred::SetupResources() { auto device = globals::d3d::device; - D3D11_TEXTURE2D_DESC mainTexDesc{}; - renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN].texture->GetDesc(&mainTexDesc); - - D3D11_TEXTURE2D_DESC copyDesc = mainTexDesc; - copyDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - copyDesc.MiscFlags = 0; - mainCopy = std::make_unique(copyDesc); - mainCopy->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ - .Format = copyDesc.Format, - .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, - .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); - - D3D11_TEXTURE2D_DESC mvTexDesc{}; - renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMOTION_VECTOR].texture->GetDesc(&mvTexDesc); - - D3D11_TEXTURE2D_DESC mvCopyDesc = mvTexDesc; - mvCopyDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - mvCopyDesc.MiscFlags = 0; - motionVectorsCopy = std::make_unique(mvCopyDesc); - motionVectorsCopy->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ - .Format = mvCopyDesc.Format, - .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, - .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); - D3D11_BLEND_DESC blendDesc{}; - blendDesc.IndependentBlendEnable = FALSE; - blendDesc.RenderTarget[0].BlendEnable = FALSE; blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, compositeBlendState.put())); @@ -172,6 +146,19 @@ void Deferred::SetupResources() dsDesc.StencilEnable = FALSE; DX::ThrowIfFailed(device->CreateDepthStencilState(&dsDesc, compositeDepthStencilState.put())); + D3D11_DEPTH_STENCIL_DESC stencilDsDesc{}; + stencilDsDesc.DepthEnable = FALSE; + stencilDsDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; + stencilDsDesc.StencilEnable = TRUE; + stencilDsDesc.StencilReadMask = 0xFF; + stencilDsDesc.StencilWriteMask = 0x00; + stencilDsDesc.FrontFace.StencilFunc = D3D11_COMPARISON_NOT_EQUAL; + stencilDsDesc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + stencilDsDesc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + stencilDsDesc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + stencilDsDesc.BackFace = stencilDsDesc.FrontFace; + DX::ThrowIfFailed(device->CreateDepthStencilState(&stencilDsDesc, compositeStencilDSState.put())); + D3D11_RASTERIZER_DESC rsDesc{}; rsDesc.FillMode = D3D11_FILL_SOLID; rsDesc.CullMode = D3D11_CULL_NONE; @@ -337,8 +324,6 @@ void Deferred::DeferredPasses() auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; auto reflectance = renderer->GetRuntimeData().renderTargets[REFLECTANCE]; - auto motionVectors = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMOTION_VECTOR]; - bool interior = Util::IsInterior(); auto& skylighting = globals::features::skylighting; @@ -366,8 +351,10 @@ void Deferred::DeferredPasses() Util::D3DStateBackup stateBackup; stateBackup.Backup(context); - context->CopyResource(mainCopy->resource.get(), main.texture); - context->CopyResource(motionVectorsCopy->resource.get(), motionVectors.texture); + auto& mainCopy = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN_COPY]; + float2 resolution = Util::ConvertToDynamic(globals::state->screenSize); + D3D11_BOX srcBox = { 0, 0, 0, (UINT)resolution.x, (UINT)resolution.y, 1 }; + context->CopySubresourceRegion(mainCopy.texture, 0, 0, 0, 0, main.texture, 0, &srcBox); // Constant buffers { @@ -383,7 +370,7 @@ void Deferred::DeferredPasses() } // SRVs - ID3D11ShaderResourceView* srvs[19]{ + ID3D11ShaderResourceView* srvs[18]{ specular.SRV, albedo.SRV, normalRoughness.SRV, @@ -401,27 +388,23 @@ void Deferred::DeferredPasses() ibl.loaded ? ibl.envIBLTexture->srv.get() : nullptr, ibl.loaded ? ibl.skyIBLTexture->srv.get() : nullptr, nullptr, - mainCopy->srv.get(), - motionVectorsCopy->srv.get(), + mainCopy.SRV, }; - auto& vrStereoOpt = globals::features::vr.stereoOpt; - bool stereoCullingReady = globals::features::vr.IsStereoOptimizationCullingReady(); - srvs[16] = stereoCullingReady ? vrStereoOpt.GetModeTextureSRV() : nullptr; - context->PSSetShaderResources(0, ARRAYSIZE(srvs), srvs); if (dynamicCubemaps.loaded) context->PSSetSamplers(0, 1, &linearSampler); - // Render targets - ID3D11RenderTargetView* rtvs[3]{ main.RTV, normals.RTV, motionVectors.RTV }; - context->OMSetRenderTargets(ARRAYSIZE(rtvs), rtvs, nullptr); + // Render targets + stencil test for VR stereo culling + bool useStencil = globals::game::isVR && globals::features::vr.stereoOpt.IsStencilActive(); + ID3D11RenderTargetView* rtvs[2]{ main.RTV, normals.RTV }; + ID3D11DepthStencilView* dsv = useStencil ? depth.views[0] : nullptr; + context->OMSetRenderTargets(ARRAYSIZE(rtvs), rtvs, dsv); context->OMSetBlendState(compositeBlendState.get(), nullptr, 0xFFFFFFFF); - context->OMSetDepthStencilState(compositeDepthStencilState.get(), 0); + context->OMSetDepthStencilState(useStencil ? compositeStencilDSState.get() : compositeDepthStencilState.get(), 1); // Viewport - float2 resolution = Util::ConvertToDynamic(globals::state->screenSize); D3D11_VIEWPORT vp{}; vp.Width = resolution.x; vp.Height = resolution.y; @@ -636,9 +619,6 @@ ID3D11PixelShader* Deferred::GetCompositePS(bool interior) if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); - if (REL::Module::IsVR()) - defines.push_back({ "VR_STEREO_OPT", nullptr }); - cached = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositePS.hlsl", defines, "ps_5_0")); } return cached; diff --git a/src/Deferred.h b/src/Deferred.h index 301d1e7b04..330979134b 100644 --- a/src/Deferred.h +++ b/src/Deferred.h @@ -46,11 +46,9 @@ class Deferred winrt::com_ptr compositeBlendState; winrt::com_ptr compositeDepthStencilState; + winrt::com_ptr compositeStencilDSState; winrt::com_ptr compositeRasterizerState; - std::unique_ptr mainCopy; - std::unique_ptr motionVectorsCopy; - bool deferredPass = false; ID3D11SamplerState* linearSampler = nullptr; From 1944cd7351f87fe3ef96624a9db10b08901eacf2 Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:51:21 +0100 Subject: [PATCH 03/17] chore: delete cs shader --- package/Shaders/DeferredCompositeCS.hlsl | 335 ----------------------- 1 file changed, 335 deletions(-) delete mode 100644 package/Shaders/DeferredCompositeCS.hlsl diff --git a/package/Shaders/DeferredCompositeCS.hlsl b/package/Shaders/DeferredCompositeCS.hlsl deleted file mode 100644 index e666f48a37..0000000000 --- a/package/Shaders/DeferredCompositeCS.hlsl +++ /dev/null @@ -1,335 +0,0 @@ - -#include "Common/BRDF.hlsli" -#include "Common/Color.hlsli" -#include "Common/FrameBuffer.hlsli" -#include "Common/GBuffer.hlsli" -#include "Common/MotionBlur.hlsli" -#include "Common/Shading.hlsli" -#include "Common/SharedData.hlsli" -#include "Common/Spherical Harmonics/SphericalHarmonics.hlsli" -#include "Common/VR.hlsli" - -Texture2D SpecularTexture : register(t0); -Texture2D AlbedoTexture : register(t1); -Texture2D NormalRoughnessTexture : register(t2); -Texture2D MasksTexture : register(t3); - -RWTexture2D MainRW : register(u0); -RWTexture2D NormalTAAMaskSpecularMaskRW : register(u1); -RWTexture2D MotionVectorsRW : register(u2); -Texture2D DepthTexture : register(t4); - -#if defined(VR_STEREO_OPT) -# include "VRStereoOptimizations/modes.hlsli" -Texture2D StereoOptModeTexture : register(t16); -#endif - -#if defined(DYNAMIC_CUBEMAPS) -Texture2D ReflectanceTexture : register(t5); -TextureCube EnvTexture : register(t6); -TextureCube EnvReflectionsTexture : register(t7); - -SamplerState LinearSampler : register(s0); -#endif - -#if defined(SKYLIGHTING) -# include "Skylighting/Skylighting.hlsli" - -Texture3D SkylightingProbeArray : register(t8); -Texture2DArray stbn_vec3_2Dx1D_128x128x64 : register(t9); - -#endif - -#if defined(SSGI) -Texture2D SsgiAoTexture : register(t10); -Texture2D SsgiYTexture : register(t11); -Texture2D SsgiCoCgTexture : register(t12); -Texture2D SsgiSpecularTexture : register(t13); - -void SampleSSGI(uint2 pixCoord, float3 normalWS, out float ao, out float3 il) -{ - ao = 1 - SsgiAoTexture[pixCoord]; - float4 ssgiIlYSh = SsgiYTexture[pixCoord]; - // without ZH hallucination - // float ssgiIlY = SphericalHarmonics::FuncProductIntegral(ssgiIlYSh, SphericalHarmonics::EvaluateCosineLobe(normalWS)); - float ssgiIlY = SphericalHarmonics::SHHallucinateZH3Irradiance(ssgiIlYSh, normalWS); - float2 ssgiIlCoCg = SsgiCoCgTexture[pixCoord]; - il = max(0, Color::YCoCgToRGB(float3(ssgiIlY, ssgiIlCoCg))); -} - -void SampleSSGISpecular(uint2 pixCoord, sh2 lobe, inout float ao, out float3 il, in float3 normal, in float3 view, in float roughness) -{ - float NdotV = dot(normal, view); - float alpha = roughness * roughness; - ao = SpecularOcclusion(saturate(NdotV), alpha, ao); - - float4 ssgiIlYSh = SsgiYTexture[pixCoord]; - float ssgiIlY = SphericalHarmonics::FuncProductIntegral(ssgiIlYSh, lobe); - float2 ssgiIlCoCg = SsgiCoCgTexture[pixCoord].xy; - - // pi to compensate for the /pi in specularLobe - // i don't think there really should be a 1/PI but without it the specular is too strong - // reflectance being ambient reflectance doesn't help either - il = max(0, Color::YCoCgToRGB(float3(ssgiIlY, ssgiIlCoCg / Math::PI))); - - // HQ spec - float4 hq_spec = SsgiSpecularTexture[pixCoord]; - ao *= 1 - hq_spec.a; - il += hq_spec.rgb; -} -#endif - -#if defined(IBL) -# if !defined(DYNAMIC_CUBEMAPS) -# undef IBL -# else -# define IBL_DEFERRED -# include "IBL/IBL.hlsli" -# endif -#endif - -[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { - // Early exit if dispatch thread is outside screen bounds - if (any(dispatchID.xy >= uint2(SharedData::BufferDim.xy))) - return; - - float2 uv = float2(dispatchID.xy + 0.5) * SharedData::BufferDim.zw; - uv *= FrameBuffer::DynamicResolutionParams2.xy; // adjust for dynamic res - - uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); - -#if defined(VR_STEREO_OPT) - if (eyeIndex == 1) { - uint mode = StereoOptModeTexture[uint2(dispatchID.xy)] & 0x0F; - if (mode == MODE_MAIN) { // stencil-culled in Eye 1, filled by ReprojectionCS - return; - } - } -#endif - - uv = Stereo::ConvertFromStereoUV(uv, eyeIndex); - - float3 normalGlossiness = NormalRoughnessTexture[dispatchID.xy]; - float3 normalVS = GBuffer::DecodeNormal(normalGlossiness.xy); - - float3 diffuseColor = MainRW[dispatchID.xy].xyz; - float3 specularColor = SpecularTexture[dispatchID.xy]; - float3 albedo = AlbedoTexture[dispatchID.xy]; - - float depth = DepthTexture[dispatchID.xy]; - float4 positionWS = float4(2 * float2(uv.x, -uv.y + 1) - 1, depth, 1); - positionWS = mul(FrameBuffer::CameraViewProjInverse[eyeIndex], positionWS); - positionWS.xyz = positionWS.xyz / positionWS.w; - - if (depth == 1.0) - MotionVectorsRW[dispatchID.xy] = MotionBlur::GetSSMotionVector(positionWS, positionWS, eyeIndex); // Apply sky motion vectors - - float glossiness = normalGlossiness.z; - - float3 linDiffuseColor = Color::IrradianceToLinear(diffuseColor); - float3 normalWS = normalize(mul(FrameBuffer::CameraViewInverse[eyeIndex], float4(normalVS, 0)).xyz); - -#if defined(SSGI) - - float ssgiAo; - float3 ssgiIl; - SampleSSGI(dispatchID.xy, normalWS, ssgiAo, ssgiIl); - - float3 linAlbedo = Color::IrradianceToLinear(albedo / Color::PBRLightingScale); - float3 multiBounceSSGIAo = MultiBounceAO(linAlbedo, ssgiAo); - - float3 directionalAmbientColor = 0; - -# if defined(IBL) - if (SharedData::iblSettings.EnableIBL) { - float3 vanillaDALC = Color::Ambient(max(0, SharedData::GetAmbient(normalWS))); - -# if defined(SKYLIGHTING) -# if defined(VR) - float3 positionMS = positionWS.xyz + FrameBuffer::CameraPosAdjust[eyeIndex].xyz - FrameBuffer::CameraPosAdjust[0].xyz; -# else - float3 positionMS = positionWS.xyz; -# endif - sh2 skylightingSH = Skylighting::sample(SharedData::skylightingSettings, SkylightingProbeArray, stbn_vec3_2Dx1D_128x128x64, dispatchID.xy, positionMS.xyz, normalWS); - float skylightingDiffuse = SphericalHarmonics::FuncProductIntegral(skylightingSH, SphericalHarmonics::EvaluateCosineLobe(normalWS)) / Math::PI; - skylightingDiffuse = saturate(skylightingDiffuse); - skylightingDiffuse = Skylighting::mixDiffuse(SharedData::skylightingSettings, skylightingDiffuse); - directionalAmbientColor = ImageBasedLighting::GetDiffuseIBLOccluded(vanillaDALC, -normalWS, skylightingDiffuse) * albedo; -# else - directionalAmbientColor = ImageBasedLighting::GetDiffuseIBL(vanillaDALC, -normalWS) * albedo; -# endif - - directionalAmbientColor = Color::RGBToYCoCg(directionalAmbientColor); - directionalAmbientColor.x = MasksTexture[dispatchID.xy].z; - directionalAmbientColor = Color::YCoCgToRGB(directionalAmbientColor); - directionalAmbientColor = max(0, directionalAmbientColor); - } else -# endif - { - directionalAmbientColor = Color::Ambient(max(0, SharedData::GetAmbient(normalWS))); - directionalAmbientColor *= albedo; - - directionalAmbientColor = Color::RGBToYCoCg(directionalAmbientColor); - directionalAmbientColor.x = MasksTexture[dispatchID.xy].z; - directionalAmbientColor = Color::YCoCgToRGB(directionalAmbientColor); - directionalAmbientColor = max(0, directionalAmbientColor); - } - - { - float maxScale = 1.0; - if (directionalAmbientColor.x > 0.0) - maxScale = min(maxScale, diffuseColor.x / directionalAmbientColor.x); - if (directionalAmbientColor.y > 0.0) - maxScale = min(maxScale, diffuseColor.y / directionalAmbientColor.y); - if (directionalAmbientColor.z > 0.0) - maxScale = min(maxScale, diffuseColor.z / directionalAmbientColor.z); - directionalAmbientColor *= maxScale; - - diffuseColor = max(0.0, diffuseColor - directionalAmbientColor); - linDiffuseColor = Color::IrradianceToLinear(diffuseColor); - linDiffuseColor *= sqrt(multiBounceSSGIAo); - diffuseColor = Color::IrradianceToGamma(linDiffuseColor); - diffuseColor += Color::IrradianceToGamma(Color::IrradianceToLinear(directionalAmbientColor) * multiBounceSSGIAo); - linDiffuseColor = Color::IrradianceToLinear(diffuseColor); - } - - linDiffuseColor += ssgiIl * linAlbedo; -#endif - - float3 color = linDiffuseColor + specularColor; - -#if defined(DYNAMIC_CUBEMAPS) - - float3 reflectance = ReflectanceTexture[dispatchID.xy]; - - if (any(reflectance > 0.0)) { - float3 V = -normalize(positionWS.xyz); - float3 R = reflect(-V, normalWS); - - float roughness = 1.0 - glossiness; - float level = roughness * 7.0; - - sh2 specularLobe = SphericalHarmonics::FauxSpecularLobe(normalWS, V, roughness); - - float3 finalIrradiance = 0; - - float directionalAmbientColorSpecular = Color::RGBToLuminance(Color::Ambient(max(0, SharedData::GetAmbient(R)))) * Color::ReflectionNormalisationScale; - -# if defined(SKYLIGHTING) -# if defined(VR) - float3 positionMS = positionWS.xyz + FrameBuffer::CameraPosAdjust[eyeIndex].xyz - FrameBuffer::CameraPosAdjust[0].xyz; -# else - float3 positionMS = positionWS.xyz; -# endif - - sh2 skylighting = Skylighting::sample(SharedData::skylightingSettings, SkylightingProbeArray, stbn_vec3_2Dx1D_128x128x64, dispatchID.xy, positionMS.xyz, R); - - float skylightingSpecular = SphericalHarmonics::FuncProductIntegral(skylighting, specularLobe); - skylightingSpecular = saturate(skylightingSpecular); - skylightingSpecular = Skylighting::mixSpecular(SharedData::skylightingSettings, skylightingSpecular); -# endif - -# if defined(IBL) - if (SharedData::iblSettings.EnableIBL) { - float3 envSample = EnvTexture.SampleLevel(LinearSampler, R, level); - float3 fullSample = EnvReflectionsTexture.SampleLevel(LinearSampler, R, level); - float3 envSpecular, skySpecular; - - if (SharedData::iblSettings.DALCMode >= 2) { - // Mode 2/3: DALC-normalized env scaled by DALCAmount + sky overlay - float envLum = Color::RGBToLuminance(EnvTexture.SampleLevel(LinearSampler, R, 15)); - envSpecular = Color::IrradianceToLinear((envSample / max(envLum, 0.001)) * directionalAmbientColorSpecular) * SharedData::iblSettings.DALCAmount; - skySpecular = Color::IrradianceToLinear(max(0, fullSample - envSample)) * SharedData::iblSettings.SkyIBLScale; -# if defined(SKYLIGHTING) - envSpecular *= (SharedData::iblSettings.DALCMode == 3) ? skylightingSpecular : 1.0; - skySpecular *= skylightingSpecular; -# elif defined(INTERIOR) - skySpecular = 0; -# endif - } else { - // Mode 0/1: IBL ratio-based - float3 ratio = ImageBasedLighting::GetIBLRatio(); - envSpecular = Color::IrradianceToLinear(envSample * ratio) * SharedData::iblSettings.EnvIBLScale; - skySpecular = Color::IrradianceToLinear(max(0, fullSample - envSample)) * SharedData::iblSettings.SkyIBLScale; -# if defined(SKYLIGHTING) - skySpecular *= skylightingSpecular; -# elif defined(INTERIOR) - skySpecular = 0; -# endif - } - - finalIrradiance = envSpecular + skySpecular; - } else -# endif - { - // Fallback without IBL: normalize-by-luminance with DALC -# if defined(INTERIOR) - float3 specularIrradiance = EnvTexture.SampleLevel(LinearSampler, R, level); - float specularIrradianceLuminance = Color::RGBToLuminance(EnvTexture.SampleLevel(LinearSampler, R, 15)); - specularIrradiance = (specularIrradiance / max(specularIrradianceLuminance, 0.001)) * directionalAmbientColorSpecular; - finalIrradiance = Color::IrradianceToLinear(specularIrradiance); -# elif defined(SKYLIGHTING) - float3 specularIrradianceReflections = 0.0; - if (skylightingSpecular > 0.0) { - specularIrradianceReflections = EnvReflectionsTexture.SampleLevel(LinearSampler, R, level); - float lum = Color::RGBToLuminance(EnvReflectionsTexture.SampleLevel(LinearSampler, R, 15)); - specularIrradianceReflections = (specularIrradianceReflections / max(lum, 0.001)) * directionalAmbientColorSpecular; - specularIrradianceReflections = Color::IrradianceToLinear(specularIrradianceReflections); - } - float3 specularIrradiance = 0.0; - if (skylightingSpecular < 1.0) { - specularIrradiance = EnvTexture.SampleLevel(LinearSampler, R, level); - float lum = Color::RGBToLuminance(EnvTexture.SampleLevel(LinearSampler, R, 15)); - float dalcScaled = Color::IrradianceToGamma(Color::IrradianceToLinear(directionalAmbientColorSpecular) * skylightingSpecular); - specularIrradiance = (specularIrradiance / max(lum, 0.001)) * dalcScaled; - specularIrradiance = Color::IrradianceToLinear(specularIrradiance); - } - finalIrradiance = lerp(specularIrradiance, specularIrradianceReflections, skylightingSpecular); -# else - float3 specularIrradiance = EnvReflectionsTexture.SampleLevel(LinearSampler, R, level); - float specularIrradianceLuminance = Color::RGBToLuminance(EnvReflectionsTexture.SampleLevel(LinearSampler, R, 15)); - specularIrradiance = (specularIrradiance / max(specularIrradianceLuminance, 0.001)) * directionalAmbientColorSpecular; - finalIrradiance = Color::IrradianceToLinear(specularIrradiance); -# endif - } - -# if defined(SSGI) - float3 ssgiIlSpecular; - SampleSSGISpecular(dispatchID.xy, specularLobe, ssgiAo, ssgiIlSpecular, normalWS, V, roughness); - - finalIrradiance = (finalIrradiance * ssgiAo); - - ssgiIlSpecular = Color::RGBToYCoCg(ssgiIlSpecular); - ssgiIlSpecular = max(0, Color::YCoCgToRGB(float3(ssgiIlSpecular.x, lerp(ssgiIlSpecular.yz, Color::RGBToYCoCg(finalIrradiance).yz, 0.5)))); - - finalIrradiance += ssgiIlSpecular; -# endif - - color += reflectance * finalIrradiance; - } - -#endif - - color = Color::IrradianceToGamma(color); - -#if defined(DEBUG) - -# if defined(VR) - uv.x += (eyeIndex ? 0.1 : -0.1); -# endif // VR - - if (uv.x < 0.5 && uv.y < 0.5) { - color = color; - } else if (uv.x < 0.5) { - color = albedo; - } else if (uv.y < 0.5) { - color = normalVS; - } else { - color = glossiness; - } - -#endif - - MainRW[dispatchID.xy] = float4(color, 1.0); - NormalTAAMaskSpecularMaskRW[dispatchID.xy] = float4(GBuffer::EncodeNormalVanilla(normalVS), 0.0, 0.0); -} \ No newline at end of file From 1f6e17e6b2049019212fb33898dffdfbd4c236a3 Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:23:14 +0100 Subject: [PATCH 04/17] perf: more optimisations --- package/Shaders/Common/GBuffer.hlsli | 36 +++------- package/Shaders/DeferredCompositePS.hlsl | 2 - package/Shaders/Tests/TestGBuffer.hlsl | 91 +++++------------------- src/Deferred.cpp | 3 +- 4 files changed, 29 insertions(+), 103 deletions(-) diff --git a/package/Shaders/Common/GBuffer.hlsli b/package/Shaders/Common/GBuffer.hlsli index 8cca6bc7c0..9835d13792 100644 --- a/package/Shaders/Common/GBuffer.hlsli +++ b/package/Shaders/Common/GBuffer.hlsli @@ -1,42 +1,26 @@ #ifndef __GBUFFER_DEPENDENCY_HLSL__ #define __GBUFFER_DEPENDENCY_HLSL__ -// https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding/ - namespace GBuffer { - half2 OctWrap(half2 v) - { - return (1.0h - abs(v.yx)) * (v.xy >= 0.0h ? 1.0h : -1.0h); - } - half2 EncodeNormal(half3 n) - { - n = -n; - n /= (abs(n.x) + abs(n.y) + abs(n.z)); - n.xy = n.z >= 0.0h ? n.xy : OctWrap(n.xy); - n.xy = n.xy * 0.5h + 0.5h; - return n.xy; - } - - half3 DecodeNormal(half2 f) - { - f = f * 2.0h - 1.0h; - // https://twitter.com/Stubbesaurus/status/937994790553227264 - half3 n = half3(f.x, f.y, 1.0h - abs(f.x) - abs(f.y)); - half t = saturate(-n.z); - n.xy += n.xy >= 0.0h ? -t : t; - return -normalize(n); - } - - half2 EncodeNormalVanilla(half3 n) { n.z = max(1.0h / 1000.0h, sqrt(8.0h + -8.0h * n.z)); n.xy /= n.z; return n.xy + 0.5h; } + half3 DecodeNormal(half2 enc) + { + float2 fenc = enc * 4.0 - 2.0; + float f = dot(fenc, fenc); + float3 n; + n.xy = fenc * sqrt(1.0 - f / 4.0); + n.z = f / 2.0 - 1.0; + return n; + } + } #endif // __GBUFFER_DEPENDENCY_HLSL__ \ No newline at end of file diff --git a/package/Shaders/DeferredCompositePS.hlsl b/package/Shaders/DeferredCompositePS.hlsl index 04f944684b..d26b382333 100644 --- a/package/Shaders/DeferredCompositePS.hlsl +++ b/package/Shaders/DeferredCompositePS.hlsl @@ -90,7 +90,6 @@ struct PS_INPUT struct PS_OUTPUT { float4 Main : SV_Target0; - float4 NormalTAAMaskSpecularMask : SV_Target1; }; PS_OUTPUT main(PS_INPUT input) @@ -324,6 +323,5 @@ PS_OUTPUT main(PS_INPUT input) PS_OUTPUT output; output.Main = float4(color, 1.0); - output.NormalTAAMaskSpecularMask = float4(GBuffer::EncodeNormalVanilla(normalVS), 0.0, 0.0); return output; } diff --git a/package/Shaders/Tests/TestGBuffer.hlsl b/package/Shaders/Tests/TestGBuffer.hlsl index abdfb8d587..ec2d03c246 100644 --- a/package/Shaders/Tests/TestGBuffer.hlsl +++ b/package/Shaders/Tests/TestGBuffer.hlsl @@ -1,18 +1,16 @@ // HLSL Unit Tests for Common/GBuffer.hlsli -// Note: GBuffer uses half types - we use half throughout to avoid conversion warnings #include "/Shaders/Common/GBuffer.hlsli" #include "/Test/STF/ShaderTestFramework.hlsli" /// @tags gbuffer, normal, encoding [numthreads(1, 1, 1)] void TestNormalEncodingRoundtrip() { - // Test that encoding and decoding normals is reversible half3 testNormals[6] = { - half3(0.0h, 0.0h, 1.0h), // Up - half3(0.0h, 0.0h, -1.0h), // Down - half3(1.0h, 0.0h, 0.0h), // Right - half3(-1.0h, 0.0h, 0.0h), // Left - half3(0.0h, 1.0h, 0.0h), // Forward - half3(0.0h, -1.0h, 0.0h) // Back + half3(0.0h, 0.0h, 1.0h), + half3(0.0h, 0.0h, -1.0h), + half3(1.0h, 0.0h, 0.0h), + half3(-1.0h, 0.0h, 0.0h), + half3(0.0h, 1.0h, 0.0h), + half3(0.0h, -1.0h, 0.0h) }; for (int i = 0; i < 6; i++) { @@ -20,22 +18,14 @@ half2 encoded = GBuffer::EncodeNormal(original); half3 decoded = GBuffer::DecodeNormal(encoded); - // Check that decoded normal is close to original - ASSERT(IsTrue, abs(decoded.x - original.x) < 0.01h); - ASSERT(IsTrue, abs(decoded.y - original.y) < 0.01h); - ASSERT(IsTrue, abs(decoded.z - original.z) < 0.01h); - - // Encoded values should be in [0, 1] range - ASSERT(IsTrue, encoded.x >= 0.0h && encoded.x <= 1.0h); - ASSERT(IsTrue, encoded.y >= 0.0h && encoded.y <= 1.0h); + ASSERT(IsTrue, abs(decoded.x - original.x) < 0.05h); + ASSERT(IsTrue, abs(decoded.y - original.y) < 0.05h); + ASSERT(IsTrue, abs(decoded.z - original.z) < 0.05h); } } - /// @tags gbuffer, normal, encoding - [numthreads(1, 1, 1)] void TestNormalEncodingAngledNormals() -{ - // Test behavioral properties of octahedral encoding (not exact numerical accuracy) - // Half precision + quantization means we check: valid output, normalized, reasonable direction +/// @tags gbuffer, normal, encoding +[numthreads(1, 1, 1)] void TestNormalEncodingAngledNormals() { half3 testNormals[4] = { normalize(half3(1.0h, 1.0h, 1.0h)), normalize(half3(-1.0h, 1.0h, 1.0h)), @@ -48,64 +38,19 @@ half2 encoded = GBuffer::EncodeNormal(original); half3 decoded = GBuffer::DecodeNormal(encoded); - // Check behavioral properties (relaxed for half precision quantization): - // 1. Encoded values are in valid range [0, 1] - ASSERT(IsTrue, encoded.x >= 0.0h && encoded.x <= 1.0h); - ASSERT(IsTrue, encoded.y >= 0.0h && encoded.y <= 1.0h); - - // 2. Decoded normal is normalized (unit length) half length = sqrt(decoded.x * decoded.x + decoded.y * decoded.y + decoded.z * decoded.z); - ASSERT(IsTrue, abs(length - 1.0h) < 0.02h); // Relaxed tolerance for half precision + ASSERT(IsTrue, abs(length - 1.0h) < 0.05h); } } /// @tags gbuffer, normal, encoding -[numthreads(1, 1, 1)] void TestOctWrap() { - // Test behavioral properties of OctWrap (not exact numerical values) - // Half precision ternary operators have quantization, so check valid output ranges - - // Test 1: Positive inputs should produce outputs in valid range - half2 v1 = half2(0.5h, 0.5h); - half2 wrapped1 = GBuffer::OctWrap(v1); - ASSERT(IsTrue, wrapped1.x >= 0.0h && wrapped1.x <= 1.0h); - ASSERT(IsTrue, wrapped1.y >= 0.0h && wrapped1.y <= 1.0h); - - // Test 2: Negative inputs should produce outputs in valid range - half2 v2 = half2(-0.3h, 0.7h); - half2 wrapped2 = GBuffer::OctWrap(v2); - ASSERT(IsTrue, wrapped2.x >= -1.0h && wrapped2.x <= 1.0h); - ASSERT(IsTrue, wrapped2.y >= -1.0h && wrapped2.y <= 1.0h); - - // Test 3: Mixed signs should produce outputs in valid range - half2 v3 = half2(0.2h, -0.8h); - half2 wrapped3 = GBuffer::OctWrap(v3); - ASSERT(IsTrue, wrapped3.x >= -1.0h && wrapped3.x <= 1.0h); - ASSERT(IsTrue, wrapped3.y >= -1.0h && wrapped3.y <= 1.0h); -} - - /// @tags gbuffer, normal, encoding - [numthreads(1, 1, 1)] void TestVanillaNormalEncoding() -{ - // Test vanilla normal encoding with known normals +[numthreads(1, 1, 1)] void TestNormalEncodingUpNormal() { half3 upNormal = half3(0.0h, 0.0h, 1.0h); - half2 encoded = GBuffer::EncodeNormalVanilla(upNormal); + half2 encoded = GBuffer::EncodeNormal(upNormal); - // For up normal (0,0,1): z = sqrt(8 + -8*1) = sqrt(0) ≈ tiny value - // Result should be near (0.5, 0.5) due to the +0.5 offset - ASSERT(IsTrue, abs(encoded.x - 0.5h) < 0.2h); - ASSERT(IsTrue, abs(encoded.y - 0.5h) < 0.2h); + ASSERT(IsTrue, abs(encoded.x - 0.5h) < 0.01h); + ASSERT(IsTrue, abs(encoded.y - 0.5h) < 0.01h); - // Test that encoding produces values in reasonable range - half3 testNormals[3] = { - normalize(half3(1.0h, 0.0h, 0.0h)), - normalize(half3(0.0h, 1.0h, 0.0h)), - normalize(half3(1.0h, 1.0h, 0.0h)) - }; - - for (int i = 0; i < 3; i++) { - half2 enc = GBuffer::EncodeNormalVanilla(testNormals[i]); - // Encoded values should be in a reasonable range (not infinite or NaN) - ASSERT(IsTrue, enc.x >= -10.0h && enc.x <= 10.0h); - ASSERT(IsTrue, enc.y >= -10.0h && enc.y <= 10.0h); - } + half3 decoded = GBuffer::DecodeNormal(encoded); + ASSERT(IsTrue, abs(decoded.z - 1.0h) < 0.01h); } diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 2bab17aa1b..4cd8bf6867 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -320,7 +320,6 @@ void Deferred::DeferredPasses() auto masks = renderer->GetRuntimeData().renderTargets[MASKS]; auto main = renderer->GetRuntimeData().renderTargets[forwardRenderTargets[0]]; - auto normals = renderer->GetRuntimeData().renderTargets[forwardRenderTargets[2]]; auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; auto reflectance = renderer->GetRuntimeData().renderTargets[REFLECTANCE]; @@ -398,7 +397,7 @@ void Deferred::DeferredPasses() // Render targets + stencil test for VR stereo culling bool useStencil = globals::game::isVR && globals::features::vr.stereoOpt.IsStencilActive(); - ID3D11RenderTargetView* rtvs[2]{ main.RTV, normals.RTV }; + ID3D11RenderTargetView* rtvs[1]{ main.RTV }; ID3D11DepthStencilView* dsv = useStencil ? depth.views[0] : nullptr; context->OMSetRenderTargets(ARRAYSIZE(rtvs), rtvs, dsv); context->OMSetBlendState(compositeBlendState.get(), nullptr, 0xFFFFFFFF); From 6fa9405b0076a90903546d931299430d532d87fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:27:41 +0000 Subject: [PATCH 05/17] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20pre-commi?= =?UTF-8?q?t.ci=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated formatting by clang-format, prettier, and other hooks. See https://pre-commit.ci for details. --- package/Shaders/DeferredCompositePS.hlsl | 6 +++--- package/Shaders/DeferredCompositeVS.hlsl | 4 ++-- package/Shaders/Tests/TestGBuffer.hlsl | 5 +++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/package/Shaders/DeferredCompositePS.hlsl b/package/Shaders/DeferredCompositePS.hlsl index d26b382333..f09a1c1f96 100644 --- a/package/Shaders/DeferredCompositePS.hlsl +++ b/package/Shaders/DeferredCompositePS.hlsl @@ -83,13 +83,13 @@ void SampleSSGISpecular(uint2 pixCoord, sh2 lobe, inout float ao, out float3 il, struct PS_INPUT { - float4 Position : SV_Position; - float2 TexCoord : TEXCOORD0; + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; }; struct PS_OUTPUT { - float4 Main : SV_Target0; + float4 Main: SV_Target0; }; PS_OUTPUT main(PS_INPUT input) diff --git a/package/Shaders/DeferredCompositeVS.hlsl b/package/Shaders/DeferredCompositeVS.hlsl index 517654f57d..b4b8d394ad 100644 --- a/package/Shaders/DeferredCompositeVS.hlsl +++ b/package/Shaders/DeferredCompositeVS.hlsl @@ -1,7 +1,7 @@ struct VS_OUTPUT { - float4 Position : SV_Position; - float2 TexCoord : TEXCOORD0; + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; }; VS_OUTPUT main(uint vertexID : SV_VertexID) diff --git a/package/Shaders/Tests/TestGBuffer.hlsl b/package/Shaders/Tests/TestGBuffer.hlsl index ec2d03c246..c2635aaabc 100644 --- a/package/Shaders/Tests/TestGBuffer.hlsl +++ b/package/Shaders/Tests/TestGBuffer.hlsl @@ -24,8 +24,9 @@ } } -/// @tags gbuffer, normal, encoding -[numthreads(1, 1, 1)] void TestNormalEncodingAngledNormals() { + /// @tags gbuffer, normal, encoding + [numthreads(1, 1, 1)] void TestNormalEncodingAngledNormals() +{ half3 testNormals[4] = { normalize(half3(1.0h, 1.0h, 1.0h)), normalize(half3(-1.0h, 1.0h, 1.0h)), From 80a7d1f0b6e2d2093aaa1d41a67c312817f2fddd Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:30:18 +0100 Subject: [PATCH 06/17] refactor: clean up bindings --- features/IBL/Shaders/IBL/IBL.hlsli | 4 +- package/Shaders/DeferredCompositePS.hlsl | 71 ++++++++++++++++-------- src/Deferred.cpp | 37 ++++++------ 3 files changed, 68 insertions(+), 44 deletions(-) diff --git a/features/IBL/Shaders/IBL/IBL.hlsli b/features/IBL/Shaders/IBL/IBL.hlsli index e5dcb0f787..26878c2e72 100644 --- a/features/IBL/Shaders/IBL/IBL.hlsli +++ b/features/IBL/Shaders/IBL/IBL.hlsli @@ -10,8 +10,8 @@ namespace ImageBasedLighting { #if defined(IBL_DEFERRED) - Texture2D EnvIBLTexture : register(t14); - Texture2D SkyIBLTexture : register(t15); + Texture2D EnvIBLTexture : register(t15); + Texture2D SkyIBLTexture : register(t16); #else Texture2D EnvIBLTexture : register(t76); Texture2D SkyIBLTexture : register(t77); diff --git a/package/Shaders/DeferredCompositePS.hlsl b/package/Shaders/DeferredCompositePS.hlsl index d26b382333..8c844a7481 100644 --- a/package/Shaders/DeferredCompositePS.hlsl +++ b/package/Shaders/DeferredCompositePS.hlsl @@ -8,19 +8,29 @@ #include "Common/Spherical Harmonics/SphericalHarmonics.hlsli" #include "Common/VR.hlsli" -Texture2D SpecularTexture : register(t0); -Texture2D AlbedoTexture : register(t1); +Texture2D MainInputTexture : register(t0); +Texture2D SpecularTexture : register(t1); + +#if defined(SSGI) || defined(DYNAMIC_CUBEMAPS) || defined(DEBUG) Texture2D NormalRoughnessTexture : register(t2); -Texture2D MasksTexture : register(t3); +#endif -Texture2D DepthTexture : register(t4); +#if defined(SSGI) || defined(DYNAMIC_CUBEMAPS) +Texture2D DepthTexture : register(t3); +#endif -Texture2D MainInputTexture : register(t17); +#if defined(SSGI) || defined(DEBUG) +Texture2D AlbedoTexture : register(t4); +#endif + +#if defined(SSGI) +Texture2D MasksTexture : register(t5); +#endif #if defined(DYNAMIC_CUBEMAPS) -Texture2D ReflectanceTexture : register(t5); -TextureCube EnvTexture : register(t6); -TextureCube EnvReflectionsTexture : register(t7); +Texture2D ReflectanceTexture : register(t6); +TextureCube EnvTexture : register(t7); +TextureCube EnvReflectionsTexture : register(t8); SamplerState LinearSampler : register(s0); #endif @@ -28,16 +38,15 @@ SamplerState LinearSampler : register(s0); #if defined(SKYLIGHTING) # include "Skylighting/Skylighting.hlsli" -Texture3D SkylightingProbeArray : register(t8); -Texture2DArray stbn_vec3_2Dx1D_128x128x64 : register(t9); - +Texture3D SkylightingProbeArray : register(t9); +Texture2DArray stbn_vec3_2Dx1D_128x128x64 : register(t10); #endif #if defined(SSGI) -Texture2D SsgiAoTexture : register(t10); -Texture2D SsgiYTexture : register(t11); -Texture2D SsgiCoCgTexture : register(t12); -Texture2D SsgiSpecularTexture : register(t13); +Texture2D SsgiAoTexture : register(t11); +Texture2D SsgiYTexture : register(t12); +Texture2D SsgiCoCgTexture : register(t13); +Texture2D SsgiSpecularTexture : register(t14); void SampleSSGI(uint2 pixCoord, float3 normalWS, out float ao, out float3 il) { @@ -103,24 +112,27 @@ PS_OUTPUT main(PS_INPUT input) uv = Stereo::ConvertFromStereoUV(uv, eyeIndex); - float3 normalGlossiness = NormalRoughnessTexture[pixCoord]; - float3 normalVS = GBuffer::DecodeNormal(normalGlossiness.xy); - float3 diffuseColor = MainInputTexture[pixCoord].xyz; float3 specularColor = SpecularTexture[pixCoord]; - float3 albedo = AlbedoTexture[pixCoord]; + float3 linDiffuseColor = Color::IrradianceToLinear(diffuseColor); + +#if defined(SSGI) || defined(DYNAMIC_CUBEMAPS) + float3 normalGlossiness = NormalRoughnessTexture[pixCoord]; + float3 normalVS = GBuffer::DecodeNormal(normalGlossiness.xy); + float3 normalWS = normalize(mul(FrameBuffer::CameraViewInverse[eyeIndex], float4(normalVS, 0)).xyz); float depth = DepthTexture[pixCoord]; float4 positionWS = float4(2 * float2(uv.x, -uv.y + 1) - 1, depth, 1); positionWS = mul(FrameBuffer::CameraViewProjInverse[eyeIndex], positionWS); positionWS.xyz = positionWS.xyz / positionWS.w; +#endif +#if defined(DYNAMIC_CUBEMAPS) float glossiness = normalGlossiness.z; - - float3 linDiffuseColor = Color::IrradianceToLinear(diffuseColor); - float3 normalWS = normalize(mul(FrameBuffer::CameraViewInverse[eyeIndex], float4(normalVS, 0)).xyz); +#endif #if defined(SSGI) + float3 albedo = AlbedoTexture[pixCoord]; float ssgiAo; float3 ssgiIl; @@ -305,9 +317,22 @@ PS_OUTPUT main(PS_INPUT input) #if defined(DEBUG) +# if !defined(SSGI) && !defined(DYNAMIC_CUBEMAPS) + float3 normalGlossiness = NormalRoughnessTexture[pixCoord]; + float3 normalVS = GBuffer::DecodeNormal(normalGlossiness.xy); +# endif + +# if !defined(SSGI) + float3 albedo = AlbedoTexture[pixCoord]; +# endif + +# if !defined(DYNAMIC_CUBEMAPS) + float glossiness = normalGlossiness.z; +# endif + # if defined(VR) uv.x += (eyeIndex ? 0.1 : -0.1); -# endif // VR +# endif if (uv.x < 0.5 && uv.y < 0.5) { color = color; diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 4cd8bf6867..082ae64385 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -369,25 +369,24 @@ void Deferred::DeferredPasses() } // SRVs - ID3D11ShaderResourceView* srvs[18]{ - specular.SRV, - albedo.SRV, - normalRoughness.SRV, - masks.SRV, - dynamicCubemaps.loaded || REL::Module::IsVR() ? Util::GetCurrentSceneDepthSRV(true) : nullptr, - dynamicCubemaps.loaded ? reflectance.SRV : nullptr, - dynamicCubemaps.loaded ? dynamicCubemaps.envTexture->srv.get() : nullptr, - dynamicCubemaps.loaded ? dynamicCubemaps.envReflectionsTexture->srv.get() : nullptr, - dynamicCubemaps.loaded && skylighting.loaded ? skylighting.texProbeArray->srv.get() : nullptr, - dynamicCubemaps.loaded && skylighting.loaded ? skylighting.stbn_vec3_2Dx1D_128x128x64.get() : nullptr, - ssgi_ao, - ssgi_hq_spec ? nullptr : ssgi_y, - ssgi_hq_spec ? nullptr : ssgi_cocg, - ssgi_hq_spec ? ssgi_gi_spec : nullptr, - ibl.loaded ? ibl.envIBLTexture->srv.get() : nullptr, - ibl.loaded ? ibl.skyIBLTexture->srv.get() : nullptr, - nullptr, - mainCopy.SRV, + ID3D11ShaderResourceView* srvs[17]{ + mainCopy.SRV, // t0 MainInputTexture + specular.SRV, // t1 SpecularTexture + normalRoughness.SRV, // t2 NormalRoughnessTexture + dynamicCubemaps.loaded || REL::Module::IsVR() ? Util::GetCurrentSceneDepthSRV(true) : nullptr, // t3 DepthTexture + albedo.SRV, // t4 AlbedoTexture + masks.SRV, // t5 MasksTexture + dynamicCubemaps.loaded ? reflectance.SRV : nullptr, // t6 ReflectanceTexture + dynamicCubemaps.loaded ? dynamicCubemaps.envTexture->srv.get() : nullptr, // t7 EnvTexture + dynamicCubemaps.loaded ? dynamicCubemaps.envReflectionsTexture->srv.get() : nullptr, // t8 EnvReflectionsTexture + dynamicCubemaps.loaded && skylighting.loaded ? skylighting.texProbeArray->srv.get() : nullptr, // t9 SkylightingProbeArray + dynamicCubemaps.loaded && skylighting.loaded ? skylighting.stbn_vec3_2Dx1D_128x128x64.get() : nullptr, // t10 stbn + ssgi_ao, // t11 SsgiAoTexture + ssgi_hq_spec ? nullptr : ssgi_y, // t12 SsgiYTexture + ssgi_hq_spec ? nullptr : ssgi_cocg, // t13 SsgiCoCgTexture + ssgi_hq_spec ? ssgi_gi_spec : nullptr, // t14 SsgiSpecularTexture + ibl.loaded ? ibl.envIBLTexture->srv.get() : nullptr, // t15 EnvIBLTexture + ibl.loaded ? ibl.skyIBLTexture->srv.get() : nullptr, // t16 SkyIBLTexture }; context->PSSetShaderResources(0, ARRAYSIZE(srvs), srvs); From ec4ffb543817638ea652406d557899dd9bcea1a0 Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:34:23 +0100 Subject: [PATCH 07/17] chore: clanker changes --- package/Shaders/Tests/TestGBuffer.hlsl | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/package/Shaders/Tests/TestGBuffer.hlsl b/package/Shaders/Tests/TestGBuffer.hlsl index c2635aaabc..e704333164 100644 --- a/package/Shaders/Tests/TestGBuffer.hlsl +++ b/package/Shaders/Tests/TestGBuffer.hlsl @@ -5,8 +5,8 @@ /// @tags gbuffer, normal, encoding [numthreads(1, 1, 1)] void TestNormalEncodingRoundtrip() { half3 testNormals[6] = { - half3(0.0h, 0.0h, 1.0h), - half3(0.0h, 0.0h, -1.0h), + half3(0.01h, 0.0h, 1.0h), // near +Z pole + half3(0.0h, 0.01h, -1.0h), // near -Z pole half3(1.0h, 0.0h, 0.0h), half3(-1.0h, 0.0h, 0.0h), half3(0.0h, 1.0h, 0.0h), @@ -24,9 +24,8 @@ } } - /// @tags gbuffer, normal, encoding - [numthreads(1, 1, 1)] void TestNormalEncodingAngledNormals() -{ +/// @tags gbuffer, normal, encoding +[numthreads(1, 1, 1)] void TestNormalEncodingAngledNormals() { half3 testNormals[4] = { normalize(half3(1.0h, 1.0h, 1.0h)), normalize(half3(-1.0h, 1.0h, 1.0h)), @@ -45,13 +44,12 @@ } /// @tags gbuffer, normal, encoding -[numthreads(1, 1, 1)] void TestNormalEncodingUpNormal() { - half3 upNormal = half3(0.0h, 0.0h, 1.0h); - half2 encoded = GBuffer::EncodeNormal(upNormal); - - ASSERT(IsTrue, abs(encoded.x - 0.5h) < 0.01h); - ASSERT(IsTrue, abs(encoded.y - 0.5h) < 0.01h); - +[numthreads(1, 1, 1)] void TestNormalEncodingEquator() { + half3 equatorNormal = half3(1.0h, 0.0h, 0.0h); + half2 encoded = GBuffer::EncodeNormal(equatorNormal); half3 decoded = GBuffer::DecodeNormal(encoded); - ASSERT(IsTrue, abs(decoded.z - 1.0h) < 0.01h); + + ASSERT(IsTrue, abs(decoded.x - 1.0h) < 0.01h); + ASSERT(IsTrue, abs(decoded.y) < 0.01h); + ASSERT(IsTrue, abs(decoded.z) < 0.01h); } From 866ca15a0c06be210262daabb7bf4b20399e6732 Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:35:39 +0100 Subject: [PATCH 08/17] refactor: dont use half --- package/Shaders/Common/GBuffer.hlsli | 8 ++-- package/Shaders/Tests/TestGBuffer.hlsl | 58 +++++++++++++------------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/package/Shaders/Common/GBuffer.hlsli b/package/Shaders/Common/GBuffer.hlsli index 9835d13792..5d308f5d09 100644 --- a/package/Shaders/Common/GBuffer.hlsli +++ b/package/Shaders/Common/GBuffer.hlsli @@ -4,14 +4,14 @@ namespace GBuffer { - half2 EncodeNormal(half3 n) + float2 EncodeNormal(float3 n) { - n.z = max(1.0h / 1000.0h, sqrt(8.0h + -8.0h * n.z)); + n.z = max(0.001, sqrt(8.0 - 8.0 * n.z)); n.xy /= n.z; - return n.xy + 0.5h; + return n.xy + 0.5; } - half3 DecodeNormal(half2 enc) + float3 DecodeNormal(float2 enc) { float2 fenc = enc * 4.0 - 2.0; float f = dot(fenc, fenc); diff --git a/package/Shaders/Tests/TestGBuffer.hlsl b/package/Shaders/Tests/TestGBuffer.hlsl index e704333164..6b81774290 100644 --- a/package/Shaders/Tests/TestGBuffer.hlsl +++ b/package/Shaders/Tests/TestGBuffer.hlsl @@ -4,52 +4,52 @@ /// @tags gbuffer, normal, encoding [numthreads(1, 1, 1)] void TestNormalEncodingRoundtrip() { - half3 testNormals[6] = { - half3(0.01h, 0.0h, 1.0h), // near +Z pole - half3(0.0h, 0.01h, -1.0h), // near -Z pole - half3(1.0h, 0.0h, 0.0h), - half3(-1.0h, 0.0h, 0.0h), - half3(0.0h, 1.0h, 0.0h), - half3(0.0h, -1.0h, 0.0h) + float3 testNormals[6] = { + float3(0.01, 0.0, 1.0), // near +Z pole + float3(0.0, 0.01, -1.0), // near -Z pole + float3(1.0, 0.0, 0.0), + float3(-1.0, 0.0, 0.0), + float3(0.0, 1.0, 0.0), + float3(0.0, -1.0, 0.0) }; for (int i = 0; i < 6; i++) { - half3 original = normalize(testNormals[i]); - half2 encoded = GBuffer::EncodeNormal(original); - half3 decoded = GBuffer::DecodeNormal(encoded); + float3 original = normalize(testNormals[i]); + float2 encoded = GBuffer::EncodeNormal(original); + float3 decoded = GBuffer::DecodeNormal(encoded); - ASSERT(IsTrue, abs(decoded.x - original.x) < 0.05h); - ASSERT(IsTrue, abs(decoded.y - original.y) < 0.05h); - ASSERT(IsTrue, abs(decoded.z - original.z) < 0.05h); + ASSERT(IsTrue, abs(decoded.x - original.x) < 0.05); + ASSERT(IsTrue, abs(decoded.y - original.y) < 0.05); + ASSERT(IsTrue, abs(decoded.z - original.z) < 0.05); } } /// @tags gbuffer, normal, encoding [numthreads(1, 1, 1)] void TestNormalEncodingAngledNormals() { - half3 testNormals[4] = { - normalize(half3(1.0h, 1.0h, 1.0h)), - normalize(half3(-1.0h, 1.0h, 1.0h)), - normalize(half3(1.0h, -1.0h, 1.0h)), - normalize(half3(1.0h, 1.0h, -1.0h)) + float3 testNormals[4] = { + normalize(float3(1.0, 1.0, 1.0)), + normalize(float3(-1.0, 1.0, 1.0)), + normalize(float3(1.0, -1.0, 1.0)), + normalize(float3(1.0, 1.0, -1.0)) }; for (int i = 0; i < 4; i++) { - half3 original = testNormals[i]; - half2 encoded = GBuffer::EncodeNormal(original); - half3 decoded = GBuffer::DecodeNormal(encoded); + float3 original = testNormals[i]; + float2 encoded = GBuffer::EncodeNormal(original); + float3 decoded = GBuffer::DecodeNormal(encoded); - half length = sqrt(decoded.x * decoded.x + decoded.y * decoded.y + decoded.z * decoded.z); - ASSERT(IsTrue, abs(length - 1.0h) < 0.05h); + float length = sqrt(decoded.x * decoded.x + decoded.y * decoded.y + decoded.z * decoded.z); + ASSERT(IsTrue, abs(length - 1.0) < 0.05); } } /// @tags gbuffer, normal, encoding [numthreads(1, 1, 1)] void TestNormalEncodingEquator() { - half3 equatorNormal = half3(1.0h, 0.0h, 0.0h); - half2 encoded = GBuffer::EncodeNormal(equatorNormal); - half3 decoded = GBuffer::DecodeNormal(encoded); + float3 equatorNormal = float3(1.0, 0.0, 0.0); + float2 encoded = GBuffer::EncodeNormal(equatorNormal); + float3 decoded = GBuffer::DecodeNormal(encoded); - ASSERT(IsTrue, abs(decoded.x - 1.0h) < 0.01h); - ASSERT(IsTrue, abs(decoded.y) < 0.01h); - ASSERT(IsTrue, abs(decoded.z) < 0.01h); + ASSERT(IsTrue, abs(decoded.x - 1.0) < 0.01); + ASSERT(IsTrue, abs(decoded.y) < 0.01); + ASSERT(IsTrue, abs(decoded.z) < 0.01); } From e9786477f04fecd645290ad961274fe4a4f06123 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:37:27 +0000 Subject: [PATCH 09/17] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20pre-commi?= =?UTF-8?q?t.ci=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated formatting by clang-format, prettier, and other hooks. See https://pre-commit.ci for details. --- package/Shaders/Tests/TestGBuffer.hlsl | 9 ++++---- src/Deferred.cpp | 30 +++++++++++++------------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/package/Shaders/Tests/TestGBuffer.hlsl b/package/Shaders/Tests/TestGBuffer.hlsl index 6b81774290..85daaee166 100644 --- a/package/Shaders/Tests/TestGBuffer.hlsl +++ b/package/Shaders/Tests/TestGBuffer.hlsl @@ -5,8 +5,8 @@ /// @tags gbuffer, normal, encoding [numthreads(1, 1, 1)] void TestNormalEncodingRoundtrip() { float3 testNormals[6] = { - float3(0.01, 0.0, 1.0), // near +Z pole - float3(0.0, 0.01, -1.0), // near -Z pole + float3(0.01, 0.0, 1.0), // near +Z pole + float3(0.0, 0.01, -1.0), // near -Z pole float3(1.0, 0.0, 0.0), float3(-1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), @@ -24,8 +24,9 @@ } } -/// @tags gbuffer, normal, encoding -[numthreads(1, 1, 1)] void TestNormalEncodingAngledNormals() { + /// @tags gbuffer, normal, encoding + [numthreads(1, 1, 1)] void TestNormalEncodingAngledNormals() +{ float3 testNormals[4] = { normalize(float3(1.0, 1.0, 1.0)), normalize(float3(-1.0, 1.0, 1.0)), diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 082ae64385..e3e03cd636 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -370,23 +370,23 @@ void Deferred::DeferredPasses() // SRVs ID3D11ShaderResourceView* srvs[17]{ - mainCopy.SRV, // t0 MainInputTexture - specular.SRV, // t1 SpecularTexture - normalRoughness.SRV, // t2 NormalRoughnessTexture - dynamicCubemaps.loaded || REL::Module::IsVR() ? Util::GetCurrentSceneDepthSRV(true) : nullptr, // t3 DepthTexture - albedo.SRV, // t4 AlbedoTexture - masks.SRV, // t5 MasksTexture - dynamicCubemaps.loaded ? reflectance.SRV : nullptr, // t6 ReflectanceTexture - dynamicCubemaps.loaded ? dynamicCubemaps.envTexture->srv.get() : nullptr, // t7 EnvTexture - dynamicCubemaps.loaded ? dynamicCubemaps.envReflectionsTexture->srv.get() : nullptr, // t8 EnvReflectionsTexture + mainCopy.SRV, // t0 MainInputTexture + specular.SRV, // t1 SpecularTexture + normalRoughness.SRV, // t2 NormalRoughnessTexture + dynamicCubemaps.loaded || REL::Module::IsVR() ? Util::GetCurrentSceneDepthSRV(true) : nullptr, // t3 DepthTexture + albedo.SRV, // t4 AlbedoTexture + masks.SRV, // t5 MasksTexture + dynamicCubemaps.loaded ? reflectance.SRV : nullptr, // t6 ReflectanceTexture + dynamicCubemaps.loaded ? dynamicCubemaps.envTexture->srv.get() : nullptr, // t7 EnvTexture + dynamicCubemaps.loaded ? dynamicCubemaps.envReflectionsTexture->srv.get() : nullptr, // t8 EnvReflectionsTexture dynamicCubemaps.loaded && skylighting.loaded ? skylighting.texProbeArray->srv.get() : nullptr, // t9 SkylightingProbeArray dynamicCubemaps.loaded && skylighting.loaded ? skylighting.stbn_vec3_2Dx1D_128x128x64.get() : nullptr, // t10 stbn - ssgi_ao, // t11 SsgiAoTexture - ssgi_hq_spec ? nullptr : ssgi_y, // t12 SsgiYTexture - ssgi_hq_spec ? nullptr : ssgi_cocg, // t13 SsgiCoCgTexture - ssgi_hq_spec ? ssgi_gi_spec : nullptr, // t14 SsgiSpecularTexture - ibl.loaded ? ibl.envIBLTexture->srv.get() : nullptr, // t15 EnvIBLTexture - ibl.loaded ? ibl.skyIBLTexture->srv.get() : nullptr, // t16 SkyIBLTexture + ssgi_ao, // t11 SsgiAoTexture + ssgi_hq_spec ? nullptr : ssgi_y, // t12 SsgiYTexture + ssgi_hq_spec ? nullptr : ssgi_cocg, // t13 SsgiCoCgTexture + ssgi_hq_spec ? ssgi_gi_spec : nullptr, // t14 SsgiSpecularTexture + ibl.loaded ? ibl.envIBLTexture->srv.get() : nullptr, // t15 EnvIBLTexture + ibl.loaded ? ibl.skyIBLTexture->srv.get() : nullptr, // t16 SkyIBLTexture }; context->PSSetShaderResources(0, ARRAYSIZE(srvs), srvs); From 5d024b4f5c86fda693f79d2746b2a82498fcd84d Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:53:30 +0100 Subject: [PATCH 10/17] perf: remove normalroughness entirely --- src/Deferred.cpp | 8 +++++--- src/Deferred.h | 3 ++- src/Features/ScreenSpaceGI.cpp | 6 +++--- src/Features/SubsurfaceScattering.cpp | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 082ae64385..4966389782 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -107,7 +107,7 @@ void Deferred::SetupResources() // Reflectance SetupRenderTarget(REFLECTANCE, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R11G11B10_FLOAT, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); // Normal + Roughness - SetupRenderTarget(NORMALROUGHNESS, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R10G10B10A2_UNORM, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); + SetupRenderTarget(normalRoughnessRT, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R10G10B10A2_UNORM, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); // Masks SetupRenderTarget(MASKS, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R11G11B10_FLOAT, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); @@ -249,10 +249,12 @@ void Deferred::StartDeferred() forwardRenderTargets[i] = renderTargets[i]; } + normalRoughnessRT = forwardRenderTargets[2]; + RE::RENDER_TARGET targets[8]{ RE::RENDER_TARGET::kMAIN, RE::RENDER_TARGET::kMOTION_VECTOR, - NORMALROUGHNESS, + normalRoughnessRT, ALBEDO, SPECULAR, REFLECTANCE, @@ -316,7 +318,7 @@ void Deferred::DeferredPasses() auto specular = renderer->GetRuntimeData().renderTargets[SPECULAR]; auto albedo = renderer->GetRuntimeData().renderTargets[ALBEDO]; - auto normalRoughness = renderer->GetRuntimeData().renderTargets[NORMALROUGHNESS]; + auto normalRoughness = renderer->GetRuntimeData().renderTargets[normalRoughnessRT]; auto masks = renderer->GetRuntimeData().renderTargets[MASKS]; auto main = renderer->GetRuntimeData().renderTargets[forwardRenderTargets[0]]; diff --git a/src/Deferred.h b/src/Deferred.h index 330979134b..e7cad8bfc5 100644 --- a/src/Deferred.h +++ b/src/Deferred.h @@ -6,7 +6,6 @@ #define ALBEDO RE::RENDER_TARGETS::kINDIRECT #define SPECULAR RE::RENDER_TARGETS::kINDIRECT_DOWNSCALED #define REFLECTANCE RE::RENDER_TARGETS::kRAWINDIRECT -#define NORMALROUGHNESS RE::RENDER_TARGETS::kRAWINDIRECT_DOWNSCALED #define MASKS RE::RENDER_TARGETS::kRAWINDIRECT_PREVIOUS #define MASKS2 RE::RENDER_TARGETS::kRAWINDIRECT_PREVIOUS_DOWNSCALED @@ -49,6 +48,8 @@ class Deferred winrt::com_ptr compositeStencilDSState; winrt::com_ptr compositeRasterizerState; + RE::RENDER_TARGET normalRoughnessRT = RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK; + bool deferredPass = false; ID3D11SamplerState* linearSampler = nullptr; diff --git a/src/Features/ScreenSpaceGI.cpp b/src/Features/ScreenSpaceGI.cpp index 09f8156dba..bd8ac299dd 100644 --- a/src/Features/ScreenSpaceGI.cpp +++ b/src/Features/ScreenSpaceGI.cpp @@ -773,7 +773,7 @@ void ScreenSpaceGI::DrawSSGI() resetViews(); srvs.at(0) = rts[deferred->forwardRenderTargets[0]].SRV; srvs.at(1) = texWorkingDepth->srv.get(); - srvs.at(2) = rts[NORMALROUGHNESS].SRV; + srvs.at(2) = rts[globals::deferred->normalRoughnessRT].SRV; srvs.at(3) = texPrevGeo->srv.get(); srvs.at(4) = rts[RE::RENDER_TARGET::kMOTION_VECTOR].SRV; srvs.at(5) = texAccumFrames[lastFrameAccumTexIdx]->srv.get(); @@ -826,7 +826,7 @@ void ScreenSpaceGI::DrawSSGI() resetViews(); srvs.at(0) = texWorkingDepth->srv.get(); - srvs.at(1) = rts[NORMALROUGHNESS].SRV; + srvs.at(1) = rts[globals::deferred->normalRoughnessRT].SRV; srvs.at(2) = texRadiance->srv.get(); srvs.at(3) = texNoise->srv.get(); srvs.at(4) = texAccumFrames[lastFrameAccumTexIdx]->srv.get(); @@ -858,7 +858,7 @@ void ScreenSpaceGI::DrawSSGI() resetViews(); srvs.at(0) = texWorkingDepth->srv.get(); - srvs.at(1) = rts[NORMALROUGHNESS].SRV; + srvs.at(1) = rts[globals::deferred->normalRoughnessRT].SRV; srvs.at(2) = texAccumFrames[lastFrameAccumTexIdx]->srv.get(); srvs.at(3) = texIlY[inputGITexIdx]->srv.get(); srvs.at(4) = texIlCoCg[inputGITexIdx]->srv.get(); diff --git a/src/Features/SubsurfaceScattering.cpp b/src/Features/SubsurfaceScattering.cpp index cfa1179070..1b14934e08 100644 --- a/src/Features/SubsurfaceScattering.cpp +++ b/src/Features/SubsurfaceScattering.cpp @@ -233,7 +233,7 @@ void SubsurfaceScattering::DrawSSS() auto mask = renderer->GetRuntimeData().renderTargets[MASKS]; auto albedo = renderer->GetRuntimeData().renderTargets[ALBEDO]; - auto normal = renderer->GetRuntimeData().renderTargets[NORMALROUGHNESS]; + auto normal = renderer->GetRuntimeData().renderTargets[globals::deferred->normalRoughnessRT]; ID3D11UnorderedAccessView* uav = blurHorizontalTemp->uav.get(); context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); From be1f481c1e1d64c4f293c4e6ad086891ab4b2ac6 Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:56:50 +0100 Subject: [PATCH 11/17] fix: fix normals --- package/Shaders/DeferredCompositePS.hlsl | 2 ++ src/Deferred.cpp | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/package/Shaders/DeferredCompositePS.hlsl b/package/Shaders/DeferredCompositePS.hlsl index 47ebf4f7da..16cb52600e 100644 --- a/package/Shaders/DeferredCompositePS.hlsl +++ b/package/Shaders/DeferredCompositePS.hlsl @@ -99,6 +99,7 @@ struct PS_INPUT struct PS_OUTPUT { float4 Main: SV_Target0; + float4 NormalRoughness: SV_Target1; }; PS_OUTPUT main(PS_INPUT input) @@ -348,5 +349,6 @@ PS_OUTPUT main(PS_INPUT input) PS_OUTPUT output; output.Main = float4(color, 1.0); + output.NormalRoughness = 0; return output; } diff --git a/src/Deferred.cpp b/src/Deferred.cpp index d72ccdc05e..22f04e3b22 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -137,7 +137,9 @@ void Deferred::SetupResources() auto device = globals::d3d::device; D3D11_BLEND_DESC blendDesc{}; + blendDesc.IndependentBlendEnable = TRUE; blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + blendDesc.RenderTarget[1].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_BLUE; DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, compositeBlendState.put())); D3D11_DEPTH_STENCIL_DESC dsDesc{}; @@ -398,7 +400,7 @@ void Deferred::DeferredPasses() // Render targets + stencil test for VR stereo culling bool useStencil = globals::game::isVR && globals::features::vr.stereoOpt.IsStencilActive(); - ID3D11RenderTargetView* rtvs[1]{ main.RTV }; + ID3D11RenderTargetView* rtvs[2]{ main.RTV, normalRoughness.RTV }; ID3D11DepthStencilView* dsv = useStencil ? depth.views[0] : nullptr; context->OMSetRenderTargets(ARRAYSIZE(rtvs), rtvs, dsv); context->OMSetBlendState(compositeBlendState.get(), nullptr, 0xFFFFFFFF); From d50e3dc8579581211b6dc80053cf1f2a24cbd51c Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 02:13:24 +0100 Subject: [PATCH 12/17] perf: ignore sky --- package/Shaders/DeferredCompositeVS.hlsl | 2 +- src/Deferred.cpp | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/package/Shaders/DeferredCompositeVS.hlsl b/package/Shaders/DeferredCompositeVS.hlsl index b4b8d394ad..0fbf098b89 100644 --- a/package/Shaders/DeferredCompositeVS.hlsl +++ b/package/Shaders/DeferredCompositeVS.hlsl @@ -8,7 +8,7 @@ VS_OUTPUT main(uint vertexID : SV_VertexID) { VS_OUTPUT output; float2 uv = float2((vertexID << 1) & 2, vertexID & 2); - output.Position = float4(uv * float2(2, -2) + float2(-1, 1), 0, 1); + output.Position = float4(uv * float2(2, -2) + float2(-1, 1), 1, 1); output.TexCoord = uv; return output; } diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 22f04e3b22..563a3b82fe 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -143,13 +143,15 @@ void Deferred::SetupResources() DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, compositeBlendState.put())); D3D11_DEPTH_STENCIL_DESC dsDesc{}; - dsDesc.DepthEnable = FALSE; + dsDesc.DepthEnable = TRUE; + dsDesc.DepthFunc = D3D11_COMPARISON_GREATER; dsDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; dsDesc.StencilEnable = FALSE; DX::ThrowIfFailed(device->CreateDepthStencilState(&dsDesc, compositeDepthStencilState.put())); D3D11_DEPTH_STENCIL_DESC stencilDsDesc{}; - stencilDsDesc.DepthEnable = FALSE; + stencilDsDesc.DepthEnable = TRUE; + stencilDsDesc.DepthFunc = D3D11_COMPARISON_GREATER; stencilDsDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; stencilDsDesc.StencilEnable = TRUE; stencilDsDesc.StencilReadMask = 0xFF; @@ -401,8 +403,7 @@ void Deferred::DeferredPasses() // Render targets + stencil test for VR stereo culling bool useStencil = globals::game::isVR && globals::features::vr.stereoOpt.IsStencilActive(); ID3D11RenderTargetView* rtvs[2]{ main.RTV, normalRoughness.RTV }; - ID3D11DepthStencilView* dsv = useStencil ? depth.views[0] : nullptr; - context->OMSetRenderTargets(ARRAYSIZE(rtvs), rtvs, dsv); + context->OMSetRenderTargets(ARRAYSIZE(rtvs), rtvs, depth.views[0]); context->OMSetBlendState(compositeBlendState.get(), nullptr, 0xFFFFFFFF); context->OMSetDepthStencilState(useStencil ? compositeStencilDSState.get() : compositeDepthStencilState.get(), 1); From 4ca2a7821b137fc5ef5d3ba6882abcc7f5404c5c Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 02:21:24 +0100 Subject: [PATCH 13/17] fix: fix normal srv --- src/Deferred.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 563a3b82fe..50cfef476e 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -357,9 +357,14 @@ void Deferred::DeferredPasses() stateBackup.Backup(context); auto& mainCopy = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN_COPY]; + auto normalRoughnessCopyRT = (normalRoughnessRT == RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK) + ? RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK_SWAP + : RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK; + auto& normalRoughnessCopy = renderer->GetRuntimeData().renderTargets[normalRoughnessCopyRT]; float2 resolution = Util::ConvertToDynamic(globals::state->screenSize); D3D11_BOX srcBox = { 0, 0, 0, (UINT)resolution.x, (UINT)resolution.y, 1 }; context->CopySubresourceRegion(mainCopy.texture, 0, 0, 0, 0, main.texture, 0, &srcBox); + context->CopySubresourceRegion(normalRoughnessCopy.texture, 0, 0, 0, 0, normalRoughness.texture, 0, &srcBox); // Constant buffers { @@ -378,7 +383,7 @@ void Deferred::DeferredPasses() ID3D11ShaderResourceView* srvs[17]{ mainCopy.SRV, // t0 MainInputTexture specular.SRV, // t1 SpecularTexture - normalRoughness.SRV, // t2 NormalRoughnessTexture + normalRoughnessCopy.SRV, // t2 NormalRoughnessTexture dynamicCubemaps.loaded || REL::Module::IsVR() ? Util::GetCurrentSceneDepthSRV(true) : nullptr, // t3 DepthTexture albedo.SRV, // t4 AlbedoTexture masks.SRV, // t5 MasksTexture From dbd478c6da5ff4d14a20ecbb57b71e28aa58a71f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:23:15 +0000 Subject: [PATCH 14/17] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20pre-commi?= =?UTF-8?q?t.ci=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated formatting by clang-format, prettier, and other hooks. See https://pre-commit.ci for details. --- src/Deferred.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 50cfef476e..4749feeaae 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -357,9 +357,7 @@ void Deferred::DeferredPasses() stateBackup.Backup(context); auto& mainCopy = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN_COPY]; - auto normalRoughnessCopyRT = (normalRoughnessRT == RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK) - ? RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK_SWAP - : RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK; + auto normalRoughnessCopyRT = (normalRoughnessRT == RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK) ? RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK_SWAP : RE::RENDER_TARGETS::kNORMAL_TAAMASK_SSRMASK; auto& normalRoughnessCopy = renderer->GetRuntimeData().renderTargets[normalRoughnessCopyRT]; float2 resolution = Util::ConvertToDynamic(globals::state->screenSize); D3D11_BOX srcBox = { 0, 0, 0, (UINT)resolution.x, (UINT)resolution.y, 1 }; From 2a0b214bc87bf32de86b640111a954cde7cceead Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 02:34:59 +0100 Subject: [PATCH 15/17] fix: fix test failure due to view space normal --- package/Shaders/Tests/TestGBuffer.hlsl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/package/Shaders/Tests/TestGBuffer.hlsl b/package/Shaders/Tests/TestGBuffer.hlsl index 85daaee166..29fc05cb4b 100644 --- a/package/Shaders/Tests/TestGBuffer.hlsl +++ b/package/Shaders/Tests/TestGBuffer.hlsl @@ -4,16 +4,15 @@ /// @tags gbuffer, normal, encoding [numthreads(1, 1, 1)] void TestNormalEncodingRoundtrip() { - float3 testNormals[6] = { - float3(0.01, 0.0, 1.0), // near +Z pole - float3(0.0, 0.01, -1.0), // near -Z pole + float3 testNormals[5] = { + float3(0.3, 0.0, 1.0), // near +Z float3(1.0, 0.0, 0.0), float3(-1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), float3(0.0, -1.0, 0.0) }; - for (int i = 0; i < 6; i++) { + for (int i = 0; i < 5; i++) { float3 original = normalize(testNormals[i]); float2 encoded = GBuffer::EncodeNormal(original); float3 decoded = GBuffer::DecodeNormal(encoded); From 4ecf27886af0455c4aa94d5773a16e6a06deffe2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 19 Apr 2026 01:36:37 +0000 Subject: [PATCH 16/17] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20pre-commi?= =?UTF-8?q?t.ci=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated formatting by clang-format, prettier, and other hooks. See https://pre-commit.ci for details. --- package/Shaders/Tests/TestGBuffer.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/Shaders/Tests/TestGBuffer.hlsl b/package/Shaders/Tests/TestGBuffer.hlsl index 29fc05cb4b..df4294272c 100644 --- a/package/Shaders/Tests/TestGBuffer.hlsl +++ b/package/Shaders/Tests/TestGBuffer.hlsl @@ -5,7 +5,7 @@ /// @tags gbuffer, normal, encoding [numthreads(1, 1, 1)] void TestNormalEncodingRoundtrip() { float3 testNormals[5] = { - float3(0.3, 0.0, 1.0), // near +Z + float3(0.3, 0.0, 1.0), // near +Z float3(1.0, 0.0, 0.0), float3(-1.0, 0.0, 0.0), float3(0.0, 1.0, 0.0), From 73a2b644f7c2f63ca13c0c00de5398f6896181ed Mon Sep 17 00:00:00 2001 From: doodlum <15017472+doodlum@users.noreply.github.com> Date: Sun, 19 Apr 2026 02:38:58 +0100 Subject: [PATCH 17/17] chore: ignore gs --- src/Utils/D3DStateBackup.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/Utils/D3DStateBackup.h b/src/Utils/D3DStateBackup.h index b03740c1c7..7144ce265a 100644 --- a/src/Utils/D3DStateBackup.h +++ b/src/Utils/D3DStateBackup.h @@ -16,8 +16,6 @@ namespace Util ID3D11VertexShader* vs = nullptr; ID3D11Buffer* vsCBs[kNumCBSlots] = {}; - ID3D11GeometryShader* gs = nullptr; - ID3D11RasterizerState* rsState = nullptr; UINT rsNumViewports = D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; D3D11_VIEWPORT rsViewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {}; @@ -43,8 +41,6 @@ namespace Util context->VSGetShader(&vs, nullptr, nullptr); context->VSGetConstantBuffers(0, kNumCBSlots, vsCBs); - context->GSGetShader(&gs, nullptr, nullptr); - context->RSGetState(&rsState); rsNumViewports = D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; context->RSGetViewports(&rsNumViewports, rsViewports); @@ -67,8 +63,6 @@ namespace Util context->VSSetShader(vs, nullptr, 0); context->VSSetConstantBuffers(0, kNumCBSlots, vsCBs); - context->GSSetShader(gs, nullptr, 0); - context->RSSetState(rsState); context->RSSetViewports(rsNumViewports, rsViewports); @@ -100,10 +94,6 @@ namespace Util cb = nullptr; } } - if (gs) { - gs->Release(); - gs = nullptr; - } if (rsState) { rsState->Release(); rsState = nullptr;