From 83769b047e97297f92e5932fde133507d564cd65 Mon Sep 17 00:00:00 2001 From: vrnord Date: Sat, 14 Mar 2026 12:27:00 -0600 Subject: [PATCH 01/16] feat(vr): VR stereo optimizations with stencil culling, CAS sharpening, and DLSS fix - Hardware stencil-based Eye 1 pixel culling for forward passes - StereoBlend lateral reprojection from Eye 0 to Eye 1 - AMD CAS post-TAA sharpening with strength slider - RGB fringe suppression for tree alpha test artifacts - Fix UpscaleDepth to use upstream dev version (prevents DLSS crash) - Mip LOD bias control for VR foliage Co-Authored-By: Claude Opus 4.6 --- .../Features/VRStereoOptimizations.ini | 2 + package/Shaders/Common/SharedData.hlsli | 9 +- package/Shaders/Common/VR.hlsli | 9 + package/Shaders/DeferredCompositeCS.hlsl | 14 + package/Shaders/DistantTree.hlsl | 20 +- package/Shaders/Lighting.hlsl | 38 +- package/Shaders/RunGrass.hlsl | 51 +- package/Shaders/VR/CASCS.hlsl | 74 ++ package/Shaders/VR/StereoBlendCS.hlsl | 155 +++- package/Shaders/VR/VRPostProcessCS.hlsl | 114 +++ .../VRStereoOptimizations/ReprojectionCS.hlsl | 55 ++ .../VRStereoOptimizations/StencilCS.hlsl | 151 ++++ .../VRStereoOptimizations/StencilWritePS.hlsl | 54 ++ .../VRStereoOptimizations/StencilWriteVS.hlsl | 24 + .../VRStereoOptimizations/cbuffers.hlsli | 35 + src/Deferred.cpp | 44 +- src/Feature.cpp | 2 + src/Features/Upscaling.cpp | 520 +++++++++-- src/Features/Upscaling.h | 39 +- src/Features/VR.cpp | 6 + src/Features/VR.h | 8 +- src/Features/VR/SettingsUI.cpp | 2 +- src/Features/VR/StereoBlend.cpp | 112 ++- src/Features/VRStereoOptimizations.cpp | 812 ++++++++++++++++++ src/Features/VRStereoOptimizations.h | 230 +++++ src/Globals.cpp | 61 +- src/Globals.h | 2 + src/State.cpp | 19 +- src/State.h | 5 + 29 files changed, 2546 insertions(+), 121 deletions(-) create mode 100644 features/VR Stereo Optimizations/Shaders/Features/VRStereoOptimizations.ini create mode 100644 package/Shaders/VR/CASCS.hlsl create mode 100644 
package/Shaders/VR/VRPostProcessCS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/StencilCS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/cbuffers.hlsli create mode 100644 src/Features/VRStereoOptimizations.cpp create mode 100644 src/Features/VRStereoOptimizations.h diff --git a/features/VR Stereo Optimizations/Shaders/Features/VRStereoOptimizations.ini b/features/VR Stereo Optimizations/Shaders/Features/VRStereoOptimizations.ini new file mode 100644 index 0000000000..000b60a568 --- /dev/null +++ b/features/VR Stereo Optimizations/Shaders/Features/VRStereoOptimizations.ini @@ -0,0 +1,2 @@ +[Info] +Version = 1-0-0 diff --git a/package/Shaders/Common/SharedData.hlsli b/package/Shaders/Common/SharedData.hlsli index 4ea0d4d07c..3ddf2f9ec7 100644 --- a/package/Shaders/Common/SharedData.hlsli +++ b/package/Shaders/Common/SharedData.hlsli @@ -23,8 +23,13 @@ namespace SharedData bool InInterior; // If the area lacks a directional shadow light e.g. the sun or moon bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) bool HideSky; // HideSky flag in WorldSpace, e.g. 
Blackreach - float MipBias; // Offset to mip level for TAA sharpness# - float pad0; + float MipBias; // Offset to mip level for TAA sharpness + float VRMipBias; // Additional negative MIP bias for VR foliage sharpening (depth-scaled) + float VRMipBiasNearDist; // Game units: no VR MIP bias closer than this + float VRMipBiasFarDist; // Game units: full VR MIP bias beyond this + uint VRMipBiasMode; // 0=Off, 1=All Textures, 2=Distant Trees (TREE_ANIM) only + float VRAlphaTestThreshold; // Alpha test threshold for VR TREE_ANIM (0 = disabled) + float2 pad0; float4 AmbientSHR; float4 AmbientSHG; float4 AmbientSHB; diff --git a/package/Shaders/Common/VR.hlsli b/package/Shaders/Common/VR.hlsli index d744022781..37f9238d41 100644 --- a/package/Shaders/Common/VR.hlsli +++ b/package/Shaders/Common/VR.hlsli @@ -21,6 +21,7 @@ cbuffer VRValues : register(b13) float2 EyeOffsetScale : packoffset(c0.z); float4 EyeClipEdge[2] : packoffset(c1); } + #endif namespace Stereo @@ -626,6 +627,14 @@ namespace Stereo vsout.VRPosition.z = clipPos.z; vsout.VRPosition.w = clipPos.w; + // Hardcoded ~0.75px diagonal jitter for Eye 1 stereo edge supersampling. + // Larger offset increases chance of different alpha test outcomes between eyes + // (tree branches vs sky). NDC for 6304x3088 SBS reference; scales with resolution. 
+ if (a_eyeIndex == 1) { + static const float2 kJitterNDC = float2(1.68e-4, -3.44e-4); + vsout.VRPosition.xy += kJitterNDC * vsout.VRPosition.w; + } + vsout.ClipDistance = clipEdges.y; vsout.CullDistance = clipEdges.x; # endif // VR diff --git a/package/Shaders/DeferredCompositeCS.hlsl b/package/Shaders/DeferredCompositeCS.hlsl index f149255718..88fa6f8632 100644 --- a/package/Shaders/DeferredCompositeCS.hlsl +++ b/package/Shaders/DeferredCompositeCS.hlsl @@ -19,6 +19,10 @@ RWTexture2D NormalTAAMaskSpecularMaskRW : register(u1); RWTexture2D MotionVectorsRW : register(u2); Texture2D DepthTexture : register(t4); +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + #if defined(DYNAMIC_CUBEMAPS) Texture2D ReflectanceTexture : register(t5); TextureCube EnvTexture : register(t6); @@ -92,6 +96,16 @@ void SampleSSGISpecular(uint2 pixCoord, sh2 lobe, inout float ao, out float3 il, uv *= FrameBuffer::DynamicResolutionParams2.xy; // adjust for dynamic res uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint mode = StereoOptModeTexture[uint2(dispatchID.xy)]; + if (mode == 2 || mode == 1) { // MODE_MAIN or MODE_EDGE — stencil-culled, no valid G-buffer + return; + } + } +#endif + uv = Stereo::ConvertFromStereoUV(uv, eyeIndex); float3 normalGlossiness = NormalRoughnessTexture[dispatchID.xy]; diff --git a/package/Shaders/DistantTree.hlsl b/package/Shaders/DistantTree.hlsl index cbd4608676..8799feb633 100644 --- a/package/Shaders/DistantTree.hlsl +++ b/package/Shaders/DistantTree.hlsl @@ -203,8 +203,14 @@ PS_OUTPUT main(PS_INPUT input) float alpha = TexDiffuse.SampleBias(SampDiffuse, input.TexCoord.xy, SharedData::MipBias).w; - if ((alpha - AlphaTestRefRS) < 0) { - discard; + { + float alphaRef = AlphaTestRefRS; +#if defined(VR) + alphaRef -= eyeIndex * 0.1; +#endif + if ((alpha - alphaRef) < 0) { + discard; + } } psout.Diffuse.xyz = input.Depth.xxx / input.Depth.yyy; @@ -213,8 +219,14 @@ 
PS_OUTPUT main(PS_INPUT input) float4 baseColor = TexDiffuse.SampleBias(SampDiffuse, input.TexCoord.xy, SharedData::MipBias); baseColor.xyz = Color::Diffuse(baseColor.xyz); - if ((baseColor.w - AlphaTestRefRS) < 0) { - discard; + { + float alphaRef = AlphaTestRefRS; +#if defined(VR) + alphaRef -= eyeIndex * 0.1; +#endif + if ((baseColor.w - alphaRef) < 0) { + discard; + } } # if defined(DEFERRED) diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index 777d0bd0c0..9d4c21120e 100644 --- a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -1780,7 +1780,19 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) rawRMAOS = blendedRMAOS; # endif # else // Non-landscape code - float4 rawBaseColor = TexColorSampler.SampleBias(SampColorSampler, diffuseUv, SharedData::MipBias); + // VR MIP bias: depth-gated sharpening for distant textures + // Mode 1 = All Textures, Mode 2 = Distant Trees (TREE_ANIM) only + float vrFoliageBias = 0; +# if defined(TREE_ANIM) + if (SharedData::VRMipBias < 0) { +# else + if (SharedData::VRMipBias < 0 && SharedData::VRMipBiasMode == 1) { +# endif + float linDepth = SharedData::GetScreenDepth(input.Position.z); + float t = saturate((linDepth - SharedData::VRMipBiasNearDist) / max(SharedData::VRMipBiasFarDist - SharedData::VRMipBiasNearDist, 1.0)); + vrFoliageBias = SharedData::VRMipBias * t; + } + float4 rawBaseColor = TexColorSampler.SampleBias(SampColorSampler, diffuseUv, SharedData::MipBias + vrFoliageBias); baseColor = float4(Color::Diffuse(rawBaseColor.rgb), rawBaseColor.a); float4 normalColor = TexNormalSampler.SampleBias(SampNormalSampler, uv, SharedData::MipBias); normal = normalColor; @@ -3021,11 +3033,11 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) float alpha = baseColor.w; # if defined(EMAT) && !defined(LANDSCAPE) # if defined(PARALLAX) - alpha = TexColorSampler.SampleBias(SampColorSampler, uvOriginal, SharedData::MipBias).w; + alpha = 
TexColorSampler.SampleBias(SampColorSampler, uvOriginal, SharedData::MipBias + vrFoliageBias).w; # elif defined(TRUE_PBR) [branch] if (PBRParallax) { - alpha = TexColorSampler.SampleBias(SampColorSampler, uvOriginal, SharedData::MipBias).w; + alpha = TexColorSampler.SampleBias(SampColorSampler, uvOriginal, SharedData::MipBias + vrFoliageBias).w; } # endif # endif @@ -3074,11 +3086,31 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) } alpha = saturate(1.05 * alpha); # endif // DEPTH_WRITE_DECALS +# if defined(TREE_ANIM) + // Fixed alpha floor — catch zombie texels with near-zero alpha + if (alpha < 0.1) { + discard; + } if (alpha - AlphaTestRefRS < 0) { discard; } + // Suppress RGB fringe contamination from negative MIP bias. + // Low-alpha texels near the foliage boundary have bright padding bleeding into RGB. + // Alpha is a direct proxy for contamination — low alpha = more padding contribution. + // Scale correction by bias strength so close-range (no bias) textures are untouched. 
+ if (vrFoliageBias < 0) { + float biasStrength = saturate(vrFoliageBias / min(SharedData::VRMipBias, -0.001)); + float fringeScale = 5.0; // higher = more aggressive fringe suppression + baseColor.rgb *= saturate(alpha * lerp(1.0, fringeScale, biasStrength)); + } +# else + if (alpha - AlphaTestRefRS < 0) { + discard; + } +# endif // TREE_ANIM # endif // DO_ALPHA_TEST + # if defined(ANISOTROPIC_ALPHA) // Uniform alpha material settings uint AlphaMaterialModel = ExtendedTranslucency::GetMaterialModelFromDescriptor(Permutation::ExtraFeatureDescriptor); diff --git a/package/Shaders/RunGrass.hlsl b/package/Shaders/RunGrass.hlsl index f05c3d0edd..805779acce 100644 --- a/package/Shaders/RunGrass.hlsl +++ b/package/Shaders/RunGrass.hlsl @@ -480,22 +480,37 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) bool complex = abs(complexLength - 1.0) < SharedData::grassLightingSettings.ComplexGrassThreshold; # endif // !TRUE_PBR + // VR MIP bias: depth-gated sharpening for distant textures + float vrGrassBias = 0; + if (SharedData::VRMipBias < 0 && SharedData::VRMipBiasMode == 1) { + float linDepth = SharedData::GetScreenDepth(input.HPosition.z); + float t = saturate((linDepth - SharedData::VRMipBiasNearDist) / max(SharedData::VRMipBiasFarDist - SharedData::VRMipBiasNearDist, 1.0)); + vrGrassBias = SharedData::VRMipBias * t; + } + float4 baseColor; # if !defined(TRUE_PBR) if (complex) { - baseColor = TexBaseSampler.SampleBias(SampBaseSampler, float2(input.TexCoord.x, input.TexCoord.y * 0.5), SharedData::MipBias); + baseColor = TexBaseSampler.SampleBias(SampBaseSampler, float2(input.TexCoord.x, input.TexCoord.y * 0.5), SharedData::MipBias + vrGrassBias); } else # endif // !TRUE_PBR { - baseColor = TexBaseSampler.SampleBias(SampBaseSampler, input.TexCoord.xy, SharedData::MipBias); + baseColor = TexBaseSampler.SampleBias(SampBaseSampler, input.TexCoord.xy, SharedData::MipBias + vrGrassBias); } baseColor.xyz = Color::Diffuse(baseColor.xyz); # if 
defined(RENDER_DEPTH) float diffuseAlpha = input.VertexColor.w * baseColor.w; - if ((diffuseAlpha - AlphaTestRefRS) < 0) { - discard; + { + float alphaRef = AlphaTestRefRS; +#if defined(VR) + uint convergenceEyeIndex = Stereo::GetEyeIndexPS(input.HPosition, VPOSOffset); + alphaRef -= convergenceEyeIndex * 0.1; +#endif + if ((diffuseAlpha - alphaRef) < 0) { + discard; + } } # endif // RENDER_DEPTH || DO_ALPHA_TEST @@ -505,9 +520,9 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) psout.PS.w = diffuseAlpha; # else # if !defined(TRUE_PBR) - float4 specColor = complex ? TexBaseSampler.SampleBias(SampBaseSampler, float2(input.TexCoord.x, 0.5 + input.TexCoord.y * 0.5), SharedData::MipBias) : 1; + float4 specColor = complex ? TexBaseSampler.SampleBias(SampBaseSampler, float2(input.TexCoord.x, 0.5 + input.TexCoord.y * 0.5), SharedData::MipBias + vrGrassBias) : 1; # else - float4 specColor = TexNormalSampler.SampleBias(SampNormalSampler, input.TexCoord.xy, SharedData::MipBias); + float4 specColor = TexNormalSampler.SampleBias(SampNormalSampler, input.TexCoord.xy, SharedData::MipBias + vrGrassBias); # endif uint eyeIndex = Stereo::GetEyeIndexPS(input.HPosition, VPOSOffset); @@ -548,7 +563,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) # endif // !TRUE_PBR # if defined(TRUE_PBR) - float4 rawRMAOS = TexRMAOSSampler.SampleBias(SampRMAOSSampler, input.TexCoord.xy, SharedData::MipBias) * float4(PBRParams1.x, 1, 1, PBRParams1.y); + float4 rawRMAOS = TexRMAOSSampler.SampleBias(SampRMAOSSampler, input.TexCoord.xy, SharedData::MipBias + vrGrassBias) * float4(PBRParams1.x, 1, 1, PBRParams1.y); PBR::SurfaceProperties pbrSurfaceProperties = PBR::InitSurfaceProperties(); @@ -846,13 +861,27 @@ PS_OUTPUT main(PS_INPUT input) { PS_OUTPUT psout; - float4 baseColor = TexBaseSampler.SampleBias(SampBaseSampler, input.TexCoord.xy, SharedData::MipBias); + // VR MIP bias: depth-gated sharpening for distant textures + float vrGrassBias = 0; + if 
(SharedData::VRMipBias < 0 && SharedData::VRMipBiasMode == 1) { + float linDepth = SharedData::GetScreenDepth(input.HPosition.z); + float t = saturate((linDepth - SharedData::VRMipBiasNearDist) / max(SharedData::VRMipBiasFarDist - SharedData::VRMipBiasNearDist, 1.0)); + vrGrassBias = SharedData::VRMipBias * t; + } + + float4 baseColor = TexBaseSampler.SampleBias(SampBaseSampler, input.TexCoord.xy, SharedData::MipBias + vrGrassBias); # if defined(RENDER_DEPTH) float diffuseAlpha = input.VertexColor.w * baseColor.w; - - if ((diffuseAlpha - AlphaTestRefRS) < 0) { - discard; + { + float alphaRef = AlphaTestRefRS; +#if defined(VR) + uint convergenceEyeIndex = Stereo::GetEyeIndexPS(input.HPosition, VPOSOffset); + alphaRef -= convergenceEyeIndex * 0.1; +#endif + if ((diffuseAlpha - alphaRef) < 0) { + discard; + } } # endif // RENDER_DEPTH || DO_ALPHA_TEST diff --git a/package/Shaders/VR/CASCS.hlsl b/package/Shaders/VR/CASCS.hlsl new file mode 100644 index 0000000000..9c379e6792 --- /dev/null +++ b/package/Shaders/VR/CASCS.hlsl @@ -0,0 +1,74 @@ +// AMD Contrast Adaptive Sharpening (CAS) - Sharpen-only for VR +// Based on AMD FidelityFX CAS (sharpen-only path) +// Reference: https://gpuopen.com/fidelityfx-cas/ +// +// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// CASParams[0] = sharpness (0.0 = minimum sharpening, 1.0 = maximum sharpening) +StructuredBuffer<float> CASParams : register(t1); + +Texture2D<float4> Source : register(t0); +RWTexture2D<float4> Dest : register(u0); + +[numthreads(8, 8, 1)] void main(uint3 DTid : SV_DispatchThreadID) { + uint2 texDim; + Dest.GetDimensions(texDim.x, texDim.y); + + if (DTid.x >= texDim.x || DTid.y >= texDim.y) + return; + + float sharpness = CASParams[0]; + + // Fetch 3x3 neighborhood + int2 sp = int2(DTid.xy); + float3 a = Source.Load(int3(sp + int2(-1, -1), 0)).rgb; + float3 b = Source.Load(int3(sp + int2(0, -1), 0)).rgb; + float3 c = Source.Load(int3(sp + int2(1, -1), 0)).rgb; + float3 d = Source.Load(int3(sp + int2(-1, 0), 0)).rgb; + float3 e = Source.Load(int3(sp, 0)).rgb; + float3 f = Source.Load(int3(sp + int2(1, 0), 0)).rgb; + float3 g = Source.Load(int3(sp + int2(-1, 1), 0)).rgb; + float3 h = Source.Load(int3(sp + int2(0, 1), 0)).rgb; + float3 i = Source.Load(int3(sp + int2(1, 1), 0)).rgb; + + // Soft min/max of cross neighborhood + float3 mnRGB = min(min(min(d, e), min(f, b)), h); + float3 mxRGB = max(max(max(d, e), max(f, b)), h); + + // Soft min/max: add the full 3x3 min/max (cross + diagonals) to the cross min/max + float3 mnRGB2 = min(mnRGB, min(min(a, c), min(g, i))); + float3 mxRGB2 = max(mxRGB, max(max(a, c), max(g, i))); + mnRGB += mnRGB2; + mxRGB += mxRGB2; + + // Adaptive sharpening amount + float3 ampRGB = saturate(min(mnRGB, 2.0 - mxRGB) * rcp(mxRGB)); + ampRGB = rsqrt(ampRGB); + + // Peak controls sharpening strength: + // sharpness 0.0 -> peak 8.0 (minimum
sharpening) + // sharpness 1.0 -> peak 5.0 (maximum sharpening) + float peak = -3.0 * sharpness + 8.0; + float3 wRGB = -rcp(ampRGB * peak); + float3 rcpWeightRGB = rcp(4.0 * wRGB + 1.0); + + // Apply sharpening filter: normalise by the kernel weight first, then clamp to [0, 1] + float3 outColor = saturate((((b + d) + (f + h)) * wRGB + e) * rcpWeightRGB); + + Dest[DTid.xy] = float4(outColor, 1.0); +} diff --git a/package/Shaders/VR/StereoBlendCS.hlsl b/package/Shaders/VR/StereoBlendCS.hlsl index 7322e9e513..7983c25e76 100644 --- a/package/Shaders/VR/StereoBlendCS.hlsl +++ b/package/Shaders/VR/StereoBlendCS.hlsl @@ -11,6 +11,7 @@ #include "Common/Color.hlsli" #include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" #include "Common/VR.hlsli" Texture2D ColorTexture : register(t0); @@ -18,6 +19,19 @@ Texture2D DepthTexture : register(t1); RWTexture2D OutputRW : register(u0); +#ifdef STEREO_OVERWRITE +RWTexture2D MotionRW : register(u1); +Texture2D ModeTexture : register(t2); + +// Mode constants matching VRStereoOptimizations/cbuffers.hlsli +// (can't include directly — its cbuffer on b1 conflicts with StereoBlendCB) +#define MODE_DISOCCLUDED 0 +#define MODE_EDGE 1 +#define MODE_MAIN 2 +#define MODE_EDGE_NEIGHBOUR 3 +#define MODE_FULL_BLEND 4 +#endif + cbuffer StereoBlendCB : register(b1) { float2 FrameDim; @@ -25,11 +39,15 @@ cbuffer StereoBlendCB : register(b1) float DepthSigma; float MaxBlendFactor; float ColorDiffThreshold; - float pad; + float DebugEdgeTint; + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer + float FullBlendDistance; + float2 _pad; }; -static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend
+static const int kEdgeMargin = 2; // Neighbor offset (pixels) for destination edge + mask boundary check +static const float kDepthAgreementThreshold = 0.015; // Relative depth difference threshold for overwrite mode disocclusion rejection // Samples four depth neighbors in a cross pattern (±offset pixels) around center, // clamped to eyeIndex's half of the packed stereo buffer to avoid seam contamination. @@ -46,6 +64,125 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (any(dtid >= uint2(FrameDim))) return; +#ifdef STEREO_OVERWRITE + // ========================================================================= + // Mode-driven stereo merge: reads per-pixel classification from StencilCS + // and applies appropriate action per mode and eye. + // Mode texture is full SBS resolution — ModeTexture[dtid] maps directly. + // ========================================================================= + + float2 uv = (dtid + 0.5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + float centerDepth = DepthTexture[dtid]; + + // HMD mask pixels (depth >= 1.0 in reversed-Z) — always skip + if (centerDepth >= 1.0) + return; + + uint pixelMode = ModeTexture[dtid]; + + // Debug mode 1: depth map diagnostic — show mode texture as solid colors (all pixels) + if (DebugMode == 1) { + float4 c = ColorTexture[dtid]; + if (pixelMode == MODE_EDGE) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), 0.5), c.a); + else if (pixelMode == MODE_EDGE_NEIGHBOUR) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0, 1), 0.5), c.a); + else if (pixelMode == MODE_DISOCCLUDED) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 0.5, 1), 0.3), c.a); + else if (pixelMode == MODE_FULL_BLEND) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0.5, 0), 0.5), c.a); + return; + } + + // Debug mode 2: full blend depth visualizer — cyan tint based on proximity to FullBlendDistance + if (DebugMode == 2) { + if (centerDepth < 1e-5 || centerDepth >= 1.0) + return; + float 
linDepth = SharedData::GetScreenDepth(centerDepth); + if (linDepth < FullBlendDistance) { + float4 c = ColorTexture[dtid]; + float proximity = saturate(1.0 - linDepth / max(FullBlendDistance, 1.0)); + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 1), proximity * 0.4), c.a); + } + return; + } + + // MODE_DISOCCLUDED: fully shaded, leave untouched + if (pixelMode == MODE_DISOCCLUDED) + return; + + // MODE_FULL_BLEND: bilateral blend for 2x supersampling + if (pixelMode == MODE_FULL_BLEND) { + float4 center = ColorTexture[dtid]; + + // Reproject to the other eye + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, centerDepth, eyeIndex, FrameDim); + if (!r.valid) { + // Debug tint for failed reprojection + if (DebugEdgeTint > 0) + OutputRW[dtid] = float4(lerp(center.rgb, float3(1, 0.5, 0), DebugEdgeTint), center.a); + return; + } + + // Only blend with pixels that have valid composited data in both eyes + uint otherMode = ModeTexture[r.otherPx]; + if (otherMode != MODE_FULL_BLEND && otherMode != MODE_DISOCCLUDED) + return; + + float4 otherColor = ColorTexture[r.otherPx]; + float otherDepth = DepthTexture[r.otherPx]; + + // Depth-weighted bilateral blend + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float depthAgreement = 1.0 - saturate(abs(centerDepth - otherDepth) / maxDepth / 0.02); + float blendWeight = 0.5 * depthAgreement; + + float4 result = lerp(center, otherColor, blendWeight); + + if (DebugEdgeTint > 0) + result.rgb = lerp(result.rgb, float3(0, 1, 1), DebugEdgeTint); + + OutputRW[dtid] = result; + return; + } + + if (eyeIndex == 0) { + // Eye 0: fully shaded for all modes — only apply debug tint to edge pixels + if (DebugEdgeTint > 0 && pixelMode == MODE_EDGE) { + float4 c = ColorTexture[dtid]; + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), DebugEdgeTint), c.a); + } + return; + } + + // Eye 1: reproject all non-disoccluded, non-full-blend pixels (MAIN, EDGE) from Eye 0. 
+ // StencilCS already performed the authoritative disocclusion check with the correct + // depth buffer state — no redundant depth agreement check here. + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, centerDepth, eyeIndex, FrameDim); + if (!r.valid) + return; + + // Skip if the Eye 0 source pixel is sky/unrendered (depth at clear value). + // At DeferredPasses time, sky hasn't rendered yet — source would have clear color. + // Let the sky/water pass fill these pixels later instead. + float sourceDepth = DepthTexture[r.otherPx]; + if (sourceDepth >= 1.0 || sourceDepth < 1e-5) + return; + + OutputRW[dtid] = ColorTexture[r.otherPx]; + MotionRW[dtid] = MotionRW[r.otherPx]; + +#else // Normal bilateral blend path + +#ifdef EYE0_ONLY + // Only process Eye 0 (left half) - Eye 1 left untouched + float2 uvCheck = (dtid + 0.5) * RcpFrameDim; + if (Stereo::GetEyeIndexFromTexCoord(uvCheck) == 1) + return; +#endif + float2 uv = (dtid + 0.5) * RcpFrameDim; uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); @@ -68,8 +205,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) // depth == 1.0: sky/far plane (no real geometry, bilateral reprojection not meaningful) bool isSkipPixel = centerDepth < 1e-5 || centerDepth >= 1.0; if (!isSkipPixel) { - // Source edge detection: skip at depth discontinuities (arm/world silhouettes, - // object edges). Saves VP reprojection work and prevents halo artifacts. + // Normal bilateral blend path float4 srcEdgeDepths = SampleCrossDepths(dtid, 1, eyeIndex); if (Stereo::MaxDepthDiff(centerDepth, srcEdgeDepths) > kEdgeDepthThreshold) { debugState = 1; @@ -78,10 +214,6 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (r.valid) { float otherDepth = DepthTexture[r.otherPx]; - // Destination edge detection: skip if the reprojected pixel is near the HMD - // mask boundary or at a depth discontinuity in the other eye. 
Due to VR - // parallax the arm silhouette appears at a different screen position per eye, - // so the reprojection can cross a boundary invisible from this eye. float4 dstEdgeDepths = SampleCrossDepths(r.otherPx, kEdgeMargin, 1 - eyeIndex); if (any(dstEdgeDepths < 1e-5) || Stereo::MaxDepthDiff(otherDepth, dstEdgeDepths) > kEdgeDepthThreshold) { debugState = 2; @@ -89,9 +221,6 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) float4 otherColor = ColorTexture[r.otherPx]; Stereo::FinalizeStereoBlend(r, uv, centerDepth, otherDepth, eyeIndex, FrameDim, DepthSigma, MaxBlendFactor); - // Only blend where the two eyes actually disagree (screen-space effect - // inconsistency). Luminance difference below the threshold means both - // eyes computed the same result and blending would only destroy parallax. float colorDiff = abs(dot(centerColor.rgb, float3(0.2126, 0.7152, 0.0722)) - dot(otherColor.rgb, float3(0.2126, 0.7152, 0.0722))); float colorGate = smoothstep(ColorDiffThreshold * 0.5, ColorDiffThreshold * 2.0, colorDiff); @@ -148,4 +277,6 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) #else OutputRW[dtid] = blendedColor; #endif + +#endif // STEREO_OVERWRITE } diff --git a/package/Shaders/VR/VRPostProcessCS.hlsl b/package/Shaders/VR/VRPostProcessCS.hlsl new file mode 100644 index 0000000000..a51e19e8db --- /dev/null +++ b/package/Shaders/VR/VRPostProcessCS.hlsl @@ -0,0 +1,114 @@ +// VR Post-Process - Bilateral blend for near-camera 2x supersampling +// +// Runs after all compositing and stereo blending is complete. +// Reads per-pixel classification from StencilCS and applies: +// - MODE_FULL_BLEND: bilateral depth-weighted blend for 2x supersampling +// +// Only MODE_FULL_BLEND pixels are processed. All others pass through untouched. 
+ +#include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" +#include "Common/VR.hlsli" + +Texture2D<float4> ColorTexture : register(t0); // Copy of final composited image +Texture2D<uint> ModeTexture : register(t1); +Texture2D<float> DepthTexture : register(t2); + +RWTexture2D<float4> OutputRW : register(u0); + +cbuffer VRPostProcessCB : register(b1) +{ + float2 FrameDim; + float2 RcpFrameDim; + float DebugEdgeTint; // 0 = off, >0 = debug visualization strength + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer + float FullBlendDistance; // Linearized depth threshold for full blend zone visualization + float _pad; // Pad to 16-byte alignment +}; + +#define MODE_DISOCCLUDED 0 +#define MODE_EDGE 1 +#define MODE_MAIN 2 +#define MODE_EDGE_NEIGHBOUR 3 +#define MODE_FULL_BLEND 4 + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) +{ + if (any(dtid >= uint2(FrameDim))) + return; + + uint pixelMode = ModeTexture[dtid]; + + // Depth map diagnostic: tint each pixel by its mode classification (MODE_MAIN is left untinted) + if (DebugMode == 1) { + float4 c = ColorTexture[dtid]; + if (pixelMode == MODE_EDGE) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), 0.5), c.a); + else if (pixelMode == MODE_EDGE_NEIGHBOUR) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0, 1), 0.5), c.a); + else if (pixelMode == MODE_DISOCCLUDED) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 0.5, 1), 0.3), c.a); + else if (pixelMode == MODE_FULL_BLEND) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0.5, 0), 0.5), c.a); // Orange = full blend zone + return; + } + + // Full blend depth visualizer: shows the depth boundary as a cyan tint + if (DebugMode == 2) { + float2 uvDb = (dtid + 0.5) * RcpFrameDim; + float depthDb = DepthTexture[dtid]; + if (depthDb < 1e-5 || depthDb >= 1.0) + return; + float linDepth = SharedData::GetScreenDepth(depthDb); + if (linDepth < FullBlendDistance) { + float4 c = ColorTexture[dtid]; + float proximity = saturate(1.0 - linDepth / max(FullBlendDistance,
1.0)); + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 1), proximity * 0.4), c.a); + } + return; + } + + // Only process full blend pixels + if (pixelMode != MODE_FULL_BLEND) + return; + + float2 uv = (dtid + 0.5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + float4 result = ColorTexture[dtid]; + + // === MODE_FULL_BLEND: bilateral blend for 2x supersampling === + { + float4 center = result; + float centerDepth = DepthTexture[dtid]; + + // Reproject to the other eye + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, centerDepth, eyeIndex, FrameDim); + if (!r.valid) { + // Debug tint for failed reprojection + if (DebugEdgeTint > 0) + OutputRW[dtid] = float4(lerp(center.rgb, float3(1, 0.5, 0), DebugEdgeTint), center.a); + return; + } + + // Only blend with pixels that have valid composited data in both eyes. + uint otherMode = ModeTexture[r.otherPx]; + if (otherMode != MODE_FULL_BLEND && otherMode != MODE_DISOCCLUDED) + return; + + float4 otherColor = ColorTexture[r.otherPx]; + float otherDepth = DepthTexture[r.otherPx]; + + // Depth-weighted bilateral blend + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float depthAgreement = 1.0 - saturate(abs(centerDepth - otherDepth) / maxDepth / 0.02); + float blendWeight = 0.5 * depthAgreement; + + result = lerp(center, otherColor, blendWeight); + + if (DebugEdgeTint > 0) + result.rgb = lerp(result.rgb, float3(0, 1, 1), DebugEdgeTint); + } + + OutputRW[dtid] = result; +} diff --git a/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl b/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl new file mode 100644 index 0000000000..bd34d26d58 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl @@ -0,0 +1,55 @@ +// VR Stereo Optimizations - Reprojection Compute Shader +// +// Fills Eye 1 pixels that were stencil-culled during rendering by reprojecting +// color data from Eye 0. Only operates on pixels classified as MODE_MAIN. 
+// +// Reads Eye 0 color directly from the OutputRW UAV (left half) and writes to +// Eye 1 (right half). No read-write conflict because reads and writes target +// strictly different halves of the texture. +// +// Input: +// t0 = Depth buffer +// t1 = Per-pixel mode classification texture +// Output: +// u0 = Main render target UAV (reads Eye 0, writes Eye 1) + +#include "Common/VR.hlsli" +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D<float> DepthTexture : register(t0); +Texture2D<uint> ModeTexture : register(t1); + +RWTexture2D<float4> OutputRW : register(u0); + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + uint eyeWidth = (uint)FrameDim.x / 2; + uint eyeHeight = (uint)FrameDim.y; + + if (any(dtid >= uint2(eyeWidth, eyeHeight))) + return; + + // dtid is in Eye 1 local coords; convert to stereo buffer coords + uint2 stereoCoord = uint2(dtid.x + eyeWidth, dtid.y); + + // Only fill pixels that were marked for reprojection + // Mode texture is full SBS resolution, so use stereoCoord for Eye 1 + uint mode = ModeTexture[stereoCoord]; + if (mode != MODE_MAIN) + return; + + float depth = DepthTexture[stereoCoord]; + + // Compute mono UV for this Eye 1 pixel + float2 stereoUV = (float2(stereoCoord) + 0.5) * RcpFrameDim; + float2 monoUV = Stereo::ConvertFromStereoUV(stereoUV, 1); + + // Reproject to Eye 0 and sample color; clamp to the Eye 0 half so the read never lands on an Eye 1 texel this dispatch writes + float3 otherEyeUV = Stereo::ConvertMonoUVToOtherEye(float3(monoUV, depth), 1); + float2 eye0StereoUV = Stereo::ConvertToStereoUV(otherEyeUV.xy, 0); + int2 eye0Px = clamp(int2(eye0StereoUV * FrameDim), int2(0, 0), int2(eyeWidth, eyeHeight) - 1); + + float4 reprojectedColor = OutputRW[eye0Px]; + + // Write to Eye 1 in the main render target + OutputRW[stereoCoord] = reprojectedColor; +} diff --git a/package/Shaders/VRStereoOptimizations/StencilCS.hlsl b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl new file mode 100644 index 0000000000..647edacef4 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl @@ -0,0 +1,151 @@
+// VR Stereo Optimizations - Stencil Classification Compute Shader +// +// Classifies BOTH eyes over the full SBS buffer. Each pixel is tagged as: +// MODE_DISOCCLUDED - Must be fully shaded (sky, HMD mask, parallax-occluded) +// MODE_EDGE - Depth edge boundary (dist 1) or inner/foreground band; fully shaded + bilateral blend +// MODE_MAIN - Standard pixel eligible for reprojection / bilateral blend +// MODE_FULL_BLEND - Near-camera geometry: both eyes fully shaded for 2x supersampling +// +// Dispatched over full SBS resolution (FrameDim.x x FrameDim.y). + +#include "Common/VR.hlsli" +#include "Common/SharedData.hlsli" +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D<float> DepthTexture : register(t0); + +RWTexture2D<uint> ModeTextureRW : register(u0); + +static const float kDisocclusionThreshold = 0.015; // Relative depth difference above which a reprojected pixel counts as disoccluded + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + if (any(dtid >= uint2(FrameDim))) + return; + + // Determine which eye this pixel belongs to + float2 uv = (float2(dtid) + 0.5) / FrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + // Read depth directly in SBS coords + float centerDepth = DepthTexture[dtid]; + +#ifdef DEBUG_DEPTH_MAP + // DIAGNOSTIC: Visualize what depth values StencilCS sees. + // Green (MODE_EDGE) = depth >= 1.0 (HMD mask threshold) + // Magenta (MODE_EDGE_NEIGHBOUR) = depth < 1e-5 (sky threshold) + // No tint (MODE_MAIN) = normal geometry with valid depth + if (centerDepth >= 1.0) { + ModeTextureRW[dtid] = MODE_EDGE; + return; + } + if (centerDepth < 1e-5) { + ModeTextureRW[dtid] = MODE_EDGE_NEIGHBOUR; + return; + } + ModeTextureRW[dtid] = MODE_MAIN; + return; +#endif + + // Sky/unrendered pixels (depth >= 1.0 at z-prepass time = depth buffer clear value) + // and HMD mask pixels both have depth >= 1.0 here. Treat them the same as sky: + // let edge detection run so geometry-vs-sky boundaries get classified.
+ // HMD mask pixels are in lens corners with no nearby geometry, so they'll + // fall through to MODE_DISOCCLUDED at the end. + bool isSky = (centerDepth < 1e-5) || (centerDepth >= 1.0); + float linCenter = isSky ? 999999.0 : SharedData::GetScreenDepth(centerDepth); + + // Near-camera supersampling: geometry closer than FullBlendDistance gets full + // shading in both eyes for bilateral blend (2x supersampling in VRPostProcess). + if (!isSky && linCenter < FullBlendDistance) { + ModeTextureRW[dtid] = MODE_FULL_BLEND; + return; + } + + // --- Disocclusion detection via reprojection (runs for every non-sky pixel not already tagged MODE_FULL_BLEND above) --- + // Early return: disoccluded pixels are always MODE_DISOCCLUDED regardless of edge proximity. + // This ensures MinEdgeDistance never affects disocclusion classification. + if (!isSky) { + Stereo::StereoBilateralResult reproj = Stereo::ReprojectToOtherEye( + uv, + centerDepth, + eyeIndex, + FrameDim); + + bool isDisoccluded = false; + if (!reproj.valid) { + isDisoccluded = true; + } else { + float otherDepth = DepthTexture[reproj.otherPx]; + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float relativeDepthDiff = abs(centerDepth - otherDepth) / maxDepth; + isDisoccluded = (relativeDepthDiff > kDisocclusionThreshold); + } + + if (isDisoccluded) { + ModeTextureRW[dtid] = MODE_DISOCCLUDED; + return; + } + } + + // Depth gate: skip edge detection for nearby geometry (saves perf, distant AA matters more) + // Sky pixels always run edge detection — they need to expand the edge band outward. + // Disocclusion detection (above) is independent of this gate and always runs. + bool skipEdgeDetection = !isSky && (linCenter < MinEdgeDistance); + + // --- Edge detection with two-tier classification --- + // MODE_EDGE: immediate neighbor (distance 1) has depth discontinuity, OR + // inner/foreground band (distance <= kInnerWidth).
+ static const uint kInnerWidth = 2; + int2 offsets[4] = { int2(-1, 0), int2(1, 0), int2(0, -1), int2(0, 1) }; + + uint nearestEdgeDist = 0xFFFFFFFF; // nearest distance at which a discontinuity was found + bool nearestWeAreOuter = false; // whether we are on the background side at that nearest hit (at equal distance the first direction tested wins) + + // Search radius: there is only an inner band width, so it is also the maximum distance searched + uint maxWidth = kInnerWidth; + + if (!skipEdgeDetection) { + [loop] + for (uint d = 1; d <= maxWidth; d++) { + [unroll] + for (int i = 0; i < 4; i++) { + int2 rawNeighbor = int2(dtid) + offsets[i] * (int)d; + uint2 neighborCoord = Stereo::ClampToEyeBounds(rawNeighbor, eyeIndex, FrameDim); + + float neighborDepth = DepthTexture[neighborCoord]; + bool neighborIsSky = (neighborDepth < 1e-5) || (neighborDepth >= 1.0); + float linNeighbor = neighborIsSky ? 999999.0 : SharedData::GetScreenDepth(neighborDepth); + float maxLin = max(max(linCenter, linNeighbor), 1e-5); + float relDepthDiff = abs(linCenter - linNeighbor) / maxLin; + + if (relDepthDiff > EdgeDepthThreshold && d < nearestEdgeDist) { + nearestEdgeDist = d; + nearestWeAreOuter = (linNeighbor < linCenter); // neighbor closer to camera = we are background + } + } + } + + } // !skipEdgeDetection + + if (nearestEdgeDist != 0xFFFFFFFF) { + // Classify based on distance and side + if (nearestEdgeDist == 1) { + // Immediate neighbor discontinuity: always MODE_EDGE regardless of side + ModeTextureRW[dtid] = MODE_EDGE; + return; + } else if (!nearestWeAreOuter && nearestEdgeDist <= kInnerWidth) { + // Inner/foreground band beyond distance 1 + ModeTextureRW[dtid] = MODE_EDGE; + return; + } + } + + // Sky pixels that aren't near edges -> disoccluded (reprojection is meaningless for sky) + if (isSky) { + ModeTextureRW[dtid] = MODE_DISOCCLUDED; + return; + } + + // Standard pixel + ModeTextureRW[dtid] = MODE_MAIN; +} diff --git a/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl new file mode 100644
index 0000000000..a7316065b9 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl @@ -0,0 +1,54 @@ +// VR Stereo Optimizations - Stencil Write Pixel Shader +// +// Reads from the per-pixel mode classification texture and depth texture. +// Discards pixels that should NOT be stencil-culled: +// - MODE_DISOCCLUDED (0) = fully shaded in Eye 1, no reprojection needed +// - MODE_FULL_BLEND (4) = near-camera pixels fully shaded in both eyes for supersampling +// - Sky/HMD-mask pixels (depth >= 1.0 or depth < 1e-5) = need normal rendering +// in the sky pass; they keep whatever tag they have in +// the mode texture for VRPostProcess but must not be stencil-culled. +// +// Every other pixel (geometry tagged MODE_MAIN or MODE_EDGE) survives and gets stencil ref=1 written. +// +// Mode texture is full SBS resolution (same as render target). +// The DSS is configured with StencilFunc=ALWAYS, StencilPassOp=REPLACE, ref=1. +// Pixels that survive (not discarded) get stencil=1 written. + +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D<uint> ModeTexture : register(t0); +Texture2D<float> DepthTexture : register(t1); + +struct PS_INPUT +{ + float4 Position : SV_Position; + float2 TexCoord : TEXCOORD0; +}; + +void main(PS_INPUT input) +{ + // Mode texture is full SBS resolution — SV_Position maps directly + // (viewport is Eye 1 half, so SV_Position.x starts at eyeWidth) + int2 modeCoord = int2(input.Position.xy); + + uint mode = ModeTexture[modeCoord]; + + // MODE_MAIN and MODE_EDGE in Eye 1 write stencil ref=1 (reprojectable). + // MODE_DISOCCLUDED is fully shaded in Eye 1, so it is discarded here and never receives the stencil mark. + if (mode == MODE_DISOCCLUDED) + discard; + + // Sky/HMD-mask pixels must not be stencil-culled regardless of edge classification. + // Their tag in the mode texture is left untouched for VRPostProcess, + // but they must render normally in the sky pass (which runs after stencil culling).
+ float depth = DepthTexture[modeCoord]; + if (depth >= 1.0 || depth < 1e-5) + discard; + + // MODE_FULL_BLEND: near-camera pixels fully shaded in both eyes for supersampling + if (mode == MODE_FULL_BLEND) + discard; + + // Pixel survives: DSS writes stencil ref=1 + // No color output (no RTV bound) +} diff --git a/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl new file mode 100644 index 0000000000..7a45fa60c6 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl @@ -0,0 +1,24 @@ +// VR Stereo Optimizations - Stencil Write Vertex Shader +// +// Procedural fullscreen triangle covering Eye 1 (right half of SBS buffer). +// No vertex buffer needed — vertex positions are generated from SV_VertexID. +// The viewport is set to Eye 1 by the C++ code, so we just emit a standard +// fullscreen triangle in clip space. + +struct VS_OUTPUT +{ + float4 Position : SV_Position; + float2 TexCoord : TEXCOORD0; +}; + +VS_OUTPUT main(uint vertexID : SV_VertexID) +{ + VS_OUTPUT output; + + // Fullscreen triangle: IDs 0..2 give uv (0,0), (2,0), (0,2), i.e. clip positions (-1,1), (3,1), (-1,-3), which enclose the [-1,1] square + float2 uv = float2((vertexID << 1) & 2, vertexID & 2); + output.Position = float4(uv * float2(2, -2) + float2(-1, 1), 0, 1); + output.TexCoord = uv; + + return output; +} diff --git a/package/Shaders/VRStereoOptimizations/cbuffers.hlsli b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli new file mode 100644 index 0000000000..76f31b6883 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli @@ -0,0 +1,35 @@ +// VR Stereo Optimizations - Shared constant buffer layout +// Must match VRStereoOptParams in VRStereoOptimizations.h exactly + +#ifndef __VR_STEREO_OPT_CBUFFERS_HLSLI__ +#define __VR_STEREO_OPT_CBUFFERS_HLSLI__ + +cbuffer VRStereoOptParams : register(b1) +{ + float2 FrameDim; // Full stereo buffer dimensions (both eyes) + float2 RcpFrameDim; // 1.0 / FrameDim + + uint StereoModeValue; // 0=Off, 1=Enable + float
DisocclusionThreshold; // Depth difference threshold for disocclusion detection (NOTE(review): StencilCS.hlsl does not read this; it uses its own kDisocclusionThreshold constant — confirm which is intended) + float EdgeDepthThreshold; // Relative depth difference threshold for edge detection + uint EdgeWidth; // Half-width of edge detection band in pixels + + float2 QualityJitter; // Sub-pixel jitter offset (Quality mode) + float FoveatedRadius; // Radius of foveal region in UV space + float pad2; // Presumably padding that completes this 16-byte row — confirm against the C++ struct + + float2 FoveatedCenter; // Center of foveal region in UV space + float MinEdgeDistance; // Linearized depth below which StencilCS skips edge detection + float FullBlendDistance; // Linearized depth below which pixels get MODE_FULL_BLEND (game units) +}; + +#define STEREO_MODE_OFF 0 +#define STEREO_MODE_ENABLE 1 + +#define MODE_DISOCCLUDED 0 // Fully shaded, no reprojection, no blend (sky, HMD mask, parallax-occluded) +#define MODE_EDGE 1 // Depth edge boundary (distance 1) or inner/foreground band; fully shaded + bilateral blend (NOTE(review): StencilWritePS still writes stencil ref=1 for this mode in Eye 1 — confirm "fully shaded" holds for both eyes) +#define MODE_MAIN 2 // Eye 0: no reproject (Perf) / bilateral (Quality). Eye 1: overwrite reproject (Perf) / bilateral (Quality) +#define MODE_EDGE_NEIGHBOUR 3 // (Legacy) Outer/background band — now classified as MODE_MAIN; only written by StencilCS under DEBUG_DEPTH_MAP +#define MODE_FULL_BLEND 4 // Near-camera geometry: both eyes fully shaded + bilateral blend for 2x supersampling + +#endif diff --git a/src/Deferred.cpp b/src/Deferred.cpp index a0a322500b..a0a5063e5e 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -14,6 +14,7 @@ #include "Features/TerrainBlending.h" #include "Features/Upscaling.h" #include "Features/VR.h" +#include "Features/VRStereoOptimizations.h" #include "Features/WeatherEditor.h" #include "Hooks.h" @@ -275,6 +276,11 @@ void Deferred::StartDeferred() PrepassPasses(); OverrideBlendStates(); + + // VR: Classify Eye 1 pixels and write hardware stencil marks before geometry rendering + if (globals::game::isVR) { + globals::features::vrStereoOptimizations.DispatchStencil(); + } } void Deferred::DeferredPasses() @@ -363,6 +369,14 @@ void Deferred::DeferredPasses() context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); + // Bind
VRStereoOptimizations mode texture for Eye 1 skip + auto& vrStereoOpt = globals::features::vrStereoOptimizations; + if (REL::Module::IsVR() && vrStereoOpt.loaded && vrStereoOpt.settings.stereoMode != VRStereoOptimizations::StereoMode::Off) { + ID3D11ShaderResourceView* modeSRV = vrStereoOpt.GetModeTextureSRV(); + if (modeSRV) + context->CSSetShaderResources(16, 1, &modeSRV); + } + ID3D11UnorderedAccessView* uavs[3]{ main.UAV, normals.UAV, motionVectors.UAV }; context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); @@ -370,13 +384,28 @@ void Deferred::DeferredPasses() context->CSSetShader(shader, nullptr, 0); context->Dispatch(dispatchCount.x, dispatchCount.y, 1); + + // Unbind mode texture SRV + if (REL::Module::IsVR() && vrStereoOpt.loaded && vrStereoOpt.settings.stereoMode != VRStereoOptimizations::StereoMode::Off) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->CSSetShaderResources(16, 1, &nullSRV); + } } - // VR stereo consistency blend - depth-aware bilateral blend at the eye seam - // Runs after composite as a general safety net for all screen-space effects. - // Must run before clearing b12/b13 -- needs FrameBuffer matrices for reprojection. - if (globals::game::isVR) + // VR: Deactivate stencil culling now that geometry rendering is complete. + // Must happen before StereoBlend so the blend pass itself isn't stencil-blocked. + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vrStereoOptimizations; + if (stereoOpt.IsStencilActive()) { + stereoOpt.DeactivateStencil(); + } + } + + // VR: Stereo reprojection fills Eye 1 holes here (after DeferredComposite, before SSR/water/sky) + // so that ISReflectionsRayTracing sees valid pixels in both eyes. 
+ if (globals::game::isVR) { globals::features::vr.DrawStereoBlend(); + } // Clear { @@ -551,6 +580,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainComposite() if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); + if (REL::Module::IsVR() && globals::features::vrStereoOptimizations.loaded) + defines.push_back({ "VR_STEREO_OPT", nullptr }); + mainCompositeCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); } return mainCompositeCS; @@ -576,6 +608,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainCompositeInterior() if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); + if (REL::Module::IsVR() && globals::features::vrStereoOptimizations.loaded) + defines.push_back({ "VR_STEREO_OPT", nullptr }); + mainCompositeInteriorCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); } return mainCompositeInteriorCS; @@ -593,6 +628,7 @@ void Deferred::Hooks::Main_RenderWorld::thunk(bool a1) state->permutationData.ExtraShaderDescriptor |= static_cast(State::ExtraShaderDescriptors::InWorld); state->inWorld = true; func(a1); + state->inWorld = false; state->permutationData.ExtraShaderDescriptor &= ~static_cast(State::ExtraShaderDescriptors::InWorld); }; diff --git a/src/Feature.cpp b/src/Feature.cpp index 24b634979f..f3625e7d48 100644 --- a/src/Feature.cpp +++ b/src/Feature.cpp @@ -30,6 +30,7 @@ #include "Features/UnifiedWater.h" #include "Features/Upscaling.h" #include "Features/VR.h" +#include "Features/VRStereoOptimizations.h" #include "Features/VolumetricLighting.h" #include "Features/VolumetricShadows.h" #include "Features/WaterEffects.h" @@ -247,6 +248,7 @@ const std::vector& Feature::GetFeatureList() static auto BuildVRList = []() -> std::vector { auto v = features; v.push_back(&globals::features::vr); + v.push_back(&globals::features::vrStereoOptimizations); // In developer mode, keep all features for testing // In production mode, filter 
to VR-compatible only diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index 604f49e986..33e8f1098b 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -1,7 +1,9 @@ #include "Upscaling.h" #include "Deferred.h" +#include "Features/VRStereoOptimizations.h" #include "Hooks.h" +#include "TAAReorder.h" #include "State.h" #include "Upscaling/DX12SwapChain.h" #include "Upscaling/FidelityFX.h" @@ -25,7 +27,9 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( sharpnessFSR, sharpnessDLSS, presetDLSS, - useGatherWideKernel); + useGatherWideKernel, + vrDlssViewportScale, + vrPeripheryTAA); decltype(&D3D11CreateDeviceAndSwapChain) ptrD3D11CreateDeviceAndSwapChainUpscaling; @@ -61,7 +65,9 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( upscaling.CheckBackendFeatures(pAdapter); // Use better swap effect to prevent tearing and improve performance - pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + // But don't change it for VR as it can affect frame pacing with the VR compositor + if (!globals::game::isVR) + pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; bool shouldProxy = !globals::game::isVR; if (shouldProxy) @@ -141,8 +147,11 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( ppImmediateContext); if (upscaling.IsBackendInitialized()) { - upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); - upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + // Skip interface wrapping for VR - it can affect frame pacing with VR compositor + if (!globals::game::isVR) { + upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); + upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + } upscaling.SetBackendD3DDevice(*ppDevice); upscaling.PostBackendDevice(); } @@ -238,6 +247,30 @@ void Upscaling::DrawSettings() ImGui::Text("Set to 'Default' for automatic selection based on your Upscale Preset and hardware."); ImGui::Text("Changing this setting requires a restart to take effect."); } + 
+ if (globals::game::isVR) { + if (ImGui::TreeNodeEx("VR Viewport Scaling", ImGuiTreeNodeFlags_DefaultOpen)) { + ImGui::SliderFloat("DLSS Viewport Scale", &settings.vrDlssViewportScale, 0.5f, 1.0f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Controls the fraction of each eye's view that DLSS processes."); + ImGui::Text("Lower values = better performance, less visible area upscaled by DLSS."); + ImGui::Text("The VR lens hides the periphery, so 0.7-0.85 is recommended."); + ImGui::Text("The periphery outside the DLSS region is filled with a bilinear upscale."); + } + + if (settings.vrDlssViewportScale < 1.0f) { + bool peripheryTAA = settings.vrPeripheryTAA != 0; + if (ImGui::Checkbox("Periphery TAA", &peripheryTAA)) + settings.vrPeripheryTAA = peripheryTAA ? 1 : 0; + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Applies temporal anti-aliasing to the bilinear-upscaled periphery."); + ImGui::Text("Reduces shimmer and improves peripheral quality."); + ImGui::Text("The DLSS center region passes through unchanged."); + } + } + ImGui::TreePop(); + } + } } if (globals::game::isVR) { @@ -460,6 +493,7 @@ void Upscaling::LoadSettings(json& o_json) logger::warn("[Upscaling] Loaded useGatherWideKernel {} out of range, clamping to 1", settings.useGatherWideKernel); settings.useGatherWideKernel = 1; } + settings.vrDlssViewportScale = std::clamp(settings.vrDlssViewportScale, 0.5f, 1.0f); auto iniSettingCollection = globals::game::iniPrefSettingCollection; if (iniSettingCollection) { auto setting = iniSettingCollection->GetSetting("bUseTAA:Display"); @@ -487,6 +521,10 @@ void Upscaling::DataLoaded() void Upscaling::Load() { *(uintptr_t*)&ptrD3D11CreateDeviceAndSwapChainUpscaling = SKSE::PatchIAT(hk_D3D11CreateDeviceAndSwapChainUpscaling, "d3d11.dll", "D3D11CreateDeviceAndSwapChain"); + + // Install depth/stencil registration hook early (before renderer creates targets) + if (globals::game::isVR) + TAAReorder::InitEarly(); } struct 
BSImageSpace_Init_FXAA @@ -530,6 +568,10 @@ void Upscaling::PostPostLoad() stl::detour_thunk(REL::RelocationID(98974, 105626)); logger::info("[Upscaling] Installed hooks"); + + // Install TAA reordering hooks for VR periphery TAA + if (globals::game::isVR) + TAAReorder::Init(); } Upscaling::UpscaleMethod Upscaling::GetUpscaleMethod() const @@ -639,6 +681,13 @@ void Upscaling::DestroyUpscalingTextureResources(UpscaleMethod a_upscalemethod) } } + // VR periphery TAA textures - only needed for DLSS with viewport scaling + if (a_upscalemethod != UpscaleMethod::kDLSS) { + vrPreTAACopy = nullptr; + for (int i = 0; i < 2; i++) + vrTAAdPerEye[i].reset(); + } + // Motion vector copy texture is only needed for DLSS - destroy when switching away from DLSS if (a_upscalemethod != UpscaleMethod::kDLSS) { if (motionVectorCopyTexture) { @@ -692,6 +741,8 @@ void Upscaling::CheckResources(UpscaleMethod a_upscalemethod) vrIntermediateMotionVectors[i].reset(); vrIntermediateReactiveMask[i].reset(); vrIntermediateTransparencyMask[i].reset(); + vrFinalOutput[i].reset(); + vrCropColorIn[i].reset(); } } } @@ -744,6 +795,8 @@ ID3D11PixelShader* Upscaling::GetDepthRefractionUpscalePS() if (!depthRefractionUpscalePS) { logger::debug("Compiling DepthRefractionUpscalePS.hlsl"); std::vector> defines = { { "PSHADER", "" } }; + if (globals::game::isVR) + defines.push_back({ "VR", "" }); depthRefractionUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DepthRefractionUpscalePS.hlsl", defines, "ps_5_0")); } @@ -771,6 +824,37 @@ ID3D11VertexShader* Upscaling::GetUpscaleVS() return upscaleVS.get(); } +ID3D11PixelShader* Upscaling::GetDlssCompositePS() +{ + if (!vrDlssCompositePS) { + logger::debug("Compiling DLSSCompositePS.hlsl"); + vrDlssCompositePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0")); + } + + return vrDlssCompositePS.get(); +} + +ID3D11PixelShader* 
Upscaling::GetDlssUpscalePS() +{ + if (!vrDlssUpscalePS) { + logger::debug("Compiling DLSSCompositePS.hlsl (BILINEAR_UPSCALE)"); + vrDlssUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader( + L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", + { { "PSHADER", "" }, { "BILINEAR_UPSCALE", "" } }, "ps_5_0")); + } + + if (!vrDlssUpscaleCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = sizeof(DlssCompositeCB); + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrDlssUpscaleCB.put())); + } + + return vrDlssUpscalePS.get(); +} + eastl::unique_ptr Upscaling::CreateTextureFromSource(ID3D11Resource* src, uint32_t width, uint32_t height, bool copyBindFlags, bool createSRV, bool createUAV, const char* name) { @@ -820,7 +904,7 @@ void Upscaling::CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight std::string suffix = (i == 0) ? "Left" : "Right"; vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, inWidth, inHeight, false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); - vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, false, ("Upscale_ColorOut_" + suffix).c_str()); + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); // Depth: R32_TYPELESS base (matches kMAIN), with R32_FLOAT SRV for ClearHMDMaskCS. // CopySubresourceRegion requires matching typeless formats; SRV reinterprets as R32_FLOAT. 
@@ -873,41 +957,185 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; - bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; - if (!needsRecreate) { - needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || - vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); - } - if (needsRecreate) { - logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", - eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); - CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, - colorSrc, mvecSrc, reactiveSrc, transparencySrc); - } + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + + if (viewportScaling) { + // Viewport scaling: physically crop all DLSS inputs to eliminate non-zero subrect offsets. + // vrIntermediateColorIn stays at FULL render-res (for ClearHMDMask + FillPeriphery). + // All other DLSS inputs (depth, mvec, masks) are CROP-sized. + // vrCropColorIn is CROP-sized (DLSS color input, extracted from masked full color). + // This ensures DLSS sees all inputs at {0,0} with no subrect base offsets, + // which is critical for correct temporal reprojection during camera motion. 
+ uint32_t cropWidthIn = (uint32_t)(eyeWidthIn * vpScale); + uint32_t cropHeightIn = (uint32_t)(eyeHeightIn * vpScale); + uint32_t cropWidthOut = (uint32_t)(eyeWidthOut * vpScale); + uint32_t cropHeightOut = (uint32_t)(eyeHeightOut * vpScale); + + bool needsRecreate = !vrIntermediateColorIn[0] || !vrCropColorIn[0] || !vrIntermediateDepth[0] || + !vrIntermediateColorOut[0] || !vrFinalOutput[0]; + if (!needsRecreate) { + // Check format too — periphery TAA feeds R8G8B8A8 post-PP intermediate, + // while normal DLSS feeds R11G11B10 kMAIN. Must recreate on format change. + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrCropColorIn[0]->desc.Width != cropWidthIn || + vrCropColorIn[0]->desc.Height != cropHeightIn || + vrIntermediateDepth[0]->desc.Width != cropWidthIn || + vrIntermediateDepth[0]->desc.Height != cropHeightIn || + vrIntermediateColorOut[0]->desc.Width != cropWidthOut || + vrIntermediateColorOut[0]->desc.Height != cropHeightOut || + vrFinalOutput[0]->desc.Width != eyeWidthOut || + vrFinalOutput[0]->desc.Height != eyeHeightOut); + } - // Extract both eyes' inputs from combined stereo buffers - for (uint32_t i = 0; i < 2; ++i) { - uint32_t offsetXIn = (i == 1) ? 
eyeWidthIn : 0; - D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; - - context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); - } + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR viewport-scaled intermediates: full {}x{}, crop in {}x{}, crop out {}x{}", + eyeWidthIn, eyeHeightIn, cropWidthIn, cropHeightIn, cropWidthOut, cropHeightOut); + + for (int i = 0; i < 2; i++) { + std::string suffix = (i == 0) ? "Left" : "Right"; + + // Full-size color for ClearHMDMask + FillPeriphery + vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, eyeWidthIn, eyeHeightIn, + false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); + + // Crop-sized DLSS color input (needs UAV for ClearHMDMask) + vrCropColorIn[i] = CreateTextureFromSource(colorSrc, cropWidthIn, cropHeightIn, + false, true, true, ("Upscale_CropColorIn_" + suffix).c_str()); + + // Crop-sized DLSS output + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, cropWidthOut, cropHeightOut, + false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); + + // Crop-sized depth (R32_TYPELESS with R32_FLOAT SRV) + { + D3D11_TEXTURE2D_DESC depthDesc = {}; + depthDesc.Width = cropWidthIn; + depthDesc.Height = cropHeightIn; + depthDesc.MipLevels = 1; + depthDesc.ArraySize = 1; + depthDesc.Format = DXGI_FORMAT_R32_TYPELESS; + depthDesc.SampleDesc.Count = 1; + depthDesc.Usage = D3D11_USAGE_DEFAULT; + depthDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + 
vrIntermediateDepth[i] = eastl::make_unique(depthDesc); + Util::SetResourceName(vrIntermediateDepth[i]->resource.get(), ("Upscale_Depth_" + suffix).c_str()); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + vrIntermediateDepth[i]->CreateSRV(srvDesc); + } + + // Crop-sized motion vectors, reactive mask, transparency mask + vrIntermediateMotionVectors[i] = CreateTextureFromSource(mvecSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_MVec_" + suffix).c_str()); + vrIntermediateReactiveMask[i] = CreateTextureFromSource(reactiveSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Reactive_" + suffix).c_str()); + vrIntermediateTransparencyMask[i] = CreateTextureFromSource(transparencySrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Transparency_" + suffix).c_str()); + + // Full display-res composition target + vrFinalOutput[i] = CreateTextureFromSource(colorSrc, eyeWidthOut, eyeHeightOut, + false, true, true, ("Upscale_FinalOutput_" + suffix).c_str()); + } + } - // Zero color where depth == 0 (HMD hidden area) in each per-eye buffer. - // Depth is read from the combined stereo SRV at the per-eye offset; color is written - // to the isolated per-eye UAV (ColorOffsetX = 0). - auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + // Copy full eye to full-size vrIntermediateColorIn (raw render-res, no HMD mask yet) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + } - for (uint32_t i = 0; i < 2; ++i) { - uint32_t depthOffset = (i == 1) ? 
eyeWidthIn : 0; - ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, - eyeWidthIn, eyeHeightIn, depthOffset, 0); + uint32_t cropOffsetX = (eyeWidthIn - cropWidthIn) / 2; + uint32_t cropOffsetY = (eyeHeightIn - cropHeightIn) / 2; + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + + // Extract DLSS crop from raw buffer (before TAA or HMD mask), then mask the crop directly. + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + + // Crop color from raw (unmasked, non-TAA'd) full-size buffer + D3D11_BOX cropBox = { cropOffsetX, cropOffsetY, 0, cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrCropColorIn[i]->resource.get(), 0, 0, 0, 0, + vrIntermediateColorIn[i]->resource.get(), 0, &cropBox); + + // ClearHMDMask directly on the crop (depth offset accounts for eye + crop position in stereo buffer) + ClearHMDMask(vrCropColorIn[i]->uav.get(), depthTexture.depthSRV, + cropWidthIn, cropHeightIn, offsetXIn + cropOffsetX, 0, cropOffsetY); + + // Crop depth/mvec/reactive/transparency directly from stereo buffers + D3D11_BOX stereoCropBox = { offsetXIn + cropOffsetX, cropOffsetY, 0, + offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, + depthSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, + mvecSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, + reactiveSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, + transparencySrc, 0, &stereoCropBox); + } + + // ClearHMDMask on full-size buffer (for FillPeriphery) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? 
eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } + } else { + // Non-viewport-scaling path: all textures at full per-eye dimensions + bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; + if (!needsRecreate) { + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || + vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); + } + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", + eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, + colorSrc, mvecSrc, reactiveSrc, transparencySrc); + } + + // Release viewport-scaling-specific textures + for (int i = 0; i < 2; i++) { + vrCropColorIn[i].reset(); + vrFinalOutput[i].reset(); + vrTAAdPerEye[i].reset(); + } + vrPreTAACopy = nullptr; + + // Copy full eye to per-eye intermediates + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? 
eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); + } + + // Zero color where depth == 0 (HMD hidden area) in each per-eye buffer + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } } if (state->frameAnnotations) @@ -929,11 +1157,34 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); uint32_t eyeHeightOut = (uint32_t)screenSize.y; - // Write upscaled outputs back + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + for (uint32_t i = 0; i < 2; ++i) { uint32_t offsetXOut = (i == 1) ? 
eyeWidthOut : 0; - D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; - context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + + if (viewportScaling && vrFinalOutput[i]) { + // Paste crop-sized DLSS output into center of full-size composition target + uint32_t dlssWidthOut = vrIntermediateColorOut[i]->desc.Width; + uint32_t dlssHeightOut = vrIntermediateColorOut[i]->desc.Height; + uint32_t pasteX = (eyeWidthOut - dlssWidthOut) / 2; + uint32_t pasteY = (eyeHeightOut - dlssHeightOut) / 2; + + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + + // Copy composition target to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrFinalOutput[i]->resource.get(), 0, &outBox); + } else { + // Direct copy DLSS output to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + } } if (state->frameAnnotations) @@ -941,7 +1192,11 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) } void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX) + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY, + uint32_t depthWidth, uint32_t depthHeight, + uint32_t colorWidth, uint32_t colorHeight, + ID3D11ShaderResourceView* fallbackSRV, uint32_t fallbackOffsetX) { if (!globals::game::isVR) return; @@ -952,7 +1207,7 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe 
vrClearHMDMaskCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/ClearHMDMaskCS.hlsl", {}, "cs_5_0")); D3D11_BUFFER_DESC cbDesc = {}; - cbDesc.ByteWidth = 16; // 4 uints + cbDesc.ByteWidth = 32; // 8 uints (offsets + optional scaling dimensions) cbDesc.Usage = D3D11_USAGE_DYNAMIC; cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; @@ -965,8 +1220,9 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->CSSetShader(vrClearHMDMaskCS.get(), nullptr, 0); - ID3D11ShaderResourceView* srvs[1] = { depthSRV }; - context->CSSetShaderResources(0, 1, srvs); + // t0 = depth, t1 = fallback (nullptr → unbound → reads return (0,0,0,0) → black) + ID3D11ShaderResourceView* srvs[2] = { depthSRV, fallbackSRV }; + context->CSSetShaderResources(0, 2, srvs); ID3D11UnorderedAccessView* uavs[1] = { colorUAV }; context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); @@ -974,9 +1230,10 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe D3D11_MAPPED_SUBRESOURCE mapped{}; context->Map(vrClearHMDMaskCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - uint32_t offsets[4] = { depthOffsetX, colorOffsetX, 0, 0 }; + uint32_t cbData[8] = { depthOffsetX, colorOffsetX, depthOffsetY, fallbackOffsetX, + depthWidth, depthHeight, colorWidth, colorHeight }; - memcpy(mapped.pData, offsets, sizeof(offsets)); + memcpy(mapped.pData, cbData, sizeof(cbData)); context->Unmap(vrClearHMDMaskCB.get(), 0); ID3D11Buffer* cbs[1] = { vrClearHMDMaskCB.get() }; @@ -984,13 +1241,81 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->Dispatch(dispatchX, dispatchY, 1); + // Unbind + ID3D11ShaderResourceView* nullSRVs[2] = { nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + ID3D11Buffer* nullCB[1] = { nullptr }; + context->CSSetShaderResources(0, 2, nullSRVs); + context->CSSetUnorderedAccessViews(0, 1, nullUAV, 
nullptr); + context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetShader(nullptr, nullptr, 0); + } +} + +void Upscaling::FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV) +{ + if (!globals::game::isVR || !vrFinalOutput[eyeIndex]) + return; + if (!overrideSRV && !vrIntermediateColorIn[eyeIndex]) + return; + + auto context = globals::d3d::context; + + if (!vrPeripheryFillCS) { + vrPeripheryFillCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/VRPeripheryFillCS.hlsl", {}, "cs_5_0")); + + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 16; // 4 uints + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrPeripheryFillCB.put())); + + D3D11_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&samplerDesc, vrLinearSampler.put())); + } + + if (vrPeripheryFillCS) { + auto dispatchX = (dstWidth + 7) / 8; + auto dispatchY = (dstHeight + 7) / 8; + + context->CSSetShader(vrPeripheryFillCS.get(), nullptr, 0); + + // Read from overrideSRV (e.g. TAA texture) or default render-res per-eye input. + ID3D11ShaderResourceView* srvs[1] = { overrideSRV ? 
overrideSRV : vrIntermediateColorIn[eyeIndex]->srv.get() }; + context->CSSetShaderResources(0, 1, srvs); + + ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[eyeIndex]->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; + context->CSSetSamplers(0, 1, samplers); + + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrPeripheryFillCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + uint32_t cbData[4] = { srcWidth, srcHeight, dstWidth, dstHeight }; + memcpy(mapped.pData, cbData, sizeof(cbData)); + context->Unmap(vrPeripheryFillCB.get(), 0); + + ID3D11Buffer* cbs[1] = { vrPeripheryFillCB.get() }; + context->CSSetConstantBuffers(0, 1, cbs); + + context->Dispatch(dispatchX, dispatchY, 1); + // Unbind ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; ID3D11Buffer* nullCB[1] = { nullptr }; + ID3D11SamplerState* nullSampler[1] = { nullptr }; context->CSSetShaderResources(0, 1, nullSRV); context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetSamplers(0, 1, nullSampler); context->CSSetShader(nullptr, nullptr, 0); } } @@ -1101,6 +1426,10 @@ void Upscaling::ConfigureUpscaling(RE::BSGraphics::State* a_viewport) // Disable dynamic resolution unless the game explicitly enables it if (!globals::game::isVR) runtimeData.dynamicResolutionLock = 1; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // No longer need to force-disable culling when upscaling is active. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs. 
} void Upscaling::SetupResources() @@ -1145,6 +1474,7 @@ void Upscaling::SetupResources() depthStencilDesc.BackFace.StencilDepthFailOp = depthStencilDesc.FrontFace.StencilDepthFailOp; depthStencilDesc.BackFace.StencilPassOp = depthStencilDesc.FrontFace.StencilPassOp; depthStencilDesc.BackFace.StencilFunc = depthStencilDesc.FrontFace.StencilFunc; + } else { depthStencilDesc.StencilEnable = false; // Disable stencil testing } @@ -1198,6 +1528,12 @@ void Upscaling::ClearShaderCache() depthRefractionUpscalePS = nullptr; // com_ptr automatically releases underwaterMaskUpscalePS = nullptr; // com_ptr automatically releases upscaleVS = nullptr; // com_ptr automatically releases + vrClearHMDMaskCS = nullptr; + vrPeripheryFillCS = nullptr; + vrPeripheryFillCB = nullptr; + vrDlssCompositePS = nullptr; + vrDlssUpscalePS = nullptr; + vrDlssUpscaleCB = nullptr; } void Upscaling::CopySharedD3D12Resources() @@ -1408,6 +1744,18 @@ bool Upscaling::IsUpscalingActive() const return resolutionScale.x < .99f; } +std::vector Upscaling::GetActiveConstraints() const +{ + std::vector constraints; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs, + // so we no longer need to constrain depth buffer culling when upscaling is active. + + return constraints; +} + + /** * @brief Retrieves the current frame time for frame generation. * @@ -1519,7 +1867,7 @@ Upscaling::BlurResources Upscaling::GetBlurResources() const return {}; } -void Upscaling::Upscale() +void Upscaling::Upscale(ID3D11Texture2D* colorSourceOverride) { auto upscaleMethod = GetUpscaleMethod(); @@ -1580,8 +1928,13 @@ void Upscaling::Upscale() { state->BeginPerfEvent("Upscaling"); + // Use color source override if provided (e.g., post-PP intermediate for periphery TAA) + ID3D11Resource* colorSrc = colorSourceOverride + ? 
static_cast(colorSourceOverride) + : static_cast(main.texture); + if (upscaleMethod == UpscaleMethod::kDLSS) { - streamline.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); + streamline.Upscale(colorSrc, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); } else if (upscaleMethod == UpscaleMethod::kFSR) { fidelityFX.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVector.texture, settings.sharpnessFSR); } @@ -1821,20 +2174,73 @@ void Upscaling::Main_PostProcessing::thunk(RE::ImageSpaceManager* a_this, uint32 if (upscaling.d3d12SwapChainActive && upscaling.settings.frameGenerationMode) upscaling.CopySharedD3D12Resources(); - if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) - upscaling.PerformUpscaling(); + // Increment diagnostic counter (rate-limits TAAReorder logging) + if (TAAReorder::g_initialized) { + TAAReorder::g_diagCounter = (TAAReorder::g_diagCounter + 1) % TAAReorder::DIAG_INTERVAL; + if (TAAReorder::g_diagCounter == 0) { + TAAReorder::g_frameSeqCounter = 0; + logger::info("[SEQ] Main_PostProcessing START seq={}", TAAReorder::g_frameSeqCounter++); + } + } + + bool peripheryTAA = TAAReorder::ShouldReorderTAA(); - if (upscaleMethod == UpscaleMethod::kDLSS) - upscaling.ApplySharpening(); + if (peripheryTAA) { + // ─── Periphery TAA with post-conductor DLSS (PureDark's approach) ─── + // func() with TAA enabled → conductor runs all passes unimpeded: + // Phase 2A: ExecutePassHook captures post-PP intermediate to g_postPPCopy + // Phase 5: TAA + DRS → submit texture + // After conductor: ConductorCallHook evaluates DLSS on g_postPPCopy, + // then pastes DLSS center onto submit texture - auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); - 
GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + + // Reset per-frame flags + TAAReorder::g_postPPReady = false; + TAAReorder::g_dlssReady = false; + TAAReorder::g_dlssPasteComplete = false; + TAAReorder::g_phase5Complete = false; + TAAReorder::g_bsHookCallCount = 0; - BSImagespaceShaderISTemporalAA->taaEnabled = upscaleMethod == UpscaleMethod::kTAA; + if (TAAReorder::g_diagCounter == 0) + logger::info("[TAAReorder] peripheryTAA: running func() with TAA enabled..."); - func(a_this, a3, a_target, a_4, a_5); + // func() with TAA ENABLED — DLSS eval + paste in ConductorCallHook (post-conductor) + BSImagespaceShaderISTemporalAA->taaEnabled = true; + func(a_this, a3, a_target, a_4, a_5); + + // Lock DRS + update camera (after conductor completes) + auto& runtimeData = globals::game::graphicsState->GetRuntimeData(); + runtimeData.dynamicResolutionLock = 1; + UpdateCameraData(); + + // Disable TAA for remainder of frame + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } else { + // ─── Normal flow (no periphery TAA) ─── + if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) + upscaling.PerformUpscaling(); + + if (upscaleMethod == UpscaleMethod::kDLSS) + upscaling.ApplySharpening(); + + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + + BSImagespaceShaderISTemporalAA->taaEnabled = (upscaleMethod == UpscaleMethod::kTAA); + + if (TAAReorder::g_diagCounter == 0 && TAAReorder::g_initialized) + logger::info("[DIAG] Normal DLSS flow: taaEnabled={}, running func()...", BSImagespaceShaderISTemporalAA->taaEnabled); + + func(a_this, a3, a_target, a_4, a_5); + + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } - BSImagespaceShaderISTemporalAA->taaEnabled = false; + // VR CAS sharpening 
(after TAA) + if (REL::Module::IsVR() && globals::features::vrStereoOptimizations.loaded) + globals::features::vrStereoOptimizations.ApplyCAS(a_target); } void Upscaling::SetScissorRect::thunk(RE::BSGraphics::Renderer* This, int a_left, int a_top, int a_right, int a_bottom) diff --git a/src/Features/Upscaling.h b/src/Features/Upscaling.h index 6cecf6cbaf..7eb13124e9 100644 --- a/src/Features/Upscaling.h +++ b/src/Features/Upscaling.h @@ -59,6 +59,8 @@ struct Upscaling : Feature float sharpnessDLSS = 0.0f; uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + float vrDlssViewportScale = 1.0f; // 0.5 to 1.0, fraction of each eye that DLSS processes (VR only) + uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) }; Settings settings; @@ -110,6 +112,7 @@ struct Upscaling : Feature virtual void Load() override; virtual void PostPostLoad() override; virtual void SetupResources() override; + virtual std::vector GetActiveConstraints() const override; UpscaleMethod GetUpscaleMethod() const; @@ -138,7 +141,11 @@ struct Upscaling : Feature winrt::com_ptr vrClearHMDMaskCB; // Helper to dispatch mask clearing for a single eye region void ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX); + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY = 0, + uint32_t depthWidth = 0, uint32_t depthHeight = 0, + uint32_t colorWidth = 0, uint32_t colorHeight = 0, + ID3D11ShaderResourceView* fallbackSRV = nullptr, uint32_t fallbackOffsetX = 0); // Shared VR Per-Eye Intermediate Buffers // Owned here so both Streamline (DLSS) and FidelityFX (FSR) can use them. 
@@ -148,6 +155,34 @@ struct Upscaling : Feature eastl::unique_ptr vrIntermediateMotionVectors[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateReactiveMask[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateTransparencyMask[2]; // per-eye render resolution + eastl::unique_ptr vrFinalOutput[2]; // per-eye display-res composition target (VR viewport scaling) + eastl::unique_ptr vrCropColorIn[2]; // crop-sized DLSS color input (VR viewport scaling only) + + // Periphery TAA (conductor approach) — used by two-call func() flow + winrt::com_ptr vrPreTAACopy; // full stereo kMAIN copy (Phase 1 PP, pre-TAA) + eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) + + // Periphery fill compute shader (bilinear upscale render-res → display-res for VR viewport scaling) + winrt::com_ptr vrPeripheryFillCS; + winrt::com_ptr vrPeripheryFillCB; + winrt::com_ptr vrLinearSampler; + + // DLSS composite pixel shaders (format-converting fullscreen copy for TAAReorder) + winrt::com_ptr vrDlssCompositePS; // point-sample (same-res format conversion) + winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) + winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params + ID3D11PixelShader* GetDlssCompositePS(); + ID3D11PixelShader* GetDlssUpscalePS(); + + struct DlssCompositeCB + { + float2 DynResScale; // renderRes / displayRes per-eye + float2 EyeOffset; // (i * eyeWidth, 0) + float2 SrcTexSize; // full texture dimensions + float2 pad; + }; + void FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV = nullptr); // Helper to create/resize per-eye buffers matching source formats void CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight, uint32_t outWidth, uint32_t outHeight, @@ -164,7 +199,7 @@ struct Upscaling : Feature void ConfigureTAA(); void 
ConfigureUpscaling(RE::BSGraphics::State* a_state); - void Upscale(); + void Upscale(ID3D11Texture2D* colorSourceOverride = nullptr); // D3D11 textures Texture2D* reactiveMaskTexture = nullptr; diff --git a/src/Features/VR.cpp b/src/Features/VR.cpp index e6ed6af7bb..a35c20d377 100644 --- a/src/Features/VR.cpp +++ b/src/Features/VR.cpp @@ -88,6 +88,12 @@ void VR::SetupResources() if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\VR\\StereoBlendCS.hlsl", edgeDetectionDefines, "cs_5_0"))) stereoBlendDebugEdgeDetectionCS.attach(rawPtr); + // Overwrite mode: direct replacement instead of blend (for stencil culling) + auto overwriteDefines = defines; + overwriteDefines.push_back({ "STEREO_OVERWRITE", "" }); + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\VR\\StereoBlendCS.hlsl", overwriteDefines, "cs_5_0"))) + stereoBlendOverwriteCS.attach(rawPtr); + auto renderer = globals::game::renderer; auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; D3D11_TEXTURE2D_DESC mainDesc; diff --git a/src/Features/VR.h b/src/Features/VR.h index e8372cbb6f..a9cab72282 100644 --- a/src/Features/VR.h +++ b/src/Features/VR.h @@ -260,7 +260,7 @@ struct VR : OverlayFeature StereoBlendDepthSigma = std::clamp(StereoBlendDepthSigma, 0.001f, 0.1f); StereoBlendMaxFactor = std::clamp(StereoBlendMaxFactor, 0.0f, 0.5f); StereoBlendColorThreshold = std::clamp(StereoBlendColorThreshold, 0.0f, 0.2f); - StereoBlendDebugMode = std::clamp(StereoBlendDebugMode, 0, 3); + StereoBlendDebugMode = std::clamp(StereoBlendDebugMode, 0, 5); } }; @@ -358,6 +358,7 @@ struct VR : OverlayFeature winrt::com_ptr stereoBlendDebugBackCheckCS; winrt::com_ptr stereoBlendDebugBlendWeightCS; winrt::com_ptr stereoBlendDebugEdgeDetectionCS; + winrt::com_ptr stereoBlendOverwriteCS; eastl::unique_ptr stereoBlendCopyTex; eastl::unique_ptr stereoBlendCB; @@ -368,7 +369,10 @@ struct VR : OverlayFeature float DepthSigma; float MaxBlendFactor; float 
ColorDiffThreshold; - float pad; + float DebugEdgeTint; + uint32_t DebugMode; + float FullBlendDistance; + float _pad[2]; }; // Engine hook integration points diff --git a/src/Features/VR/SettingsUI.cpp b/src/Features/VR/SettingsUI.cpp index c4ea6a562f..3e15342846 100644 --- a/src/Features/VR/SettingsUI.cpp +++ b/src/Features/VR/SettingsUI.cpp @@ -323,7 +323,7 @@ namespace ImGui::Separator(); - const char* debugModes[] = { "Off", "Back-Check", "Blend Weight", "Edge Detection" }; + const char* debugModes[] = { "Off", "Back-Check", "Blend Weight", "Edge Detection", "Overwrite", "Overwrite Eye1" }; ImGui::Combo("Debug View", &settings.StereoBlendDebugMode, debugModes, IM_ARRAYSIZE(debugModes)); if (auto _tt = Util::HoverTooltipWrapper()) { ImGui::Text( diff --git a/src/Features/VR/StereoBlend.cpp b/src/Features/VR/StereoBlend.cpp index 1fa5d22240..ff7eacf3e9 100644 --- a/src/Features/VR/StereoBlend.cpp +++ b/src/Features/VR/StereoBlend.cpp @@ -3,6 +3,7 @@ #include "Features/DynamicCubemaps.h" #include "Features/ScreenSpaceGI.h" #include "Features/ScreenSpaceShadows.h" +#include "Features/VRStereoOptimizations.h" #include "State.h" void VR::ClearShaderCache() @@ -11,6 +12,7 @@ void VR::ClearShaderCache() stereoBlendDebugBackCheckCS = nullptr; stereoBlendDebugBlendWeightCS = nullptr; stereoBlendDebugEdgeDetectionCS = nullptr; + stereoBlendOverwriteCS = nullptr; } bool VR::AnyScreenSpaceEffectLoaded() @@ -22,10 +24,20 @@ bool VR::AnyScreenSpaceEffectLoaded() void VR::DrawStereoBlend() { - if (!REL::Module::IsVR() || !settings.EnableStereoBlend || !stereoBlendCS || !stereoBlendCopyTex || !stereoBlendCB) + bool vrStereoOptActive = globals::features::vrStereoOptimizations.loaded && + globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off && + stereoBlendOverwriteCS; + + if (!REL::Module::IsVR() || !stereoBlendCopyTex || !stereoBlendCB) + return; + + if (vrStereoOptActive && 
globals::features::vrStereoOptimizations.settings.debugSkipMerge) + return; + + if (!vrStereoOptActive && (!settings.EnableStereoBlend || !stereoBlendCS)) return; - if (!AnyScreenSpaceEffectLoaded() && !globals::state->IsDeveloperMode()) + if (!vrStereoOptActive && !AnyScreenSpaceEffectLoaded() && !globals::state->IsDeveloperMode()) return; ZoneScoped; @@ -38,9 +50,10 @@ void VR::DrawStereoBlend() auto renderer = globals::game::renderer; auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; - auto* depthSRV = Util::GetCurrentSceneDepthSRV(); + // Use live depth buffer (kMAIN) — at DeferredPasses time this has the correct + // opaque geometry depth matching the composited color buffer. + auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; - // Copy main color to read-only texture to avoid read/write race between eyes context->CopyResource(stereoBlendCopyTex->resource.get(), main.texture); auto dispatchCount = Util::GetScreenDispatchCount(true); @@ -55,37 +68,94 @@ void VR::DrawStereoBlend() cbData.MaxBlendFactor = settings.StereoBlendMaxFactor; cbData.ColorDiffThreshold = settings.StereoBlendColorThreshold; + // Pass debug edge tint from VRStereoOptimizations settings + if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugVisualization) + cbData.DebugEdgeTint = 0.3f; + else + cbData.DebugEdgeTint = 0.0f; + + // Debug mode: 0=normal, 1=depth map diagnostic, 2=full blend depth visualizer + if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugDepthMap) + cbData.DebugMode = 1u; + else if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugFullBlendDepth) + cbData.DebugMode = 2u; + else + cbData.DebugMode = 0u; + + cbData.FullBlendDistance = vrStereoOptActive ? 
globals::features::vrStereoOptimizations.settings.fullBlendDistance : 0.0f; + stereoBlendCB->Update(cbData); auto cbPtr = stereoBlendCB->CB(); - ID3D11ShaderResourceView* srvs[2]{ stereoBlendCopyTex->srv.get(), depthSRV }; - ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + auto& motionVectors = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMOTION_VECTOR]; + + bool isOverwriteMode = vrStereoOptActive; ID3D11ComputeShader* activeCS = stereoBlendCS.get(); - if (settings.StereoBlendDebugMode == 1 && stereoBlendDebugBackCheckCS) - activeCS = stereoBlendDebugBackCheckCS.get(); - else if (settings.StereoBlendDebugMode == 2 && stereoBlendDebugBlendWeightCS) - activeCS = stereoBlendDebugBlendWeightCS.get(); - else if (settings.StereoBlendDebugMode == 3 && stereoBlendDebugEdgeDetectionCS) - activeCS = stereoBlendDebugEdgeDetectionCS.get(); + if (vrStereoOptActive) { + activeCS = stereoBlendOverwriteCS.get(); + } else { + int effectiveMode = settings.StereoBlendDebugMode; + if (effectiveMode == 1 && stereoBlendDebugBackCheckCS) + activeCS = stereoBlendDebugBackCheckCS.get(); + else if (effectiveMode == 2 && stereoBlendDebugBlendWeightCS) + activeCS = stereoBlendDebugBlendWeightCS.get(); + else if (effectiveMode == 3 && stereoBlendDebugEdgeDetectionCS) + activeCS = stereoBlendDebugEdgeDetectionCS.get(); + } + + // Save and unbind DSV to avoid SRV/DSV conflict on depth buffer in overwrite mode + ID3D11RenderTargetView* savedRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {}; + ID3D11DepthStencilView* savedDSV = nullptr; + if (isOverwriteMode) { + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, &savedDSV); + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, nullptr); + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + } + ID3D11ShaderResourceView* srvs[2]{ stereoBlendCopyTex->srv.get(), depthSRV }; context->CSSetConstantBuffers(1, 1, &cbPtr); context->CSSetShaderResources(0, 2, srvs); - 
context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - context->CSSetShader(activeCS, nullptr, 0); + if (isOverwriteMode) { + ID3D11ShaderResourceView* modeSRV = globals::features::vrStereoOptimizations.GetModeTextureSRV(); + if (modeSRV) + context->CSSetShaderResources(2, 1, &modeSRV); + + ID3D11UnorderedAccessView* uavs[2]{ main.UAV, motionVectors.UAV }; + context->CSSetUnorderedAccessViews(0, 2, uavs, nullptr); + } else { + ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + } + + context->CSSetShader(activeCS, nullptr, 0); context->Dispatch(dispatchCount.x, dispatchCount.y, 1); // Cleanup - srvs[0] = nullptr; - srvs[1] = nullptr; - uavs[0] = nullptr; - cbPtr = nullptr; - context->CSSetShaderResources(0, 2, srvs); - context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - context->CSSetConstantBuffers(1, 1, &cbPtr); + ID3D11ShaderResourceView* nullSRVs[3] = {}; + context->CSSetShaderResources(0, isOverwriteMode ? 3 : 2, nullSRVs); + ID3D11UnorderedAccessView* nullUAVs[2] = {}; + context->CSSetUnorderedAccessViews(0, isOverwriteMode ? 
2 : 1, nullUAVs, nullptr); + ID3D11Buffer* nullCB = nullptr; + context->CSSetConstantBuffers(1, 1, &nullCB); context->CSSetShader(nullptr, nullptr, 0); + // Restore DSV after CS dispatch in overwrite mode + if (isOverwriteMode && savedDSV) { + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, nullptr); + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, savedDSV); + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + savedDSV->Release(); + } + if (globals::state->frameAnnotations) globals::state->EndPerfEvent(); + } diff --git a/src/Features/VRStereoOptimizations.cpp b/src/Features/VRStereoOptimizations.cpp new file mode 100644 index 0000000000..a242ccbc3b --- /dev/null +++ b/src/Features/VRStereoOptimizations.cpp @@ -0,0 +1,812 @@ +#include "VRStereoOptimizations.h" + +#include "Globals.h" +#include "State.h" +#include "Utils/D3D.h" +#include "Utils/Game.h" + +#include + +// JSON enum serialization for StereoMode +NLOHMANN_JSON_SERIALIZE_ENUM(VRStereoOptimizations::StereoMode, { + { VRStereoOptimizations::StereoMode::Off, "Off" }, + { VRStereoOptimizations::StereoMode::Enable, "Enable" }, +}) + +//============================================================================= +// SETTINGS MANAGEMENT +//============================================================================= + +void VRStereoOptimizations::SaveSettings(json& o_json) +{ + o_json["StereoMode"] = settings.stereoMode; + o_json["DisocclusionDepthThreshold"] = settings.disocclusionDepthThreshold; + o_json["FullBlendDistance"] = settings.fullBlendDistance; + o_json["QualityJitterOffset"] = settings.qualityJitterOffset; + o_json["FoveatedRegionRadius"] = settings.foveatedRegionRadius; + o_json["FoveatedRegionCenterX"] = settings.foveatedRegionCenterX; + o_json["FoveatedRegionCenterY"] = settings.foveatedRegionCenterY; + o_json["UseEyeTracking"] = settings.useEyeTracking; + o_json["DebugVisualization"] = settings.debugVisualization; + 
o_json["DebugSkipMerge"] = settings.debugSkipMerge; + o_json["DebugForceAllStencil"] = settings.debugForceAllStencil; + o_json["DebugForceAllReprojectCS"] = settings.debugForceAllReprojectCS; + o_json["DebugDepthMap"] = settings.debugDepthMap; + o_json["MipBiasMode"] = settings.mipBiasMode; + o_json["MipLodBias"] = settings.mipLodBias; + o_json["MipBiasNearDist"] = settings.mipBiasNearDist; + o_json["MipBiasFarDist"] = settings.mipBiasFarDist; + o_json["CASStrength"] = settings.casStrength; + o_json["AlphaTestThreshold"] = settings.alphaTestThreshold; +} + +void VRStereoOptimizations::LoadSettings(json& o_json) +{ + if (o_json.contains("StereoMode")) + settings.stereoMode = o_json["StereoMode"].get(); + if (o_json.contains("DisocclusionDepthThreshold")) + settings.disocclusionDepthThreshold = o_json["DisocclusionDepthThreshold"].get(); + if (o_json.contains("QualityJitterOffset")) + settings.qualityJitterOffset = o_json["QualityJitterOffset"].get(); + if (o_json.contains("FoveatedRegionRadius")) + settings.foveatedRegionRadius = o_json["FoveatedRegionRadius"].get(); + if (o_json.contains("FoveatedRegionCenterX")) + settings.foveatedRegionCenterX = o_json["FoveatedRegionCenterX"].get(); + if (o_json.contains("FoveatedRegionCenterY")) + settings.foveatedRegionCenterY = o_json["FoveatedRegionCenterY"].get(); + if (o_json.contains("UseEyeTracking")) + settings.useEyeTracking = o_json["UseEyeTracking"].get(); + if (o_json.contains("DebugVisualization")) + settings.debugVisualization = o_json["DebugVisualization"].get(); + if (o_json.contains("DebugSkipMerge")) + settings.debugSkipMerge = o_json["DebugSkipMerge"].get(); + if (o_json.contains("DebugForceAllStencil")) + settings.debugForceAllStencil = o_json["DebugForceAllStencil"].get(); + if (o_json.contains("DebugForceAllReprojectCS")) + settings.debugForceAllReprojectCS = o_json["DebugForceAllReprojectCS"].get(); + if (o_json.contains("DebugDepthMap")) + settings.debugDepthMap = o_json["DebugDepthMap"].get(); + if 
(o_json.contains("FullBlendDistance")) + settings.fullBlendDistance = o_json["FullBlendDistance"].get(); + if (o_json.contains("MipBiasMode")) + settings.mipBiasMode = o_json["MipBiasMode"].get(); + // Backwards compat: old bool EnableMipBias -> mode 2 (Distant Trees) + else if (o_json.contains("EnableMipBias") && o_json["EnableMipBias"].get()) + settings.mipBiasMode = 2; + if (o_json.contains("MipLodBias")) + settings.mipLodBias = o_json["MipLodBias"].get(); + if (o_json.contains("MipBiasNearDist")) + settings.mipBiasNearDist = o_json["MipBiasNearDist"].get(); + if (o_json.contains("MipBiasFarDist")) + settings.mipBiasFarDist = o_json["MipBiasFarDist"].get(); + if (o_json.contains("CASStrength")) + settings.casStrength = o_json["CASStrength"].get(); + if (o_json.contains("AlphaTestThreshold")) + settings.alphaTestThreshold = o_json["AlphaTestThreshold"].get(); +} + +void VRStereoOptimizations::RestoreDefaultSettings() +{ + settings = {}; +} + +//============================================================================= +// RESOURCE SETUP +//============================================================================= + +void VRStereoOptimizations::SetupResources() +{ + if (!REL::Module::IsVR()) + return; + + auto device = globals::d3d::device; + auto renderer = globals::game::renderer; + + // Constant buffers + paramsCB = eastl::make_unique(ConstantBufferDesc()); + + // Get main RT dimensions for per-eye calculations + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + D3D11_TEXTURE2D_DESC mainDesc; + main.texture->GetDesc(&mainDesc); + + // Per-pixel mode texture (R8_UINT, full SBS resolution = both eyes) + { + D3D11_TEXTURE2D_DESC modeDesc{}; + modeDesc.Width = mainDesc.Width; + modeDesc.Height = mainDesc.Height; + modeDesc.MipLevels = 1; + modeDesc.ArraySize = 1; + modeDesc.Format = DXGI_FORMAT_R8_UINT; + modeDesc.SampleDesc.Count = 1; + modeDesc.SampleDesc.Quality = 0; + modeDesc.Usage = D3D11_USAGE_DEFAULT; + 
modeDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + modeDesc.CPUAccessFlags = 0; + modeDesc.MiscFlags = 0; + + texPerPixelMode = eastl::make_unique(modeDesc); + texPerPixelMode->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = DXGI_FORMAT_R8_UINT, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + texPerPixelMode->CreateUAV(D3D11_UNORDERED_ACCESS_VIEW_DESC{ + .Format = DXGI_FORMAT_R8_UINT, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } }); + } + + // Depth-stencil state for stencil write pass: + // Depth test OFF (not rendering geometry), stencil ALWAYS + REPLACE with ref=1 + { + D3D11_DEPTH_STENCIL_DESC dssDesc{}; + dssDesc.DepthEnable = FALSE; + dssDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; + dssDesc.StencilEnable = TRUE; + dssDesc.StencilReadMask = 0xFF; + dssDesc.StencilWriteMask = 0xFF; + dssDesc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dssDesc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + dssDesc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE; + dssDesc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + dssDesc.BackFace = dssDesc.FrontFace; + + DX::ThrowIfFailed(device->CreateDepthStencilState(&dssDesc, stencilWriteDSS.put())); + } + + // Rasterizer state for stencil write: no culling, no depth clip + { + D3D11_RASTERIZER_DESC rsDesc{}; + rsDesc.FillMode = D3D11_FILL_SOLID; + rsDesc.CullMode = D3D11_CULL_NONE; + rsDesc.DepthClipEnable = FALSE; + + DX::ThrowIfFailed(device->CreateRasterizerState(&rsDesc, stencilWriteRS.put())); + } + + // Read-only depth DSV for stencil write pass: allows simultaneous depth SRV binding. + // We write stencil but never write depth, so D3D11_DSV_READ_ONLY_DEPTH is safe. 
+ { + auto& depthData = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (depthData.views[0] && depthData.texture) { + D3D11_DEPTH_STENCIL_VIEW_DESC dsvDesc{}; + depthData.views[0]->GetDesc(&dsvDesc); + dsvDesc.Flags = D3D11_DSV_READ_ONLY_DEPTH; + + DX::ThrowIfFailed(device->CreateDepthStencilView(depthData.texture, &dsvDesc, stencilWriteReadOnlyDSV.put())); + } else { + logger::warn("[VRStereoOptimizations] Could not create read-only DSV: depth stencil data not available"); + } + } + + // CAS sharpness parameter buffer (structured buffer SRV to avoid cbuffer conflicts) + { + D3D11_BUFFER_DESC bufDesc{}; + bufDesc.ByteWidth = sizeof(float); + bufDesc.Usage = D3D11_USAGE_DYNAMIC; + bufDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + bufDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + bufDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + bufDesc.StructureByteStride = sizeof(float); + + float initSharpness = settings.casStrength; + D3D11_SUBRESOURCE_DATA initData{}; + initData.pSysMem = &initSharpness; + + DX::ThrowIfFailed(device->CreateBuffer(&bufDesc, &initData, casParamsBuf.put())); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc{}; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Buffer.NumElements = 1; + DX::ThrowIfFailed(device->CreateShaderResourceView(casParamsBuf.get(), &srvDesc, casParamsSRV.put())); + } + + // CAS output texture (same format as main RT, with UAV capability) + { + D3D11_TEXTURE2D_DESC casDesc{}; + casDesc.Width = mainDesc.Width; + casDesc.Height = mainDesc.Height; + casDesc.MipLevels = 1; + casDesc.ArraySize = 1; + casDesc.Format = mainDesc.Format; + casDesc.SampleDesc.Count = 1; + casDesc.SampleDesc.Quality = 0; + casDesc.Usage = D3D11_USAGE_DEFAULT; + casDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + casDesc.CPUAccessFlags = 0; + casDesc.MiscFlags = 0; + + casTex = 
eastl::make_unique(casDesc); + casTex->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = mainDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + casTex->CreateUAV(D3D11_UNORDERED_ACCESS_VIEW_DESC{ + .Format = mainDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } }); + } + + CompileShaders(); + + logger::info("[VRStereoOptimizations] Resources created: mode tex {}x{} (full SBS)", mainDesc.Width, mainDesc.Height); +} + +void VRStereoOptimizations::CompileShaders() +{ + std::vector> csDefines = { + { "VR", nullptr }, + { "FRAMEBUFFER", nullptr } + }; + + std::vector> vspsDefines = { + { "VR", nullptr } + }; + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilCS.hlsl", csDefines, "cs_5_0")) + stencilCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilCS"); + + { + auto debugDefines = csDefines; + debugDefines.push_back({ "DEBUG_DEPTH_MAP", nullptr }); + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilCS.hlsl", debugDefines, "cs_5_0")) + stencilDebugDepthMapCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilCS (DEBUG_DEPTH_MAP)"); + } + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilWriteVS.hlsl", vspsDefines, "vs_5_0")) + stencilWriteVS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilWriteVS"); + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilWritePS.hlsl", vspsDefines, "ps_5_0")) + stencilWritePS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilWritePS"); + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\ReprojectionCS.hlsl", csDefines, "cs_5_0")) + 
reprojectionCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile ReprojectionCS"); + + { + std::vector> casDefines = {}; + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VR\\CASCS.hlsl", casDefines, "cs_5_0")) + casCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile CASCS"); + } +} + +void VRStereoOptimizations::ClearShaderCache() +{ + stencilCS = nullptr; + stencilDebugDepthMapCS = nullptr; + stencilWriteVS = nullptr; + stencilWritePS = nullptr; + reprojectionCS = nullptr; + casCS = nullptr; + dssCache.clear(); +} + +void VRStereoOptimizations::Reset() +{ + stencilActive = false; + stencilSwapCount = 0; +} + +//============================================================================= +// IMGUI SETTINGS +//============================================================================= + +void VRStereoOptimizations::DrawSettings() +{ + const char* modeNames[] = { "Off", "Enable" }; + int currentMode = static_cast(settings.stereoMode); + if (ImGui::Combo("Feature Enable", ¤tMode, modeNames, IM_ARRAYSIZE(modeNames))) + settings.stereoMode = static_cast(currentMode); + + // MIP LOD Bias section (always shown, independent of stereo mode) + ImGui::Separator(); + const char* mipBiasModes[] = { "Off", "All Textures", "Distant Trees" }; + ImGui::Combo("MIP LOD Bias", &settings.mipBiasMode, mipBiasModes, 3); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Off: No MIP bias\nAll Textures: Depth-gated sharpening for all textures\nDistant Trees: Depth-gated sharpening for foliage only"); + + if (settings.mipBiasMode > 0) { + ImGui::SliderFloat("MIP Bias Strength", &settings.mipLodBias, -3.0f, 0.0f, "%.2f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Negative = sharper. 
-0.5 subtle, -1.0 moderate, -2.0 aggressive."); + ImGui::SliderFloat("MIP Near Distance", &settings.mipBiasNearDist, 0.0f, 10000.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Game units. No MIP bias closer than this distance."); + ImGui::SliderFloat("MIP Far Distance", &settings.mipBiasFarDist, 0.0f, 20000.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Game units. Full MIP bias beyond this distance.\nSmooth ramp between near and far."); + } + ImGui::Separator(); + + + ImGui::SliderFloat("CAS Sharpening", &settings.casStrength, 0.0f, 1.0f, "%.2f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Contrast Adaptive Sharpening (intended for use with TAA).\n0 = disabled, higher = sharper."); + ImGui::Separator(); + + if (settings.stereoMode == StereoMode::Off) + return; + + ImGui::SliderFloat("Disocclusion Depth Threshold", &settings.disocclusionDepthThreshold, 0.001f, 0.1f, "%.4f"); + ImGui::SliderFloat("Full Blend Distance", &settings.fullBlendDistance, 0.0f, 10000.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Geometry closer than this distance (game units) is fully shaded in both eyes and bilaterally blended for 2x supersampling. 
0 = disabled."); + + if (globals::state->IsDeveloperMode()) { + if (ImGui::TreeNode("Debug")) { + ImGui::Checkbox("Skip Pixel Reprojection", &settings.debugSkipMerge); + ImGui::Checkbox("Full Blend Depth View", &settings.debugFullBlendDepth); + if (settings.debugFullBlendDepth) + ImGui::TextColored(ImVec4(0, 1, 1, 1), " Cyan = full blend zone (closer = stronger tint)"); + ImGui::Text("Stencil swaps this frame: %u", stencilSwapCount); + ImGui::TreePop(); + } + } +} + +//============================================================================= +// CONSTANT BUFFER UPDATE +//============================================================================= + +void VRStereoOptimizations::UpdateConstantBuffer() +{ + float2 resolution = Util::ConvertToDynamic(globals::state->screenSize); + + VRStereoOptParams params{}; + params.FrameDim[0] = resolution.x; + params.FrameDim[1] = resolution.y; + params.RcpFrameDim[0] = 1.0f / resolution.x; + params.RcpFrameDim[1] = 1.0f / resolution.y; + params.StereoModeValue = static_cast(settings.stereoMode); + params.DisocclusionThreshold = settings.disocclusionDepthThreshold; + params.EdgeDepthThreshold = settings.edgeDepthThreshold; + params.EdgeWidth = static_cast(settings.edgeWidth); + params.QualityJitter[0] = settings.qualityJitterOffset; + params.QualityJitter[1] = settings.qualityJitterOffset; + params.FoveatedRadius = settings.foveatedRegionRadius; + params.FoveatedCenter[0] = settings.foveatedRegionCenterX; + params.FoveatedCenter[1] = settings.foveatedRegionCenterY; + params.MinEdgeDistance = settings.minEdgeDistance; + params.FullBlendDistance = settings.fullBlendDistance; + + paramsCB->Update(params); +} + +//============================================================================= +// PHASE 1: STENCIL CLASSIFICATION + WRITE +//============================================================================= + +void VRStereoOptimizations::DispatchStencil() +{ + if (!REL::Module::IsVR()) + return; + if (settings.stereoMode == 
StereoMode::Off) + return; + if (!stencilCS || !stencilWriteVS || !stencilWritePS || !texPerPixelMode || !paramsCB) + return; + + ZoneScoped; + TracyD3D11Zone(globals::state->tracyCtx, "VR Stereo Opt - Stencil"); + + if (globals::state->frameAnnotations) + globals::state->BeginPerfEvent("VR Stereo Opt - Stencil"); + + auto context = globals::d3d::context; + + UpdateConstantBuffer(); + auto cbPtr = paramsCB->CB(); + // Use live depth buffer (kMAIN) instead of kPOST_ZPREPASS_COPY — at StartDeferred time, + // kPOST_ZPREPASS_COPY is stale (previous frame). kMAIN has fresh z-prepass depth so + // StencilCS can correctly detect sky-vs-geometry edges in the current frame. + auto renderer = globals::game::renderer; + auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + + // Dispatch classification CS over Eye 1 region + // Input: t0 = depth, b1 = params CB + // Output: u0 = per-pixel mode texture + { + ID3D11ShaderResourceView* srvs[1]{ depthSRV }; + ID3D11UnorderedAccessView* uavs[1]{ texPerPixelMode->uav.get() }; + + context->CSSetConstantBuffers(1, 1, &cbPtr); + context->CSSetShaderResources(0, 1, srvs); + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + auto* activeStencilCS = (settings.debugDepthMap && stencilDebugDepthMapCS) ? 
stencilDebugDepthMapCS.get() : stencilCS.get(); + context->CSSetShader(activeStencilCS, nullptr, 0); + + uint32_t fullWidth = texPerPixelMode->desc.Width; + uint32_t fullHeight = texPerPixelMode->desc.Height; + context->Dispatch((fullWidth + 7) / 8, (fullHeight + 7) / 8, 1); + + // Cleanup CS bindings + ID3D11ShaderResourceView* nullSRV = nullptr; + ID3D11UnorderedAccessView* nullUAV = nullptr; + ID3D11Buffer* nullCB = nullptr; + context->CSSetShaderResources(0, 1, &nullSRV); + context->CSSetUnorderedAccessViews(0, 1, &nullUAV, nullptr); + context->CSSetConstantBuffers(1, 1, &nullCB); + context->CSSetShader(nullptr, nullptr, 0); + } + + // Transfer classification to hardware stencil buffer + ExecuteStencilWritePass(); + + stencilActive = true; + stencilSwapCount = 0; + + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); +} + +void VRStereoOptimizations::ExecuteStencilWritePass() +{ + auto context = globals::d3d::context; + auto renderer = globals::game::renderer; + + // ===== SAVE FULL D3D11 PIPELINE STATE ===== + + ID3D11RenderTargetView* savedRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {}; + ID3D11DepthStencilView* savedDSV = nullptr; + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, &savedDSV); + + ID3D11DepthStencilState* savedDSS = nullptr; + UINT savedStencilRef = 0; + context->OMGetDepthStencilState(&savedDSS, &savedStencilRef); + + ID3D11BlendState* savedBlendState = nullptr; + FLOAT savedBlendFactor[4] = {}; + UINT savedSampleMask = 0; + context->OMGetBlendState(&savedBlendState, savedBlendFactor, &savedSampleMask); + + ID3D11RasterizerState* savedRS = nullptr; + context->RSGetState(&savedRS); + + D3D11_VIEWPORT savedViewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {}; + UINT numViewports = D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + context->RSGetViewports(&numViewports, savedViewports); + + ID3D11VertexShader* savedVS = nullptr; + context->VSGetShader(&savedVS, 
nullptr, nullptr); + + ID3D11PixelShader* savedPS = nullptr; + context->PSGetShader(&savedPS, nullptr, nullptr); + + ID3D11GeometryShader* savedGS = nullptr; + context->GSGetShader(&savedGS, nullptr, nullptr); + + ID3D11InputLayout* savedInputLayout = nullptr; + context->IAGetInputLayout(&savedInputLayout); + + D3D11_PRIMITIVE_TOPOLOGY savedTopology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + context->IAGetPrimitiveTopology(&savedTopology); + + ID3D11ShaderResourceView* savedPSSRVs[2] = {}; + context->PSGetShaderResources(0, 2, savedPSSRVs); + + ID3D11Buffer* savedPSCB = nullptr; + context->PSGetConstantBuffers(1, 1, &savedPSCB); + + // ===== SET UP STENCIL WRITE PASS ===== + + // Use our custom read-only-depth DSV to allow simultaneous depth SRV binding (t1). + // D3D11_DSV_READ_ONLY_DEPTH permits depth SRV + stencil write simultaneously. + // Using views[0] would cause D3D11 to silently NULL the depth SRV. + // depthData.readOnlyViews[0] has BOTH read-only flags and doesn't allow stencil writes. 
+ context->OMSetRenderTargets(0, nullptr, stencilWriteReadOnlyDSV.get()); + context->OMSetDepthStencilState(stencilWriteDSS.get(), 1); + context->RSSetState(stencilWriteRS.get()); + + // Eye 1 viewport (right half of SBS buffer) + { + D3D11_TEXTURE2D_DESC mainDesc; + renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN].texture->GetDesc(&mainDesc); + + D3D11_VIEWPORT vp{}; + vp.TopLeftX = static_cast(mainDesc.Width / 2); + vp.TopLeftY = 0.0f; + vp.Width = static_cast(mainDesc.Width / 2); + vp.Height = static_cast(mainDesc.Height); + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + } + + // Bind shaders and mode texture + context->VSSetShader(stencilWriteVS.get(), nullptr, 0); + context->PSSetShader(stencilWritePS.get(), nullptr, 0); + context->GSSetShader(nullptr, nullptr, 0); + + ID3D11ShaderResourceView* modeSRV = texPerPixelMode->srv.get(); + context->PSSetShaderResources(0, 1, &modeSRV); + + auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + context->PSSetShaderResources(1, 1, &depthSRV); + + // Bind params CB to pixel shader (CS and PS have separate CB bindings) + auto cbPtr = paramsCB->CB(); + context->PSSetConstantBuffers(1, 1, &cbPtr); + + // Fullscreen triangle: no VB/IB, procedurally generated in VS + context->IASetInputLayout(nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + context->Draw(3, 0); + + // ===== RESTORE FULL D3D11 PIPELINE STATE ===== + + ID3D11ShaderResourceView* nullSRVs[2] = {}; + context->PSSetShaderResources(0, 2, nullSRVs); + + context->PSSetConstantBuffers(1, 1, &savedPSCB); + + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, savedDSV); + context->OMSetDepthStencilState(savedDSS, savedStencilRef); + context->OMSetBlendState(savedBlendState, savedBlendFactor, savedSampleMask); + context->RSSetState(savedRS); + context->RSSetViewports(numViewports, 
savedViewports); + context->VSSetShader(savedVS, nullptr, 0); + context->PSSetShader(savedPS, nullptr, 0); + context->GSSetShader(savedGS, nullptr, 0); + context->IASetInputLayout(savedInputLayout); + context->IASetPrimitiveTopology(savedTopology); + context->PSSetShaderResources(0, 2, savedPSSRVs); + + // Release COM references acquired by Get* calls + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + if (savedDSV) + savedDSV->Release(); + if (savedDSS) + savedDSS->Release(); + if (savedBlendState) + savedBlendState->Release(); + if (savedRS) + savedRS->Release(); + if (savedVS) + savedVS->Release(); + if (savedPS) + savedPS->Release(); + if (savedGS) + savedGS->Release(); + if (savedInputLayout) + savedInputLayout->Release(); + if (savedPSSRVs[0]) + savedPSSRVs[0]->Release(); + if (savedPSSRVs[1]) + savedPSSRVs[1]->Release(); + if (savedPSCB) + savedPSCB->Release(); +} + +void VRStereoOptimizations::PerformLateStencilWrite() +{ + // Placeholder for future multi-pass stencil strategies +} + +//============================================================================= +// DSS CACHE: CLONE + STENCIL NOT_EQUAL ENFORCEMENT +//============================================================================= + +ID3D11DepthStencilState* VRStereoOptimizations::GetOrCreateModifiedDSS(ID3D11DepthStencilState* originalDSS) +{ + if (!originalDSS || !stencilActive) + return originalDSS; + + stencilSwapCount++; + + auto it = dssCache.find(originalDSS); + if (it != dssCache.end()) + return it->second.get(); + + // Clone original desc and add read-only stencil NOT_EQUAL test + D3D11_DEPTH_STENCIL_DESC desc{}; + originalDSS->GetDesc(&desc); + + desc.StencilEnable = TRUE; + desc.StencilReadMask = 0xFF; + desc.StencilWriteMask = 0x00; // Read-only: game rendering must not modify our marks + + // NOT_EQUAL with ref=1: skip pixels where stencil == 1 (MODE_MAIN) + desc.FrontFace.StencilFunc = D3D11_COMPARISON_NOT_EQUAL; + desc.FrontFace.StencilFailOp = 
D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + desc.BackFace = desc.FrontFace; + + winrt::com_ptr modifiedDSS; + HRESULT hr = globals::d3d::device->CreateDepthStencilState(&desc, modifiedDSS.put()); + if (FAILED(hr)) { + logger::warn("[VRStereoOptimizations] Failed to create modified DSS (HRESULT: {:#x})", static_cast(hr)); + return originalDSS; + } + + auto* result = modifiedDSS.get(); + dssCache[originalDSS] = std::move(modifiedDSS); + + return result; +} + +//============================================================================= +// PHASE 3: REPROJECTION COMPUTE SHADER +//============================================================================= + +void VRStereoOptimizations::DispatchReprojection() +{ + if (!REL::Module::IsVR()) + return; + if (settings.stereoMode == StereoMode::Off) + return; + if (!reprojectionCS || !texPerPixelMode || !paramsCB) + return; + if (settings.debugSkipMerge) + return; + + ZoneScoped; + TracyD3D11Zone(globals::state->tracyCtx, "VR Stereo Opt - Reprojection"); + + if (globals::state->frameAnnotations) + globals::state->BeginPerfEvent("VR Stereo Opt - Reprojection"); + + auto context = globals::d3d::context; + auto renderer = globals::game::renderer; + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + + UpdateConstantBuffer(); + auto cbPtr = paramsCB->CB(); + auto* depthSRV = Util::GetCurrentSceneDepthSRV(); + + // Bind: t0 = depth, t1 = mode texture, u0 = main UAV, b1 = params + ID3D11ShaderResourceView* srvs[2]{ + depthSRV, + texPerPixelMode->srv.get() + }; + ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + + context->CSSetConstantBuffers(1, 1, &cbPtr); + context->CSSetShaderResources(0, 2, srvs); + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + context->CSSetShader(reprojectionCS.get(), nullptr, 0); + + // Dispatch over full SBS texture + uint32_t fullWidth = 
texPerPixelMode->desc.Width; + uint32_t fullHeight = texPerPixelMode->desc.Height; + context->Dispatch((fullWidth + 7) / 8, (fullHeight + 7) / 8, 1); + + // Cleanup + ID3D11ShaderResourceView* nullSRVs[2] = {}; + ID3D11UnorderedAccessView* nullUAV = nullptr; + ID3D11Buffer* nullCB = nullptr; + context->CSSetShaderResources(0, 2, nullSRVs); + context->CSSetUnorderedAccessViews(0, 1, &nullUAV, nullptr); + context->CSSetConstantBuffers(1, 1, &nullCB); + context->CSSetShader(nullptr, nullptr, 0); + + // Stencil culling is done for this frame + logger::trace("[VRStereoOptimizations] Frame: stencilSwapCount={}", stencilSwapCount); + stencilActive = false; + + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); +} + +void VRStereoOptimizations::DeactivateStencil() +{ + if (!stencilActive) + return; + logger::trace("[VRStereoOptimizations] Frame: stencilSwapCount={}", stencilSwapCount); + stencilActive = false; +} + +//============================================================================= +// CAS (CONTRAST ADAPTIVE SHARPENING) - POST-TAA +//============================================================================= + +void VRStereoOptimizations::ApplyCAS(RE::RENDER_TARGET a_target) +{ + logger::trace("[VRStereoOptimizations] CAS: entered (strength={}, casCS={}, casTex={}, casParamsBuf={})", + settings.casStrength, (void*)casCS.get(), (void*)casTex.get(), (void*)casParamsBuf.get()); + + if (settings.casStrength <= 0.0f || !casCS || !casTex || !casParamsBuf) + return; + + if (!REL::Module::IsVR()) + return; + + auto renderer = globals::game::renderer; + auto context = globals::d3d::context; + + // Get the render target that post-processing just wrote to + auto& target = renderer->GetRuntimeData().renderTargets[a_target]; + if (!target.texture || !target.SRV) { + logger::trace("[VRStereoOptimizations] CAS: target RT has no texture/SRV, skipping"); + return; + } + + D3D11_TEXTURE2D_DESC targetDesc; + target.texture->GetDesc(&targetDesc); + 
logger::trace("[VRStereoOptimizations] CAS: dispatching on RT {} ({}x{}, strength={})", (int)a_target, targetDesc.Width, targetDesc.Height, settings.casStrength); + + // Check for dimension/format mismatch with intermediate texture + D3D11_TEXTURE2D_DESC casTexDesc; + static_cast(casTex->resource.get())->GetDesc(&casTexDesc); + if (casTexDesc.Width != targetDesc.Width || casTexDesc.Height != targetDesc.Height || casTexDesc.Format != targetDesc.Format) { + logger::info("[VRStereoOptimizations] CAS: recreating casTex to match target ({}x{} fmt={} -> {}x{} fmt={})", + casTexDesc.Width, casTexDesc.Height, (int)casTexDesc.Format, + targetDesc.Width, targetDesc.Height, (int)targetDesc.Format); + + D3D11_TEXTURE2D_DESC newDesc{}; + newDesc.Width = targetDesc.Width; + newDesc.Height = targetDesc.Height; + newDesc.MipLevels = 1; + newDesc.ArraySize = 1; + newDesc.Format = targetDesc.Format; + newDesc.SampleDesc.Count = 1; + newDesc.SampleDesc.Quality = 0; + newDesc.Usage = D3D11_USAGE_DEFAULT; + newDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + newDesc.CPUAccessFlags = 0; + newDesc.MiscFlags = 0; + + casTex = eastl::make_unique(newDesc); + casTex->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = targetDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + casTex->CreateUAV(D3D11_UNORDERED_ACCESS_VIEW_DESC{ + .Format = targetDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } }); + } + + // Update sharpness parameter via Map/Unmap + { + D3D11_MAPPED_SUBRESOURCE mapped; + if (SUCCEEDED(context->Map(casParamsBuf.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped))) { + *static_cast(mapped.pData) = settings.casStrength; + context->Unmap(casParamsBuf.get(), 0); + } + } + + // Unbind the RT so we can read from it + context->OMSetRenderTargets(0, nullptr, nullptr); + + // Dispatch CAS: read from target SRV, write to casTex UAV + { + 
ID3D11ShaderResourceView* views[2] = { target.SRV, casParamsSRV.get() }; + context->CSSetShaderResources(0, 2, views); + + ID3D11UnorderedAccessView* uavs[1] = { casTex->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + context->CSSetShader(casCS.get(), nullptr, 0); + + context->Dispatch((targetDesc.Width + 7) / 8, (targetDesc.Height + 7) / 8, 1); + } + + // Cleanup CS state + ID3D11ShaderResourceView* nullSRV[2] = { nullptr, nullptr }; + context->CSSetShaderResources(0, 2, nullSRV); + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetShader(nullptr, nullptr, 0); + + // Copy sharpened result back to the render target + context->CopyResource(target.texture, casTex->resource.get()); + + globals::game::stateUpdateFlags->set(RE::BSGraphics::ShaderFlags::DIRTY_RENDERTARGET); +} diff --git a/src/Features/VRStereoOptimizations.h b/src/Features/VRStereoOptimizations.h new file mode 100644 index 0000000000..7aed5ff837 --- /dev/null +++ b/src/Features/VRStereoOptimizations.h @@ -0,0 +1,230 @@ +#pragma once + +#include "Feature.h" + +#include +#include +#include + +/** + * @brief VR Stereo Rendering Optimizations feature. + * + * Uses hardware stencil culling to skip Eye 1 pixel shading for pixels that can be + * reprojected from Eye 0 via lateral stereo reprojection, then runs a compute shader + * to fill those pixels. This avoids redundant pixel shading in overlapping stereo regions. + * + * Pipeline: + * 1. DispatchStencil() - CS classifies per-pixel reprojection viability into a mode texture, + * then a fullscreen VS/PS pass writes that classification into the stencil buffer. + * 2. (Game renders Eye 1) - Hardware stencil test skips shading for marked pixels. + * 3. DispatchReprojection() - CS reprojects Eye 0 color into the skipped Eye 1 pixels. 
+ */ +struct VRStereoOptimizations : public Feature +{ + //============================================================================= + // ENUMS + //============================================================================= + + /// Operating mode for stereo reprojection + enum class StereoMode : uint32_t + { + Off = 0, ///< Feature disabled + Enable = 1 ///< Stereo reprojection enabled + }; + + /// Per-pixel classification written by StencilCS + enum PixelMode : uint8_t + { + MODE_DISOCCLUDED = 0, ///< Fully shaded, no reprojection, no blend + MODE_EDGE = 1, ///< Fully shaded + bilateral blend with other eye + MODE_MAIN = 2, ///< Eye 0: no reproject (Perf) / bilateral (Quality). Eye 1: overwrite (Perf) / bilateral (Quality) + MODE_EDGE_NEIGHBOUR = 3, ///< Outer band: background pixels near edge, blended in post-process + }; + + //============================================================================= + // FEATURE BASE CLASS OVERRIDES + //============================================================================= + + virtual inline std::string GetName() override { return "VR Stereo Optimizations"; } + virtual inline std::string GetShortName() override { return "VRStereoOptimizations"; } + virtual inline std::string_view GetShaderDefineName() override { return "VR_STEREO_OPT"; } + virtual inline std::string_view GetCategory() const override { return "Display"; } + virtual inline bool HasShaderDefine(RE::BSShader::Type t) override { return t == RE::BSShader::Type::Utility; } + virtual inline bool SupportsVR() override { return true; } + + virtual void SetupResources() override; + virtual void Reset() override; + virtual void DrawSettings() override; + virtual void SaveSettings(json& o_json) override; + virtual void LoadSettings(json& o_json) override; + virtual void RestoreDefaultSettings() override; + virtual void ClearShaderCache() override; + + virtual std::pair> GetFeatureSummary() override + { + return { + "Stereo rendering optimizations for VR that 
skip redundant pixel shading via stencil culling and lateral reprojection.", + { "Hardware stencil culling of Eye 1 pixels reprojectable from Eye 0", + "Compute shader lateral reprojection to fill culled pixels", + "Performance, Quality, and Foveated modes", + "Debug visualization overlays" } + }; + } + + //============================================================================= + // SETTINGS + //============================================================================= + + struct Settings + { + StereoMode stereoMode = StereoMode::Enable; + float disocclusionDepthThreshold = 0.01f; + float edgeDepthThreshold = 0.05f; + int edgeWidth = 3; ///< Half-width of edge band in pixels (total band = 2 * edgeWidth) + float minEdgeDistance = 5000.0f; ///< Minimum linearized depth for edge AA (game units) + float fullBlendDistance = 0.0f; ///< Linearized depth below which both eyes are fully shaded + blended (game units) + bool debugFullBlendDepth = false; ///< Show full blend depth zone as cyan overlay + float qualityJitterOffset = 0.125f; + float foveatedRegionRadius = 0.3f; + float foveatedRegionCenterX = 0.5f; + float foveatedRegionCenterY = 0.5f; + bool useEyeTracking = false; + + int reprojectionMode = 5; // 0=Blend, 4=Overwrite, 5=Overwrite Eye1 Only + + // Debug controls + bool debugVisualization = false; + bool debugSkipMerge = false; + bool debugForceAllStencil = false; + bool debugForceAllReprojectCS = false; + bool debugDepthMap = false; + + // MIP LOD Bias (negative = sharper textures) + // 0 = Off, 1 = All textures (global), 2 = Distant trees only (depth-gated TREE_ANIM) + int mipBiasMode = 0; + float mipLodBias = -2.0f; + float mipBiasNearDist = 2000.0f; ///< Game units: no bias closer than this + float mipBiasFarDist = 6000.0f; ///< Game units: full bias beyond this + + // CAS (Contrast Adaptive Sharpening) - post-TAA + float casStrength = 0.7f; ///< 0.0 = disabled, 0.0-1.0 = subtle to strong + float alphaTestThreshold = 0.001f; ///< Alpha floor for 
TREE_ANIM zombie texel removal + } settings; + + //============================================================================= + // GPU CONSTANT BUFFER (must match HLSL cbuffer layout exactly) + //============================================================================= + + struct alignas(16) VRStereoOptParams + { + float FrameDim[2]; // Full stereo buffer dimensions + float RcpFrameDim[2]; // 1.0 / FrameDim + + uint32_t StereoModeValue; // Cast of StereoMode enum (0-3) + float DisocclusionThreshold; + float EdgeDepthThreshold; + uint32_t EdgeWidth; + + float QualityJitter[2]; // Sub-pixel jitter offset (Quality mode) + float FoveatedRadius; + float pad2; + + float FoveatedCenter[2]; // Foveal region center UV + float MinEdgeDistance; + float FullBlendDistance; // Linearized depth for full blend zone + }; + static_assert(sizeof(VRStereoOptParams) % 16 == 0, "VRStereoOptParams must be 16-byte aligned for HLSL cbuffer."); + + //============================================================================= + // PUBLIC API + //============================================================================= + + /** + * @brief Classify Eye 1 pixels and write stencil marks. + * + * Dispatches the stencil classification CS, then performs a fullscreen triangle pass + * to write the classification into the hardware stencil buffer. + * Called from Deferred::StartDeferred() after OverrideBlendStates(). + */ + void DispatchStencil(); + + /** + * @brief Reproject Eye 0 color into stencil-culled Eye 1 pixels. + * + * Copies the main render target, then dispatches a CS to fill skipped pixels + * using lateral reprojection from Eye 0. + * Called from Deferred::DeferredPasses() after DeferredCompositeCS. + */ + void DispatchReprojection(); + + /** + * @brief Creates or retrieves a modified DSS with stencil NOT_EQUAL test. 
+ * + * Clones the given DSS with read-only stencil (WriteMask=0x00, Func=NOT_EQUAL, ref=1) + * so that pixels marked by our stencil write pass are skipped during normal rendering. + * Cached per unique input DSS pointer. + * + * @param originalDSS The original depth-stencil state to modify. + * @return Modified DSS with stencil test, or original if creation fails. + */ + ID3D11DepthStencilState* GetOrCreateModifiedDSS(ID3D11DepthStencilState* originalDSS); + + /// Whether the stencil pass is currently active this frame + bool IsStencilActive() const { return stencilActive; } + + /// Deactivate stencil culling (called from Deferred after geometry rendering completes) + void DeactivateStencil(); + + /// Apply CAS sharpening to the main render target (called after TAA) + void ApplyCAS(RE::RENDER_TARGET a_target); + + /// Get mode texture SRV for external consumers (e.g., DeferredCompositeCS Eye 1 skip) + ID3D11ShaderResourceView* GetModeTextureSRV() const { return texPerPixelMode ? texPerPixelMode->srv.get() : nullptr; } + +private: + //============================================================================= + // INTERNAL METHODS + //============================================================================= + + /// Fullscreen triangle pass: reads mode texture, writes stencil ref=1 for MODE_MAIN pixels + void ExecuteStencilWritePass(); + + /// Late stencil write callback (placeholder for future multi-pass strategies) + void PerformLateStencilWrite(); + + /// Compiles all shaders used by this feature + void CompileShaders(); + + /// Updates the constant buffer with current settings and frame dimensions + void UpdateConstantBuffer(); + + //============================================================================= + // GPU RESOURCES + //============================================================================= + + eastl::unique_ptr paramsCB; + eastl::unique_ptr texPerPixelMode; ///< R8_UINT classification texture (full SBS resolution) + eastl::unique_ptr 
reprojectionCopyTex; ///< Copy of main RT for reprojection read + + winrt::com_ptr stencilWriteDSS; + winrt::com_ptr stencilWriteRS; + winrt::com_ptr stencilWriteReadOnlyDSV; ///< Read-only-depth DSV for stencil write pass (allows simultaneous depth SRV) + + winrt::com_ptr stencilCS; + winrt::com_ptr stencilDebugDepthMapCS; + winrt::com_ptr stencilWriteVS; + winrt::com_ptr stencilWritePS; + winrt::com_ptr reprojectionCS; + + // CAS sharpening resources + winrt::com_ptr casCS; + eastl::unique_ptr casTex; ///< UAV-capable texture for CAS output + winrt::com_ptr casParamsBuf; ///< Structured buffer for CAS sharpness param + winrt::com_ptr casParamsSRV; ///< SRV for CAS sharpness param + + /// Cache of original DSS -> modified DSS with stencil NOT_EQUAL enforcement + std::unordered_map> dssCache; + + bool stencilActive = false; + uint32_t stencilSwapCount = 0; +}; diff --git a/src/Globals.cpp b/src/Globals.cpp index e90c3bf4ce..8ece4c4b4b 100644 --- a/src/Globals.cpp +++ b/src/Globals.cpp @@ -33,6 +33,7 @@ #include "Features/VolumetricShadows.h" #include "Features/WaterEffects.h" #include "Features/WeatherEditor.h" +#include "Features/VRStereoOptimizations.h" #include "Features/WetnessEffects.h" #include "Menu.h" #include "ShaderCache.h" @@ -84,6 +85,7 @@ namespace globals RenderDoc renderDoc{}; WeatherEditor weatherEditor{}; ExponentialHeightFog exponentialHeightFog{}; + VRStereoOptimizations vrStereoOptimizations{}; namespace llf { @@ -266,13 +268,64 @@ namespace globals { static void thunk(ID3D11DeviceContext* This, ID3D11Resource* pResource, UINT Subresource) { - if (*globals::game::perFrame.get() == pResource && globals::game::mappedFrameBuffer) + if (*globals::game::perFrame.get() == pResource && globals::game::mappedFrameBuffer) { CacheFramebuffer(); + } func(This, pResource, Subresource); } static inline REL::Relocation func; }; + /** + * @brief Hooked OMSetDepthStencilState — replaces DSS with stencil-enforcing version when VR stereo opt is active. 
+ * + * vtable index 36 for ID3D11DeviceContext::OMSetDepthStencilState. + * When VRStereoOptimizations has written stencil marks, this hook transparently swaps + * the game's DSS for a modified version that adds a stencil NOT_EQUAL test, causing + * marked Eye 1 pixels to be skipped during normal rendering. + */ + struct ID3D11DeviceContext_OMSetDepthStencilState + { + static void thunk(ID3D11DeviceContext* This, ID3D11DepthStencilState* pDepthStencilState, UINT StencilRef) + { + if (globals::game::isVR && pDepthStencilState) { + auto& stereoOpt = globals::features::vrStereoOptimizations; + if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { + pDepthStencilState = stereoOpt.GetOrCreateModifiedDSS(pDepthStencilState); + StencilRef = 1; // Must match the ref written by our stencil pass + + } + } + func(This, pDepthStencilState, StencilRef); + } + static inline REL::Relocation func; + }; + + /** + * @brief Hooked ClearDepthStencilView — blocks stencil clears when VR stereo opt stencil is active. + * + * vtable index 53 for ID3D11DeviceContext::ClearDepthStencilView. + * Prevents the game from clearing our stencil marks between the stencil write and + * the reprojection pass by stripping the D3D11_CLEAR_STENCIL flag. + */ + struct ID3D11DeviceContext_ClearDepthStencilView + { + static void thunk(ID3D11DeviceContext* This, ID3D11DepthStencilView* pDepthStencilView, UINT ClearFlags, FLOAT Depth, UINT8 Stencil) + { + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vrStereoOptimizations; + if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { + // Strip stencil clear to preserve our marks; allow depth clear to proceed + ClearFlags &= ~D3D11_CLEAR_STENCIL; + if (ClearFlags == 0) + return; // Nothing left to clear + } + } + func(This, pDepthStencilView, ClearFlags, Depth, Stencil); + } + static inline REL::Relocation func; + }; + /** * @brief Installs hooks on the Map and Unmap methods of the provided D3D11 device context. 
* @@ -282,5 +335,11 @@ namespace globals { stl::detour_vfunc<14, ID3D11DeviceContext_Map>(a_context); stl::detour_vfunc<15, ID3D11DeviceContext_Unmap>(a_context); + + // VR stereo optimization hooks: intercept DSS and stencil clear + if (globals::game::isVR) { + stl::detour_vfunc<36, ID3D11DeviceContext_OMSetDepthStencilState>(a_context); + stl::detour_vfunc<53, ID3D11DeviceContext_ClearDepthStencilView>(a_context); + } } } diff --git a/src/Globals.h b/src/Globals.h index fa96446891..9318c6b8d3 100644 --- a/src/Globals.h +++ b/src/Globals.h @@ -34,6 +34,7 @@ struct ExtendedTranslucency; struct Upscaling; struct WeatherEditor; struct ExponentialHeightFog; +struct VRStereoOptimizations; class State; class Deferred; @@ -91,6 +92,7 @@ namespace globals extern RenderDoc renderDoc; extern WeatherEditor weatherEditor; extern ExponentialHeightFog exponentialHeightFog; + extern VRStereoOptimizations vrStereoOptimizations; namespace llf { diff --git a/src/State.cpp b/src/State.cpp index 9794ce9441..5b9524f354 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -11,6 +11,7 @@ #include "Features/TerrainBlending.h" #include "Features/TerrainHelper.h" #include "Features/Upscaling.h" +#include "Features/VRStereoOptimizations.h" #include "Features/VolumetricShadows.h" #include "Features/WeatherEditor.h" #include "Menu.h" @@ -850,6 +851,22 @@ void State::UpdateSharedData([[maybe_unused]] bool a_inWorld, [[maybe_unused]] b data.MipBias = 0; } + // VR MIP bias modes: 1 = All (global), 2 = Distant Trees (per-pixel in TREE_ANIM) + data.VRMipBias = 0; + data.VRMipBiasNearDist = 2000.0f; + data.VRMipBiasFarDist = 6000.0f; + data.VRMipBiasMode = 0; + if (globals::game::isVR) { + auto& s = globals::features::vrStereoOptimizations.settings; + if (s.mipBiasMode == 1 || s.mipBiasMode == 2) { + data.VRMipBias = s.mipLodBias; + data.VRMipBiasNearDist = s.mipBiasNearDist; + data.VRMipBiasFarDist = s.mipBiasFarDist; + data.VRMipBiasMode = static_cast(s.mipBiasMode); + } + data.VRAlphaTestThreshold 
= s.alphaTestThreshold; + } + // DALC to SH const auto& m = dalcTransform.rotate; const auto& t = dalcTransform.translate; @@ -864,7 +881,7 @@ void State::UpdateSharedData([[maybe_unused]] bool a_inWorld, [[maybe_unused]] b SphericalHarmonics::SH2Color dalcSH = SphericalHarmonics::DALCToSH(dalcColors); data.AmbientSHR = { dalcSH.r.c0, dalcSH.r.c1[0], dalcSH.r.c1[1], dalcSH.r.c1[2] }; data.AmbientSHG = { dalcSH.g.c0, dalcSH.g.c1[0], dalcSH.g.c1[1], dalcSH.g.c1[2] }; - data.AmbientSHB = { dalcSH.b.c0, dalcSH.b.c1[0], dalcSH.b.c1[1], dalcSH.b.c1[2] }; + data.AmbientSHB = { dalcSH.b.c0, dalcSH.b.c1[0], dalcSH.b.c1[1], dalcSH.b.c1[2] } sharedDataCB->Update(data); } diff --git a/src/State.h b/src/State.h index 5682562681..e0e01918d8 100644 --- a/src/State.h +++ b/src/State.h @@ -210,6 +210,11 @@ class State uint InMapMenu; uint HideSky; float MipBias; + float VRMipBias; + float VRMipBiasNearDist; + float VRMipBiasFarDist; + uint VRMipBiasMode; // 0=Off, 1=All Textures, 2=Distant Trees only + float VRAlphaTestThreshold; // Alpha test threshold for VR TREE_ANIM (0 = use vanilla) float pad0; float4 AmbientSHR; float4 AmbientSHG; From e1fdc0c729390f2e2f02bf384145fc1af61540dd Mon Sep 17 00:00:00 2001 From: vrnord Date: Sat, 14 Mar 2026 12:27:38 -0600 Subject: [PATCH 02/16] docs(vr): explain why UpgradeBackendInterface is skipped for VR Streamline proxy wrapping disrupts VR compositor frame pacing. DLSS works without wrapped interfaces; only frame generation needs them. 
Co-Authored-By: Claude Opus 4.6 --- src/Features/Upscaling.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index 33e8f1098b..117670e19a 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -147,7 +147,10 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( ppImmediateContext); if (upscaling.IsBackendInitialized()) { - // Skip interface wrapping for VR - it can affect frame pacing with VR compositor + // Skip Streamline interface wrapping for VR — slUpgradeInterface wraps the D3D + // device and swap chain with Streamline proxy objects, which disrupts VR compositor + // frame pacing (causes judder/stuttering). DLSS still functions without wrapped + // interfaces; only frame generation requires them (and that's already VR-gated above). if (!globals::game::isVR) { upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); From bd1c480a5601045bd6203d59b31a6cb81e484e35 Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 15 Mar 2026 23:30:07 -0600 Subject: [PATCH 03/16] feat(vr-stereo-opt): bilinear color sampling for stereo reprojection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace nearest-neighbor integer Load() with hardware bilinear SampleLevel() for color reads during Eye 1 reprojection. Fixes visible "distortion ridges" at iso-depth contours where floor(offset) jumps by 1 pixel. Motion vectors remain integer-sampled via Load() — DLSS requires discrete motion data. The decoupled approach eliminates visual artifacts without affecting temporal accumulation. 
- Add SampleReprojectedColor() helper using GetDimensions() + UV math - Bind linear sampler at s0 in overwrite mode (lazy-created) - Clamp UVs to active DRS viewport bounds Co-Authored-By: Claude Opus 4.6 --- package/Shaders/VR/StereoBlendCS.hlsl | 82 +++++++++++++++++++++++++-- src/Features/VR.h | 4 +- src/Features/VR/StereoBlend.cpp | 30 +++++++++- 3 files changed, 107 insertions(+), 9 deletions(-) diff --git a/package/Shaders/VR/StereoBlendCS.hlsl b/package/Shaders/VR/StereoBlendCS.hlsl index 7983c25e76..0652844b29 100644 --- a/package/Shaders/VR/StereoBlendCS.hlsl +++ b/package/Shaders/VR/StereoBlendCS.hlsl @@ -22,6 +22,8 @@ RWTexture2D OutputRW : register(u0); #ifdef STEREO_OVERWRITE RWTexture2D MotionRW : register(u1); Texture2D ModeTexture : register(t2); +Texture2D ReflectanceTexture : register(t3); // .w = POM pixelOffset from Lighting pass +SamplerState LinearSampler : register(s0); // Mode constants matching VRStereoOptimizations/cbuffers.hlsli // (can't include directly — its cbuffer on b1 conflicts with StereoBlendCB) @@ -30,6 +32,23 @@ Texture2D ModeTexture : register(t2); #define MODE_MAIN 2 #define MODE_EDGE_NEIGHBOUR 3 #define MODE_FULL_BLEND 4 + +// Hardware bilinear color sample from reprojected pixel coordinates. +// Converts integer pixel coords to proper full-texture UV for SampleLevel, +// clamped to the active DRS viewport to prevent sampling stale data. +// Motion vectors stay as integer Load() — filtering them breaks DLSS. 
+float4 SampleReprojectedColor(int2 reprojPx, float2 frameDim) +{ + uint texW, texH; + ColorTexture.GetDimensions(texW, texH); + float2 texSize = float2(texW, texH); + float2 sampleUV = (float2(reprojPx) + 0.5) / texSize; + // Clamp to active DRS viewport bounds (half-texel inset to keep bilinear inside valid region) + float2 minUV = 0.5 / texSize; + float2 maxUV = (frameDim - 0.5) / texSize; + sampleUV = clamp(sampleUV, minUV, maxUV); + return ColorTexture.SampleLevel(LinearSampler, sampleUV, 0); +} #endif cbuffer StereoBlendCB : register(b1) @@ -40,9 +59,10 @@ cbuffer StereoBlendCB : register(b1) float MaxBlendFactor; float ColorDiffThreshold; float DebugEdgeTint; - uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer, 3 = POM depth heatmap float FullBlendDistance; - float2 _pad; + float POMDepthScale; + float _pad; }; static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend @@ -109,6 +129,20 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) return; } + // Debug mode 3: POM depth data visualizer — show Reflectance.w as color + if (DebugMode == 3) { + float pomVal = ReflectanceTexture[dtid].w; + float4 c = ColorTexture[dtid]; + if (pomVal > 1e-2) { + // POM pixel: red-to-green gradient based on parallaxAmount + // Red = peak (high pomVal, closer to camera), Green = valley (low pomVal, farther), Yellow = geometry plane + float3 pomColor = float3(pomVal, 1.0 - pomVal, 0); + OutputRW[dtid] = float4(lerp(c.rgb, pomColor, 0.7), c.a); + } + // Non-POM pixels (pomVal ~ 0) left untouched + return; + } + // MODE_DISOCCLUDED: fully shaded, leave untouched if (pixelMode == MODE_DISOCCLUDED) return; @@ -117,8 +151,23 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (pixelMode == MODE_FULL_BLEND) { float4 center = 
ColorTexture[dtid]; + // Check for POM depth offset at this pixel + // pixelOffset = parallaxAmount (0-1) from ExtendedMaterials, 0.5 = geometry plane. + // Values > 0.5 are peaks (closer to camera), < 0.5 are valleys (farther from camera). + // Correction: high pomVal should push depth closer (smaller linear depth), + // so we use (0.5 - pomOffset) to get a negative correction for peaks. + // Non-POM pixels store 0.0, so threshold > 1e-2 distinguishes them. + float reprojDepthFB = centerDepth; + float pomOffsetFB = ReflectanceTexture[dtid].w; + if (pomOffsetFB > 1e-2 && POMDepthScale > 0) { + float linDepthFB = SharedData::GetScreenDepth(centerDepth); + float depthCorrectionFB = (0.5 - pomOffsetFB) * POMDepthScale; + float newLinDepthFB = linDepthFB + depthCorrectionFB; + reprojDepthFB = (SharedData::CameraData.x - SharedData::CameraData.w / newLinDepthFB) / SharedData::CameraData.z; + } + // Reproject to the other eye - Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, centerDepth, eyeIndex, FrameDim); + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, reprojDepthFB, eyeIndex, FrameDim); if (!r.valid) { // Debug tint for failed reprojection if (DebugEdgeTint > 0) @@ -131,7 +180,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (otherMode != MODE_FULL_BLEND && otherMode != MODE_DISOCCLUDED) return; - float4 otherColor = ColorTexture[r.otherPx]; + float4 otherColor = SampleReprojectedColor(r.otherPx, FrameDim); float otherDepth = DepthTexture[r.otherPx]; // Depth-weighted bilateral blend @@ -160,10 +209,31 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) // Eye 1: reproject all non-disoccluded, non-full-blend pixels (MAIN, EDGE) from Eye 0. // StencilCS already performed the authoritative disocclusion check with the correct // depth buffer state — no redundant depth agreement check here. 
- Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, centerDepth, eyeIndex, FrameDim); + float reprojDepth = centerDepth; + + // First-pass reprojection to find Eye 0 source pixel + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, reprojDepth, eyeIndex, FrameDim); if (!r.valid) return; + // Read POM offset from Eye 0 source's reflectance.w + // pixelOffset = parallaxAmount (0-1) from ExtendedMaterials, 0.5 = geometry plane. + // Values > 0.5 are peaks (closer to camera), < 0.5 are valleys (farther from camera). + // Correction: high pomVal should push depth closer (smaller linear depth), + // so we use (0.5 - pomOffset) to get a negative correction for peaks. + // Non-POM pixels store 0.0, so threshold > 1e-2 distinguishes them. + float pomOffset = ReflectanceTexture[r.otherPx].w; + if (pomOffset > 1e-2) { + // Re-reproject with POM-adjusted depth centered at geometry plane + float linearDepth = SharedData::GetScreenDepth(centerDepth); + float depthCorrection = (0.5 - pomOffset) * POMDepthScale; + float newLinearDepth = linearDepth + depthCorrection; + reprojDepth = (SharedData::CameraData.x - SharedData::CameraData.w / newLinearDepth) / SharedData::CameraData.z; + r = Stereo::ReprojectToOtherEye(uv, reprojDepth, eyeIndex, FrameDim); + if (!r.valid) + return; + } + // Skip if the Eye 0 source pixel is sky/unrendered (depth at clear value). // At DeferredPasses time, sky hasn't rendered yet — source would have clear color. // Let the sky/water pass fill these pixels later instead. 
@@ -171,7 +241,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (sourceDepth >= 1.0 || sourceDepth < 1e-5) return; - OutputRW[dtid] = ColorTexture[r.otherPx]; + OutputRW[dtid] = SampleReprojectedColor(r.otherPx, FrameDim); MotionRW[dtid] = MotionRW[r.otherPx]; #else // Normal bilateral blend path diff --git a/src/Features/VR.h b/src/Features/VR.h index a9cab72282..07e3a1609e 100644 --- a/src/Features/VR.h +++ b/src/Features/VR.h @@ -361,6 +361,7 @@ struct VR : OverlayFeature winrt::com_ptr stereoBlendOverwriteCS; eastl::unique_ptr stereoBlendCopyTex; eastl::unique_ptr stereoBlendCB; + winrt::com_ptr stereoBlendLinearSampler; struct alignas(16) StereoBlendCB { @@ -372,7 +373,8 @@ struct VR : OverlayFeature float DebugEdgeTint; uint32_t DebugMode; float FullBlendDistance; - float _pad[2]; + float POMDepthScale; + float _pad; }; // Engine hook integration points diff --git a/src/Features/VR/StereoBlend.cpp b/src/Features/VR/StereoBlend.cpp index ff7eacf3e9..dbde04b645 100644 --- a/src/Features/VR/StereoBlend.cpp +++ b/src/Features/VR/StereoBlend.cpp @@ -5,6 +5,7 @@ #include "Features/ScreenSpaceShadows.h" #include "Features/VRStereoOptimizations.h" #include "State.h" +#include "Deferred.h" void VR::ClearShaderCache() { @@ -79,10 +80,13 @@ void VR::DrawStereoBlend() cbData.DebugMode = 1u; else if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugFullBlendDepth) cbData.DebugMode = 2u; + else if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugPOMDepth) + cbData.DebugMode = 3u; else cbData.DebugMode = 0u; cbData.FullBlendDistance = vrStereoOptActive ? globals::features::vrStereoOptimizations.settings.fullBlendDistance : 0.0f; + cbData.POMDepthScale = vrStereoOptActive ? 
globals::features::vrStereoOptimizations.settings.pomDepthScale : 1.0f; stereoBlendCB->Update(cbData); auto cbPtr = stereoBlendCB->CB(); @@ -125,6 +129,10 @@ void VR::DrawStereoBlend() if (modeSRV) context->CSSetShaderResources(2, 1, &modeSRV); + // Bind REFLECTANCE SRV for POM depth offset (stored in .w by Lighting pass) + auto& reflectanceRT = renderer->GetRuntimeData().renderTargets[REFLECTANCE]; + context->CSSetShaderResources(3, 1, &reflectanceRT.SRV); + ID3D11UnorderedAccessView* uavs[2]{ main.UAV, motionVectors.UAV }; context->CSSetUnorderedAccessViews(0, 2, uavs, nullptr); } else { @@ -132,16 +140,34 @@ void VR::DrawStereoBlend() context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); } + // Bind linear sampler for hardware bilinear color sampling in overwrite mode + if (isOverwriteMode) { + if (!stereoBlendLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + globals::d3d::device->CreateSamplerState(&sampDesc, stereoBlendLinearSampler.put()); + } + ID3D11SamplerState* samplers[] = { stereoBlendLinearSampler.get() }; + context->CSSetSamplers(0, 1, samplers); + } + context->CSSetShader(activeCS, nullptr, 0); context->Dispatch(dispatchCount.x, dispatchCount.y, 1); // Cleanup - ID3D11ShaderResourceView* nullSRVs[3] = {}; - context->CSSetShaderResources(0, isOverwriteMode ? 3 : 2, nullSRVs); + ID3D11ShaderResourceView* nullSRVs[4] = {}; + context->CSSetShaderResources(0, isOverwriteMode ? 4 : 2, nullSRVs); ID3D11UnorderedAccessView* nullUAVs[2] = {}; context->CSSetUnorderedAccessViews(0, isOverwriteMode ? 
2 : 1, nullUAVs, nullptr); ID3D11Buffer* nullCB = nullptr; context->CSSetConstantBuffers(1, 1, &nullCB); + if (isOverwriteMode) { + ID3D11SamplerState* nullSampler[] = { nullptr }; + context->CSSetSamplers(0, 1, nullSampler); + } context->CSSetShader(nullptr, nullptr, 0); // Restore DSV after CS dispatch in overwrite mode From b130bf5b49646285c2ec6e1d37ad0843bd753072 Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 15 Mar 2026 23:30:32 -0600 Subject: [PATCH 04/16] feat(vr-stereo-opt): POM depth-aware reprojection via Reflectance.w MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Store parallax occlusion mapping (POM) depth offset in Reflectance.w so the stereo reprojection shader can account for parallax displacement when reprojecting Eye 0 → Eye 1. Without this, POM surfaces show incorrect stereo depth. - Enable alpha writes on RT[5] (REFLECTANCE) in deferred blend states - Output pixelOffset to Reflectance.w in Lighting.hlsl (EMAT+PARALLAX) - Initialize pixelOffset=0 in ExtendedMaterials GetParallaxCoords - Add pomDepthScale setting and debugPOMDepth visualization - Disable CAS (force 0) pending proper integration; hide UI slider Co-Authored-By: Claude Opus 4.6 --- .../ExtendedMaterials/ExtendedMaterials.hlsli | 3 ++- package/Shaders/Lighting.hlsl | 7 ++++- src/Deferred.cpp | 4 +++ src/Features/VRStereoOptimizations.cpp | 27 ++++++++++++------- src/Features/VRStereoOptimizations.h | 4 ++- 5 files changed, 33 insertions(+), 12 deletions(-) diff --git a/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli index 4152bdb2d5..051b3c387f 100644 --- a/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli +++ b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli @@ -325,6 +325,7 @@ namespace ExtendedMaterials float2 GetParallaxCoords(float distance, float2 coords, float 
mipLevel, float3 viewDir, float3x3 tbn, float noise, Texture2D tex, SamplerState texSampler, uint channel, DisplacementParams params, out float pixelOffset) #endif { + pixelOffset = 0; float3 viewDirTS = normalize(mul(tbn, viewDir)); #if defined(LANDSCAPE) viewDirTS.xy /= viewDirTS.z * 0.7 + 0.3 + params[0].FlattenAmount; // Fix for objects at extreme viewing angles @@ -496,7 +497,7 @@ namespace ExtendedMaterials #endif nearBlendToFar *= nearBlendToFar; float offset = (1.0 - parallaxAmount) * -maxHeight + minHeight; - pixelOffset = lerp(parallaxAmount * scale, 0, nearBlendToFar); + pixelOffset = lerp(parallaxAmount, 0.5, nearBlendToFar); return lerp(viewDirTS.xy * offset + coords.xy, coords, nearBlendToFar); } diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index 9d4c21120e..d0cbf3365f 100644 --- a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -3198,7 +3198,12 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) } # endif - psout.Reflectance = float4(indirectLobeWeights.specular, psout.Diffuse.w); +# if defined(EMAT) && (defined(PARALLAX) || defined(LANDSCAPE)) + psout.Reflectance = float4(indirectLobeWeights.specular, + (pixelOffset > 0.0) ? 
saturate(pixelOffset) : 0.0); +# else + psout.Reflectance = float4(indirectLobeWeights.specular, 0.0); +# endif psout.NormalGlossiness = float4(GBuffer::EncodeNormal(screenSpaceNormal), saturate(1.0 - material.Roughness), psout.Diffuse.w); # if defined(SNOW) diff --git a/src/Deferred.cpp b/src/Deferred.cpp index a0a5063e5e..ba8c1f3830 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -504,6 +504,10 @@ void Deferred::OverrideBlendStates() blendDesc.RenderTarget[i].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; } + // RT[5] = REFLECTANCE: enable alpha writes for POM depth data + // stored in Reflectance.w, used by StereoBlendCS for depth-aware reprojection + blendDesc.RenderTarget[5].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[a][b][c][d])); } else { deferredBlendStates[a][b][c][d] = nullptr; diff --git a/src/Features/VRStereoOptimizations.cpp b/src/Features/VRStereoOptimizations.cpp index a242ccbc3b..0dcbdf3d42 100644 --- a/src/Features/VRStereoOptimizations.cpp +++ b/src/Features/VRStereoOptimizations.cpp @@ -1,5 +1,6 @@ #include "VRStereoOptimizations.h" +#include "ExtendedMaterials.h" #include "Globals.h" #include "State.h" #include "Utils/D3D.h" @@ -79,8 +80,10 @@ void VRStereoOptimizations::LoadSettings(json& o_json) settings.mipBiasNearDist = o_json["MipBiasNearDist"].get(); if (o_json.contains("MipBiasFarDist")) settings.mipBiasFarDist = o_json["MipBiasFarDist"].get(); - if (o_json.contains("CASStrength")) - settings.casStrength = o_json["CASStrength"].get(); + // CAS disabled for now — ignore saved value + // if (o_json.contains("CASStrength")) + // settings.casStrength = o_json["CASStrength"].get(); + settings.casStrength = 0.0f; if (o_json.contains("AlphaTestThreshold")) settings.alphaTestThreshold = o_json["AlphaTestThreshold"].get(); } @@ -332,23 +335,29 @@ void VRStereoOptimizations::DrawSettings() ImGui::Separator(); - ImGui::SliderFloat("CAS 
Sharpening", &settings.casStrength, 0.0f, 1.0f, "%.2f"); - if (ImGui::IsItemHovered()) - ImGui::SetTooltip("Contrast Adaptive Sharpening (intended for use with TAA).\n0 = disabled, higher = sharper."); - ImGui::Separator(); + // CAS slider hidden for now — forced to 0 + // ImGui::SliderFloat("CAS Sharpening", &settings.casStrength, 0.0f, 1.0f, "%.2f"); + // if (ImGui::IsItemHovered()) + // ImGui::SetTooltip("Contrast Adaptive Sharpening (intended for use with TAA).\n0 = disabled, higher = sharper."); + // ImGui::Separator(); if (settings.stereoMode == StereoMode::Off) return; ImGui::SliderFloat("Disocclusion Depth Threshold", &settings.disocclusionDepthThreshold, 0.001f, 0.1f, "%.4f"); - ImGui::SliderFloat("Full Blend Distance", &settings.fullBlendDistance, 0.0f, 10000.0f, "%.0f"); - if (ImGui::IsItemHovered()) - ImGui::SetTooltip("Geometry closer than this distance (game units) is fully shaded in both eyes and bilaterally blended for 2x supersampling. 0 = disabled."); if (globals::state->IsDeveloperMode()) { if (ImGui::TreeNode("Debug")) { + ImGui::SliderFloat("Full Blend Distance", &settings.fullBlendDistance, 0.0f, 10000.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Geometry closer than this distance (game units) is fully shaded in both eyes and bilaterally blended for 2x supersampling. 0 = disabled."); + + ImGui::SliderFloat("POM Depth Scale", &settings.pomDepthScale, 0.0f, 500.0f, "%.1f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Scale factor for POM depth correction in stereo reprojection.\n1.0 = physical scale. 
Increase for more visible POM stereo depth."); ImGui::Checkbox("Skip Pixel Reprojection", &settings.debugSkipMerge); ImGui::Checkbox("Full Blend Depth View", &settings.debugFullBlendDepth); + ImGui::Checkbox("Debug POM Depth", &settings.debugPOMDepth); if (settings.debugFullBlendDepth) ImGui::TextColored(ImVec4(0, 1, 1, 1), " Cyan = full blend zone (closer = stronger tint)"); ImGui::Text("Stencil swaps this frame: %u", stencilSwapCount); diff --git a/src/Features/VRStereoOptimizations.h b/src/Features/VRStereoOptimizations.h index 7aed5ff837..d4e92f8dca 100644 --- a/src/Features/VRStereoOptimizations.h +++ b/src/Features/VRStereoOptimizations.h @@ -83,6 +83,7 @@ struct VRStereoOptimizations : public Feature int edgeWidth = 3; ///< Half-width of edge band in pixels (total band = 2 * edgeWidth) float minEdgeDistance = 5000.0f; ///< Minimum linearized depth for edge AA (game units) float fullBlendDistance = 0.0f; ///< Linearized depth below which both eyes are fully shaded + blended (game units) + float pomDepthScale = 22.5f; ///< Scale factor for POM depth correction in stereo reprojection bool debugFullBlendDepth = false; ///< Show full blend depth zone as cyan overlay float qualityJitterOffset = 0.125f; float foveatedRegionRadius = 0.3f; @@ -98,6 +99,7 @@ struct VRStereoOptimizations : public Feature bool debugForceAllStencil = false; bool debugForceAllReprojectCS = false; bool debugDepthMap = false; + bool debugPOMDepth = false; ///< Show POM depth data (Reflectance.w) as heatmap overlay // MIP LOD Bias (negative = sharper textures) // 0 = Off, 1 = All textures (global), 2 = Distant trees only (depth-gated TREE_ANIM) @@ -107,7 +109,7 @@ struct VRStereoOptimizations : public Feature float mipBiasFarDist = 6000.0f; ///< Game units: full bias beyond this // CAS (Contrast Adaptive Sharpening) - post-TAA - float casStrength = 0.7f; ///< 0.0 = disabled, 0.0-1.0 = subtle to strong + float casStrength = 0.0f; ///< 0.0 = disabled, 0.0-1.0 = subtle to strong (hidden for 
now) float alphaTestThreshold = 0.001f; ///< Alpha floor for TREE_ANIM zombie texel removal } settings; From 19edb700b8c7d9ad7aa66e2fce3f670a6ed7b86e Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 15 Mar 2026 23:30:56 -0600 Subject: [PATCH 05/16] fix: SharedData CPMSettings struct alignment after SH field merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix HLSL/C++ struct mismatch in CPMSettings.pad0: change float1 → bool in HLSL and float → uint in C++ to match the bool fields preceding it. Required after merging VR MIP bias fields with upstream SH ambient fields in SharedDataCB. Co-Authored-By: Claude Opus 4.6 --- package/Shaders/Common/SharedData.hlsli | 2 +- src/Features/ExtendedMaterials.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package/Shaders/Common/SharedData.hlsli b/package/Shaders/Common/SharedData.hlsli index 3ddf2f9ec7..8729395239 100644 --- a/package/Shaders/Common/SharedData.hlsli +++ b/package/Shaders/Common/SharedData.hlsli @@ -57,7 +57,7 @@ namespace SharedData bool EnableShadows; bool ExtendShadows; bool EnableParallaxWarpingFix; - float1 pad0; + bool pad0; }; struct CubemapCreatorSettings diff --git a/src/Features/ExtendedMaterials.h b/src/Features/ExtendedMaterials.h index 2a05a940aa..83da3abed3 100644 --- a/src/Features/ExtendedMaterials.h +++ b/src/Features/ExtendedMaterials.h @@ -36,7 +36,7 @@ struct ExtendedMaterials : Feature uint ExtendShadows = 1; uint EnableParallaxWarpingFix = 1; - float pad[1]; + uint pad0 = 0; }; STATIC_ASSERT_ALIGNAS_16(Settings); From 39d26c6768cbb2fcad4c3b441ea270e8377dda24 Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 15 Mar 2026 23:42:53 -0600 Subject: [PATCH 06/16] fix: correct CommonLib submodule and SharedDataCB alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reset CommonLibSSE-NG submodule to upstream 4.7.1 (23bf5512). 
Fix SharedDataCB struct padding: float pad0 → float4 pad0 to match HLSL cbuffer layout (float2 + implicit 8-byte gap before float4 AmbientSHR) and ensure sizeof is a multiple of 16. Co-Authored-By: Claude Opus 4.6 --- src/State.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/State.h b/src/State.h index e0e01918d8..f54e180a75 100644 --- a/src/State.h +++ b/src/State.h @@ -215,7 +215,7 @@ class State float VRMipBiasFarDist; uint VRMipBiasMode; // 0=Off, 1=All Textures, 2=Distant Trees only float VRAlphaTestThreshold; // Alpha test threshold for VR TREE_ANIM (0 = use vanilla) - float pad0; + float4 pad0; // HLSL: float2 + implicit 8-byte gap before float4 AmbientSHR float4 AmbientSHR; float4 AmbientSHG; float4 AmbientSHB; From f58e0c6b487ef376c4534ad2214a67f31c578aa0 Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 15 Mar 2026 23:46:43 -0600 Subject: [PATCH 07/16] fix: missing semicolon in State.cpp after merge conflict resolution Co-Authored-By: Claude Opus 4.6 --- src/State.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/State.cpp b/src/State.cpp index 5b9524f354..ae66ae1e43 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -881,7 +881,7 @@ void State::UpdateSharedData([[maybe_unused]] bool a_inWorld, [[maybe_unused]] b SphericalHarmonics::SH2Color dalcSH = SphericalHarmonics::DALCToSH(dalcColors); data.AmbientSHR = { dalcSH.r.c0, dalcSH.r.c1[0], dalcSH.r.c1[1], dalcSH.r.c1[2] }; data.AmbientSHG = { dalcSH.g.c0, dalcSH.g.c1[0], dalcSH.g.c1[1], dalcSH.g.c1[2] }; - data.AmbientSHB = { dalcSH.b.c0, dalcSH.b.c1[0], dalcSH.b.c1[1], dalcSH.b.c1[2] } + data.AmbientSHB = { dalcSH.b.c0, dalcSH.b.c1[0], dalcSH.b.c1[1], dalcSH.b.c1[2] }; sharedDataCB->Update(data); } From e69a386832f95d7c86eb564c4b00a6fc6837da63 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2026 06:06:13 +0000 Subject: [PATCH 08/16] 
=?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20pre-commi?= =?UTF-8?q?t.ci=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated formatting by clang-format, prettier, and other hooks. See https://pre-commit.ci for details. --- package/Shaders/Common/SharedData.hlsli | 18 ++++---- package/Shaders/DistantTree.hlsl | 8 ++-- package/Shaders/Lighting.hlsl | 7 ++- package/Shaders/RunGrass.hlsl | 8 ++-- package/Shaders/VR/StereoBlendCS.hlsl | 28 ++++++------ package/Shaders/VR/VRPostProcessCS.hlsl | 17 ++++--- .../VRStereoOptimizations/StencilCS.hlsl | 36 +++++++-------- .../VRStereoOptimizations/StencilWritePS.hlsl | 4 +- .../VRStereoOptimizations/StencilWriteVS.hlsl | 4 +- .../VRStereoOptimizations/cbuffers.hlsli | 30 ++++++------- src/Features/Upscaling.cpp | 37 +++++++--------- src/Features/Upscaling.h | 16 +++---- src/Features/VR/StereoBlend.cpp | 3 +- src/Features/VRStereoOptimizations.cpp | 7 ++- src/Features/VRStereoOptimizations.h | 44 +++++++++---------- src/Globals.cpp | 3 +- src/State.h | 4 +- 17 files changed, 133 insertions(+), 141 deletions(-) diff --git a/package/Shaders/Common/SharedData.hlsli b/package/Shaders/Common/SharedData.hlsli index 8729395239..959678edff 100644 --- a/package/Shaders/Common/SharedData.hlsli +++ b/package/Shaders/Common/SharedData.hlsli @@ -20,15 +20,15 @@ namespace SharedData float Timer; uint FrameCount; uint FrameCountAlwaysActive; - bool InInterior; // If the area lacks a directional shadow light e.g. the sun or moon - bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) - bool HideSky; // HideSky flag in WorldSpace, e.g. 
Blackreach - float MipBias; // Offset to mip level for TAA sharpness - float VRMipBias; // Additional negative MIP bias for VR foliage sharpening (depth-scaled) - float VRMipBiasNearDist; // Game units: no VR MIP bias closer than this - float VRMipBiasFarDist; // Game units: full VR MIP bias beyond this - uint VRMipBiasMode; // 0=Off, 1=All Textures, 2=Distant Trees (TREE_ANIM) only - float VRAlphaTestThreshold; // Alpha test threshold for VR TREE_ANIM (0 = disabled) + bool InInterior; // If the area lacks a directional shadow light e.g. the sun or moon + bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) + bool HideSky; // HideSky flag in WorldSpace, e.g. Blackreach + float MipBias; // Offset to mip level for TAA sharpness + float VRMipBias; // Additional negative MIP bias for VR foliage sharpening (depth-scaled) + float VRMipBiasNearDist; // Game units: no VR MIP bias closer than this + float VRMipBiasFarDist; // Game units: full VR MIP bias beyond this + uint VRMipBiasMode; // 0=Off, 1=All Textures, 2=Distant Trees (TREE_ANIM) only + float VRAlphaTestThreshold; // Alpha test threshold for VR TREE_ANIM (0 = disabled) float2 pad0; float4 AmbientSHR; float4 AmbientSHG; diff --git a/package/Shaders/DistantTree.hlsl b/package/Shaders/DistantTree.hlsl index 8799feb633..0fa3314f0f 100644 --- a/package/Shaders/DistantTree.hlsl +++ b/package/Shaders/DistantTree.hlsl @@ -205,9 +205,9 @@ PS_OUTPUT main(PS_INPUT input) { float alphaRef = AlphaTestRefRS; -#if defined(VR) +# if defined(VR) alphaRef -= eyeIndex * 0.1; -#endif +# endif if ((alpha - alphaRef) < 0) { discard; } @@ -221,9 +221,9 @@ PS_OUTPUT main(PS_INPUT input) { float alphaRef = AlphaTestRefRS; -#if defined(VR) +# if defined(VR) alphaRef -= eyeIndex * 0.1; -#endif +# endif if ((baseColor.w - alphaRef) < 0) { discard; } diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index d0cbf3365f..3ffc7c311b 100644 --- a/package/Shaders/Lighting.hlsl 
+++ b/package/Shaders/Lighting.hlsl @@ -1783,11 +1783,11 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) // VR MIP bias: depth-gated sharpening for distant textures // Mode 1 = All Textures, Mode 2 = Distant Trees (TREE_ANIM) only float vrFoliageBias = 0; -# if defined(TREE_ANIM) +# if defined(TREE_ANIM) if (SharedData::VRMipBias < 0) { -# else +# else if (SharedData::VRMipBias < 0 && SharedData::VRMipBiasMode == 1) { -# endif +# endif float linDepth = SharedData::GetScreenDepth(input.Position.z); float t = saturate((linDepth - SharedData::VRMipBiasNearDist) / max(SharedData::VRMipBiasFarDist - SharedData::VRMipBiasNearDist, 1.0)); vrFoliageBias = SharedData::VRMipBias * t; @@ -3110,7 +3110,6 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) # endif // TREE_ANIM # endif // DO_ALPHA_TEST - # if defined(ANISOTROPIC_ALPHA) // Uniform alpha material settings uint AlphaMaterialModel = ExtendedTranslucency::GetMaterialModelFromDescriptor(Permutation::ExtraFeatureDescriptor); diff --git a/package/Shaders/RunGrass.hlsl b/package/Shaders/RunGrass.hlsl index 805779acce..664fe97a6a 100644 --- a/package/Shaders/RunGrass.hlsl +++ b/package/Shaders/RunGrass.hlsl @@ -504,10 +504,10 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) float diffuseAlpha = input.VertexColor.w * baseColor.w; { float alphaRef = AlphaTestRefRS; -#if defined(VR) +# if defined(VR) uint convergenceEyeIndex = Stereo::GetEyeIndexPS(input.HPosition, VPOSOffset); alphaRef -= convergenceEyeIndex * 0.1; -#endif +# endif if ((diffuseAlpha - alphaRef) < 0) { discard; } @@ -875,10 +875,10 @@ PS_OUTPUT main(PS_INPUT input) float diffuseAlpha = input.VertexColor.w * baseColor.w; { float alphaRef = AlphaTestRefRS; -#if defined(VR) +# if defined(VR) uint convergenceEyeIndex = Stereo::GetEyeIndexPS(input.HPosition, VPOSOffset); alphaRef -= convergenceEyeIndex * 0.1; -#endif +# endif if ((diffuseAlpha - alphaRef) < 0) { discard; } diff --git 
a/package/Shaders/VR/StereoBlendCS.hlsl b/package/Shaders/VR/StereoBlendCS.hlsl index 0652844b29..961dd01ad5 100644 --- a/package/Shaders/VR/StereoBlendCS.hlsl +++ b/package/Shaders/VR/StereoBlendCS.hlsl @@ -27,11 +27,11 @@ SamplerState LinearSampler : register(s0); // Mode constants matching VRStereoOptimizations/cbuffers.hlsli // (can't include directly — its cbuffer on b1 conflicts with StereoBlendCB) -#define MODE_DISOCCLUDED 0 -#define MODE_EDGE 1 -#define MODE_MAIN 2 -#define MODE_EDGE_NEIGHBOUR 3 -#define MODE_FULL_BLEND 4 +# define MODE_DISOCCLUDED 0 +# define MODE_EDGE 1 +# define MODE_MAIN 2 +# define MODE_EDGE_NEIGHBOUR 3 +# define MODE_FULL_BLEND 4 // Hardware bilinear color sample from reprojected pixel coordinates. // Converts integer pixel coords to proper full-texture UV for SampleLevel, @@ -59,13 +59,13 @@ cbuffer StereoBlendCB : register(b1) float MaxBlendFactor; float ColorDiffThreshold; float DebugEdgeTint; - uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer, 3 = POM depth heatmap + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer, 3 = POM depth heatmap float FullBlendDistance; float POMDepthScale; float _pad; }; -static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend +static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend static const int kEdgeMargin = 2; // Neighbor offset (pixels) for destination edge + mask boundary check static const float kDepthAgreementThreshold = 0.015; // Relative depth difference threshold for overwrite mode disocclusion rejection @@ -246,12 +246,12 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) #else // Normal bilateral blend path -#ifdef EYE0_ONLY +# ifdef EYE0_ONLY // Only process Eye 0 (left half) - Eye 1 left 
untouched float2 uvCheck = (dtid + 0.5) * RcpFrameDim; if (Stereo::GetEyeIndexFromTexCoord(uvCheck) == 1) return; -#endif +# endif float2 uv = (dtid + 0.5) * RcpFrameDim; uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); @@ -305,7 +305,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } } -#ifdef DEBUG_BACKCHECK +# ifdef DEBUG_BACKCHECK // Debug visualization (6 states): // Blue = mask/sky: skipped // Yellow = source edge: depth discontinuity at this pixel @@ -322,7 +322,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) float3(0.5, 0.0, 0.0) // 5: back-check failed - red }; OutputRW[dtid] = float4(lerp(centerColor.rgb, debugColors[debugState], 0.7), centerColor.a); -#elif defined(DEBUG_BLEND_WEIGHT) +# elif defined(DEBUG_BLEND_WEIGHT) // Blend weight heatmap: only pixels with actual blend activity are colorized. // Untouched pixels pass through unmodified. float w = saturate(r.blendWeight / max(MaxBlendFactor, 1e-5)); @@ -332,7 +332,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } else { OutputRW[dtid] = centerColor; } -#elif defined(DEBUG_EDGE_DETECTION) +# elif defined(DEBUG_EDGE_DETECTION) // Edge detection visualizer: highlights pixels excluded by depth discontinuity checks. // Non-edge pixels show the normal blended output for scene context. 
// Bright yellow = source edge: discontinuity at this pixel @@ -344,9 +344,9 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } else { OutputRW[dtid] = blendedColor; } -#else +# else OutputRW[dtid] = blendedColor; -#endif +# endif #endif // STEREO_OVERWRITE } diff --git a/package/Shaders/VR/VRPostProcessCS.hlsl b/package/Shaders/VR/VRPostProcessCS.hlsl index a51e19e8db..29df310420 100644 --- a/package/Shaders/VR/VRPostProcessCS.hlsl +++ b/package/Shaders/VR/VRPostProcessCS.hlsl @@ -20,20 +20,19 @@ cbuffer VRPostProcessCB : register(b1) { float2 FrameDim; float2 RcpFrameDim; - float DebugEdgeTint; // 0 = off, >0 = debug visualization strength - uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer + float DebugEdgeTint; // 0 = off, >0 = debug visualization strength + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer float FullBlendDistance; // Linearized depth threshold for full blend zone visualization - float _pad; // Pad to 16-byte alignment + float _pad; // Pad to 16-byte alignment }; -#define MODE_DISOCCLUDED 0 -#define MODE_EDGE 1 -#define MODE_MAIN 2 +#define MODE_DISOCCLUDED 0 +#define MODE_EDGE 1 +#define MODE_MAIN 2 #define MODE_EDGE_NEIGHBOUR 3 -#define MODE_FULL_BLEND 4 +#define MODE_FULL_BLEND 4 -[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) -{ +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { if (any(dtid >= uint2(FrameDim))) return; diff --git a/package/Shaders/VRStereoOptimizations/StencilCS.hlsl b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl index 647edacef4..7510c7d75c 100644 --- a/package/Shaders/VRStereoOptimizations/StencilCS.hlsl +++ b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl @@ -8,8 +8,8 @@ // // Dispatched over full SBS resolution (FrameDim.x x FrameDim.y). 
-#include "Common/VR.hlsli" #include "Common/SharedData.hlsli" +#include "Common/VR.hlsli" #include "VRStereoOptimizations/cbuffers.hlsli" Texture2D DepthTexture : register(t0); @@ -105,25 +105,25 @@ static const float kDisocclusionThreshold = 0.015; uint maxWidth = kInnerWidth; if (!skipEdgeDetection) { - [loop] - for (uint d = 1; d <= maxWidth; d++) { - [unroll] - for (int i = 0; i < 4; i++) { - int2 rawNeighbor = int2(dtid) + offsets[i] * (int)d; - uint2 neighborCoord = Stereo::ClampToEyeBounds(rawNeighbor, eyeIndex, FrameDim); - - float neighborDepth = DepthTexture[neighborCoord]; - bool neighborIsSky = (neighborDepth < 1e-5) || (neighborDepth >= 1.0); - float linNeighbor = neighborIsSky ? 999999.0 : SharedData::GetScreenDepth(neighborDepth); - float maxLin = max(max(linCenter, linNeighbor), 1e-5); - float relDepthDiff = abs(linCenter - linNeighbor) / maxLin; - - if (relDepthDiff > EdgeDepthThreshold && d < nearestEdgeDist) { - nearestEdgeDist = d; - nearestWeAreOuter = (linNeighbor < linCenter); // neighbor closer to camera = we are background + [loop] for (uint d = 1; d <= maxWidth; d++) + { + [unroll] for (int i = 0; i < 4; i++) + { + int2 rawNeighbor = int2(dtid) + offsets[i] * (int)d; + uint2 neighborCoord = Stereo::ClampToEyeBounds(rawNeighbor, eyeIndex, FrameDim); + + float neighborDepth = DepthTexture[neighborCoord]; + bool neighborIsSky = (neighborDepth < 1e-5) || (neighborDepth >= 1.0); + float linNeighbor = neighborIsSky ? 
999999.0 : SharedData::GetScreenDepth(neighborDepth); + float maxLin = max(max(linCenter, linNeighbor), 1e-5); + float relDepthDiff = abs(linCenter - linNeighbor) / maxLin; + + if (relDepthDiff > EdgeDepthThreshold && d < nearestEdgeDist) { + nearestEdgeDist = d; + nearestWeAreOuter = (linNeighbor < linCenter); // neighbor closer to camera = we are background + } } } - } } // !skipEdgeDetection diff --git a/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl index a7316065b9..c45c2a2409 100644 --- a/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl +++ b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl @@ -21,8 +21,8 @@ Texture2D DepthTexture : register(t1); struct PS_INPUT { - float4 Position : SV_Position; - float2 TexCoord : TEXCOORD0; + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; }; void main(PS_INPUT input) diff --git a/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl index 7a45fa60c6..353aa53379 100644 --- a/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl +++ b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl @@ -7,8 +7,8 @@ struct VS_OUTPUT { - float4 Position : SV_Position; - float2 TexCoord : TEXCOORD0; + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; }; VS_OUTPUT main(uint vertexID : SV_VertexID) diff --git a/package/Shaders/VRStereoOptimizations/cbuffers.hlsli b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli index 76f31b6883..5f8c79caf7 100644 --- a/package/Shaders/VRStereoOptimizations/cbuffers.hlsli +++ b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli @@ -6,30 +6,30 @@ cbuffer VRStereoOptParams : register(b1) { - float2 FrameDim; // Full stereo buffer dimensions (both eyes) - float2 RcpFrameDim; // 1.0 / FrameDim + float2 FrameDim; // Full stereo buffer dimensions (both eyes) + float2 RcpFrameDim; // 1.0 / FrameDim - uint StereoModeValue; // 
0=Off, 1=Enable - float DisocclusionThreshold; // Depth difference threshold for disocclusion detection - float EdgeDepthThreshold; // Relative depth difference threshold for edge detection - uint EdgeWidth; // Half-width of edge detection band in pixels + uint StereoModeValue; // 0=Off, 1=Enable + float DisocclusionThreshold; // Depth difference threshold for disocclusion detection + float EdgeDepthThreshold; // Relative depth difference threshold for edge detection + uint EdgeWidth; // Half-width of edge detection band in pixels - float2 QualityJitter; // Sub-pixel jitter offset (Quality mode) - float FoveatedRadius; // Radius of foveal region in UV space + float2 QualityJitter; // Sub-pixel jitter offset (Quality mode) + float FoveatedRadius; // Radius of foveal region in UV space float pad2; - float2 FoveatedCenter; // Center of foveal region in UV space + float2 FoveatedCenter; // Center of foveal region in UV space float MinEdgeDistance; - float FullBlendDistance; // Linearized depth below which pixels get MODE_FULL_BLEND (game units) + float FullBlendDistance; // Linearized depth below which pixels get MODE_FULL_BLEND (game units) }; -#define STEREO_MODE_OFF 0 +#define STEREO_MODE_OFF 0 #define STEREO_MODE_ENABLE 1 -#define MODE_DISOCCLUDED 0 // Fully shaded, no reprojection, no blend (sky, HMD mask, parallax-occluded) -#define MODE_EDGE 1 // Depth edge boundary (distance 1) or inner/foreground band; fully shaded + bilateral blend -#define MODE_MAIN 2 // Eye 0: no reproject (Perf) / bilateral (Quality). Eye 1: overwrite reproject (Perf) / bilateral (Quality) +#define MODE_DISOCCLUDED 0 // Fully shaded, no reprojection, no blend (sky, HMD mask, parallax-occluded) +#define MODE_EDGE 1 // Depth edge boundary (distance 1) or inner/foreground band; fully shaded + bilateral blend +#define MODE_MAIN 2 // Eye 0: no reproject (Perf) / bilateral (Quality). 
Eye 1: overwrite reproject (Perf) / bilateral (Quality) #define MODE_EDGE_NEIGHBOUR 3 // (Legacy, unused) Outer/background band — now classified as MODE_MAIN -#define MODE_FULL_BLEND 4 // Near-camera geometry: both eyes fully shaded + bilateral blend for 2x supersampling +#define MODE_FULL_BLEND 4 // Near-camera geometry: both eyes fully shaded + bilateral blend for 2x supersampling #endif diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index 117670e19a..e2215848f8 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -3,8 +3,8 @@ #include "Deferred.h" #include "Features/VRStereoOptimizations.h" #include "Hooks.h" -#include "TAAReorder.h" #include "State.h" +#include "TAAReorder.h" #include "Upscaling/DX12SwapChain.h" #include "Upscaling/FidelityFX.h" #include "Upscaling/Streamline.h" @@ -984,16 +984,16 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de D3D11_TEXTURE2D_DESC srcDesc; ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || - vrCropColorIn[0]->desc.Width != cropWidthIn || - vrCropColorIn[0]->desc.Height != cropHeightIn || - vrIntermediateDepth[0]->desc.Width != cropWidthIn || - vrIntermediateDepth[0]->desc.Height != cropHeightIn || - vrIntermediateColorOut[0]->desc.Width != cropWidthOut || - vrIntermediateColorOut[0]->desc.Height != cropHeightOut || - vrFinalOutput[0]->desc.Width != eyeWidthOut || - vrFinalOutput[0]->desc.Height != eyeHeightOut); + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrCropColorIn[0]->desc.Width != cropWidthIn || + vrCropColorIn[0]->desc.Height != cropHeightIn || + vrIntermediateDepth[0]->desc.Width != cropWidthIn || + vrIntermediateDepth[0]->desc.Height != cropHeightIn || + 
vrIntermediateColorOut[0]->desc.Width != cropWidthOut || + vrIntermediateColorOut[0]->desc.Height != cropHeightOut || + vrFinalOutput[0]->desc.Width != eyeWidthOut || + vrFinalOutput[0]->desc.Height != eyeHeightOut); } if (needsRecreate) { @@ -1076,7 +1076,7 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de // Crop depth/mvec/reactive/transparency directly from stereo buffers D3D11_BOX stereoCropBox = { offsetXIn + cropOffsetX, cropOffsetY, 0, - offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &stereoCropBox); context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, @@ -1100,10 +1100,10 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de D3D11_TEXTURE2D_DESC srcDesc; ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || - vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || - vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || + vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); } if (needsRecreate) { logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", @@ -1758,7 +1758,6 @@ std::vector Upscaling::GetActiveConstraints() co return constraints; } - /** * @brief Retrieves the current frame time for frame generation. 
* @@ -1932,9 +1931,7 @@ void Upscaling::Upscale(ID3D11Texture2D* colorSourceOverride) state->BeginPerfEvent("Upscaling"); // Use color source override if provided (e.g., post-PP intermediate for periphery TAA) - ID3D11Resource* colorSrc = colorSourceOverride - ? static_cast(colorSourceOverride) - : static_cast(main.texture); + ID3D11Resource* colorSrc = colorSourceOverride ? static_cast(colorSourceOverride) : static_cast(main.texture); if (upscaleMethod == UpscaleMethod::kDLSS) { streamline.Upscale(colorSrc, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); diff --git a/src/Features/Upscaling.h b/src/Features/Upscaling.h index 7eb13124e9..eace6aa3c3 100644 --- a/src/Features/Upscaling.h +++ b/src/Features/Upscaling.h @@ -57,10 +57,10 @@ struct Upscaling : Feature uint streamlineLogLevel = 0; // 0=Off, 1=Default, 2=Verbose float sharpnessFSR = 0.0f; float sharpnessDLSS = 0.0f; - uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M - uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M + uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel float vrDlssViewportScale = 1.0f; // 0.5 to 1.0, fraction of each eye that DLSS processes (VR only) - uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) + uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) }; Settings settings; @@ -159,8 +159,8 @@ struct Upscaling : Feature eastl::unique_ptr vrCropColorIn[2]; // crop-sized DLSS color input (VR viewport scaling only) // Periphery TAA (conductor approach) — used by two-call func() flow - winrt::com_ptr vrPreTAACopy; // full stereo kMAIN copy (Phase 1 PP, pre-TAA) - eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) + winrt::com_ptr vrPreTAACopy; // full stereo 
kMAIN copy (Phase 1 PP, pre-TAA) + eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) // Periphery fill compute shader (bilinear upscale render-res → display-res for VR viewport scaling) winrt::com_ptr vrPeripheryFillCS; @@ -168,9 +168,9 @@ struct Upscaling : Feature winrt::com_ptr vrLinearSampler; // DLSS composite pixel shaders (format-converting fullscreen copy for TAAReorder) - winrt::com_ptr vrDlssCompositePS; // point-sample (same-res format conversion) - winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) - winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params + winrt::com_ptr vrDlssCompositePS; // point-sample (same-res format conversion) + winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) + winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params ID3D11PixelShader* GetDlssCompositePS(); ID3D11PixelShader* GetDlssUpscalePS(); diff --git a/src/Features/VR/StereoBlend.cpp b/src/Features/VR/StereoBlend.cpp index dbde04b645..cdf945b044 100644 --- a/src/Features/VR/StereoBlend.cpp +++ b/src/Features/VR/StereoBlend.cpp @@ -1,11 +1,11 @@ #include "Features/VR.h" +#include "Deferred.h" #include "Features/DynamicCubemaps.h" #include "Features/ScreenSpaceGI.h" #include "Features/ScreenSpaceShadows.h" #include "Features/VRStereoOptimizations.h" #include "State.h" -#include "Deferred.h" void VR::ClearShaderCache() { @@ -183,5 +183,4 @@ void VR::DrawStereoBlend() if (globals::state->frameAnnotations) globals::state->EndPerfEvent(); - } diff --git a/src/Features/VRStereoOptimizations.cpp b/src/Features/VRStereoOptimizations.cpp index 0dcbdf3d42..5a1fe516ef 100644 --- a/src/Features/VRStereoOptimizations.cpp +++ b/src/Features/VRStereoOptimizations.cpp @@ -10,9 +10,9 @@ // JSON enum serialization for StereoMode NLOHMANN_JSON_SERIALIZE_ENUM(VRStereoOptimizations::StereoMode, { - { VRStereoOptimizations::StereoMode::Off, "Off" }, - { 
VRStereoOptimizations::StereoMode::Enable, "Enable" }, -}) + { VRStereoOptimizations::StereoMode::Off, "Off" }, + { VRStereoOptimizations::StereoMode::Enable, "Enable" }, + }) //============================================================================= // SETTINGS MANAGEMENT @@ -334,7 +334,6 @@ void VRStereoOptimizations::DrawSettings() } ImGui::Separator(); - // CAS slider hidden for now — forced to 0 // ImGui::SliderFloat("CAS Sharpening", &settings.casStrength, 0.0f, 1.0f, "%.2f"); // if (ImGui::IsItemHovered()) diff --git a/src/Features/VRStereoOptimizations.h b/src/Features/VRStereoOptimizations.h index d4e92f8dca..5f14963854 100644 --- a/src/Features/VRStereoOptimizations.h +++ b/src/Features/VRStereoOptimizations.h @@ -28,16 +28,16 @@ struct VRStereoOptimizations : public Feature /// Operating mode for stereo reprojection enum class StereoMode : uint32_t { - Off = 0, ///< Feature disabled - Enable = 1 ///< Stereo reprojection enabled + Off = 0, ///< Feature disabled + Enable = 1 ///< Stereo reprojection enabled }; /// Per-pixel classification written by StencilCS enum PixelMode : uint8_t { - MODE_DISOCCLUDED = 0, ///< Fully shaded, no reprojection, no blend - MODE_EDGE = 1, ///< Fully shaded + bilateral blend with other eye - MODE_MAIN = 2, ///< Eye 0: no reproject (Perf) / bilateral (Quality). Eye 1: overwrite (Perf) / bilateral (Quality) + MODE_DISOCCLUDED = 0, ///< Fully shaded, no reprojection, no blend + MODE_EDGE = 1, ///< Fully shaded + bilateral blend with other eye + MODE_MAIN = 2, ///< Eye 0: no reproject (Perf) / bilateral (Quality). 
Eye 1: overwrite (Perf) / bilateral (Quality) MODE_EDGE_NEIGHBOUR = 3, ///< Outer band: background pixels near edge, blended in post-process }; @@ -80,10 +80,10 @@ struct VRStereoOptimizations : public Feature StereoMode stereoMode = StereoMode::Enable; float disocclusionDepthThreshold = 0.01f; float edgeDepthThreshold = 0.05f; - int edgeWidth = 3; ///< Half-width of edge band in pixels (total band = 2 * edgeWidth) - float minEdgeDistance = 5000.0f; ///< Minimum linearized depth for edge AA (game units) - float fullBlendDistance = 0.0f; ///< Linearized depth below which both eyes are fully shaded + blended (game units) - float pomDepthScale = 22.5f; ///< Scale factor for POM depth correction in stereo reprojection + int edgeWidth = 3; ///< Half-width of edge band in pixels (total band = 2 * edgeWidth) + float minEdgeDistance = 5000.0f; ///< Minimum linearized depth for edge AA (game units) + float fullBlendDistance = 0.0f; ///< Linearized depth below which both eyes are fully shaded + blended (game units) + float pomDepthScale = 22.5f; ///< Scale factor for POM depth correction in stereo reprojection bool debugFullBlendDepth = false; ///< Show full blend depth zone as cyan overlay float qualityJitterOffset = 0.125f; float foveatedRegionRadius = 0.3f; @@ -105,11 +105,11 @@ struct VRStereoOptimizations : public Feature // 0 = Off, 1 = All textures (global), 2 = Distant trees only (depth-gated TREE_ANIM) int mipBiasMode = 0; float mipLodBias = -2.0f; - float mipBiasNearDist = 2000.0f; ///< Game units: no bias closer than this - float mipBiasFarDist = 6000.0f; ///< Game units: full bias beyond this + float mipBiasNearDist = 2000.0f; ///< Game units: no bias closer than this + float mipBiasFarDist = 6000.0f; ///< Game units: full bias beyond this // CAS (Contrast Adaptive Sharpening) - post-TAA - float casStrength = 0.0f; ///< 0.0 = disabled, 0.0-1.0 = subtle to strong (hidden for now) + float casStrength = 0.0f; ///< 0.0 = disabled, 0.0-1.0 = subtle to strong (hidden 
for now) float alphaTestThreshold = 0.001f; ///< Alpha floor for TREE_ANIM zombie texel removal } settings; @@ -119,21 +119,21 @@ struct VRStereoOptimizations : public Feature struct alignas(16) VRStereoOptParams { - float FrameDim[2]; // Full stereo buffer dimensions - float RcpFrameDim[2]; // 1.0 / FrameDim + float FrameDim[2]; // Full stereo buffer dimensions + float RcpFrameDim[2]; // 1.0 / FrameDim - uint32_t StereoModeValue; // Cast of StereoMode enum (0-3) + uint32_t StereoModeValue; // Cast of StereoMode enum (0-3) float DisocclusionThreshold; float EdgeDepthThreshold; uint32_t EdgeWidth; - float QualityJitter[2]; // Sub-pixel jitter offset (Quality mode) + float QualityJitter[2]; // Sub-pixel jitter offset (Quality mode) float FoveatedRadius; float pad2; - float FoveatedCenter[2]; // Foveal region center UV + float FoveatedCenter[2]; // Foveal region center UV float MinEdgeDistance; - float FullBlendDistance; // Linearized depth for full blend zone + float FullBlendDistance; // Linearized depth for full blend zone }; static_assert(sizeof(VRStereoOptParams) % 16 == 0, "VRStereoOptParams must be 16-byte aligned for HLSL cbuffer."); @@ -205,8 +205,8 @@ struct VRStereoOptimizations : public Feature //============================================================================= eastl::unique_ptr paramsCB; - eastl::unique_ptr texPerPixelMode; ///< R8_UINT classification texture (full SBS resolution) - eastl::unique_ptr reprojectionCopyTex; ///< Copy of main RT for reprojection read + eastl::unique_ptr texPerPixelMode; ///< R8_UINT classification texture (full SBS resolution) + eastl::unique_ptr reprojectionCopyTex; ///< Copy of main RT for reprojection read winrt::com_ptr stencilWriteDSS; winrt::com_ptr stencilWriteRS; @@ -220,8 +220,8 @@ struct VRStereoOptimizations : public Feature // CAS sharpening resources winrt::com_ptr casCS; - eastl::unique_ptr casTex; ///< UAV-capable texture for CAS output - winrt::com_ptr casParamsBuf; ///< Structured buffer for CAS 
sharpness param + eastl::unique_ptr casTex; ///< UAV-capable texture for CAS output + winrt::com_ptr casParamsBuf; ///< Structured buffer for CAS sharpness param winrt::com_ptr casParamsSRV; ///< SRV for CAS sharpness param /// Cache of original DSS -> modified DSS with stencil NOT_EQUAL enforcement diff --git a/src/Globals.cpp b/src/Globals.cpp index 8ece4c4b4b..483fe10087 100644 --- a/src/Globals.cpp +++ b/src/Globals.cpp @@ -29,11 +29,11 @@ #include "Features/UnifiedWater.h" #include "Features/Upscaling.h" #include "Features/VR.h" +#include "Features/VRStereoOptimizations.h" #include "Features/VolumetricLighting.h" #include "Features/VolumetricShadows.h" #include "Features/WaterEffects.h" #include "Features/WeatherEditor.h" -#include "Features/VRStereoOptimizations.h" #include "Features/WetnessEffects.h" #include "Menu.h" #include "ShaderCache.h" @@ -293,7 +293,6 @@ namespace globals if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { pDepthStencilState = stereoOpt.GetOrCreateModifiedDSS(pDepthStencilState); StencilRef = 1; // Must match the ref written by our stencil pass - } } func(This, pDepthStencilState, StencilRef); diff --git a/src/State.h b/src/State.h index f54e180a75..0542a7e1a6 100644 --- a/src/State.h +++ b/src/State.h @@ -213,9 +213,9 @@ class State float VRMipBias; float VRMipBiasNearDist; float VRMipBiasFarDist; - uint VRMipBiasMode; // 0=Off, 1=All Textures, 2=Distant Trees only + uint VRMipBiasMode; // 0=Off, 1=All Textures, 2=Distant Trees only float VRAlphaTestThreshold; // Alpha test threshold for VR TREE_ANIM (0 = use vanilla) - float4 pad0; // HLSL: float2 + implicit 8-byte gap before float4 AmbientSHR + float4 pad0; // HLSL: float2 + implicit 8-byte gap before float4 AmbientSHR float4 AmbientSHR; float4 AmbientSHG; float4 AmbientSHB; From 25a59f6a998e2723a759ecc544e5227c1690ae65 Mon Sep 17 00:00:00 2001 From: vrnord Date: Mon, 16 Mar 2026 10:21:31 -0600 Subject: [PATCH 09/16] fix(vr-stereo-opt): address PR review feedback and remove 
Upscaling changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review fixes: - DeferredCompositeCS: only early-out MODE_MAIN (mode==2), not MODE_EDGE - CASCS: add max(mxRGB, 1e-4) guard against division by zero - StereoBlendCS: clamp POM-corrected depth to prevent div-by-zero - StencilCS: use cbuffer DisocclusionThreshold instead of hardcoded constant - VR.hlsli: make Eye 1 jitter resolution-adaptive via FrameBuffer::BufferDim - Lighting.hlsl: include TRUE_PBR in pixelOffset export guard - ExtendedMaterials.hlsli: saturate() clamp on pixelOffset output - VRStereoOptimizations.cpp: add DeactivateStencil() to early-exit paths - VRStereoOptimizations.cpp: add null check for depthSRV - Globals.cpp: scope ClearDepthStencilView hook to main scene DSV only - StereoBlend.cpp: use terrain-aware Util::GetCurrentSceneDepthSRV() Revert Upscaling.cpp/h to upstream dev — all DLSS viewport scaling, TAAReorder, and periphery TAA code belongs exclusively in PR2. 
Co-Authored-By: Claude Opus 4.6 --- .../ExtendedMaterials/ExtendedMaterials.hlsli | 2 +- package/Shaders/Common/VR.hlsli | 4 +- package/Shaders/DeferredCompositeCS.hlsl | 2 +- package/Shaders/Lighting.hlsl | 2 +- package/Shaders/VR/CASCS.hlsl | 2 +- package/Shaders/VR/StereoBlendCS.hlsl | 4 +- .../VRStereoOptimizations/StencilCS.hlsl | 4 +- src/Features/Upscaling.cpp | 520 ++---------------- src/Features/Upscaling.h | 43 +- src/Features/VR/StereoBlend.cpp | 5 +- src/Features/VRStereoOptimizations.cpp | 12 +- src/Globals.cpp | 22 +- 12 files changed, 101 insertions(+), 521 deletions(-) diff --git a/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli index 051b3c387f..bd18791f4a 100644 --- a/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli +++ b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli @@ -497,7 +497,7 @@ namespace ExtendedMaterials #endif nearBlendToFar *= nearBlendToFar; float offset = (1.0 - parallaxAmount) * -maxHeight + minHeight; - pixelOffset = lerp(parallaxAmount, 0.5, nearBlendToFar); + pixelOffset = saturate(lerp(parallaxAmount, 0.5, nearBlendToFar)); return lerp(viewDirTS.xy * offset + coords.xy, coords, nearBlendToFar); } diff --git a/package/Shaders/Common/VR.hlsli b/package/Shaders/Common/VR.hlsli index 37f9238d41..46cf57a7e7 100644 --- a/package/Shaders/Common/VR.hlsli +++ b/package/Shaders/Common/VR.hlsli @@ -631,7 +631,9 @@ namespace Stereo // Larger offset increases chance of different alpha test outcomes between eyes // (tree branches vs sky). NDC for 6304x3088 SBS reference; scales with resolution. if (a_eyeIndex == 1) { - static const float2 kJitterNDC = float2(1.68e-4, -3.44e-4); + // ~0.75px diagonal jitter for Eye 1 stereo edge supersampling. + // Scales with resolution: 0.53/halfWidth horizontal, 1.06/height vertical. 
+ float2 kJitterNDC = float2(0.53 / (FrameBuffer::BufferDim.x * 0.5), -1.06 / FrameBuffer::BufferDim.y); vsout.VRPosition.xy += kJitterNDC * vsout.VRPosition.w; } diff --git a/package/Shaders/DeferredCompositeCS.hlsl b/package/Shaders/DeferredCompositeCS.hlsl index 88fa6f8632..61ad48bfb7 100644 --- a/package/Shaders/DeferredCompositeCS.hlsl +++ b/package/Shaders/DeferredCompositeCS.hlsl @@ -100,7 +100,7 @@ void SampleSSGISpecular(uint2 pixCoord, sh2 lobe, inout float ao, out float3 il, #if defined(VR_STEREO_OPT) if (eyeIndex == 1) { uint mode = StereoOptModeTexture[uint2(dispatchID.xy)]; - if (mode == 2 || mode == 1) { // MODE_MAIN or MODE_EDGE — stencil-culled, no valid G-buffer + if (mode == 2) { // MODE_MAIN — stencil-culled, no valid G-buffer return; } } diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index 3ffc7c311b..a28f6ed7ab 100644 --- a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -3197,7 +3197,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) } # endif -# if defined(EMAT) && (defined(PARALLAX) || defined(LANDSCAPE)) +# if (defined(EMAT) || defined(TRUE_PBR)) && (defined(PARALLAX) || defined(LANDSCAPE)) psout.Reflectance = float4(indirectLobeWeights.specular, (pixelOffset > 0.0) ? 
saturate(pixelOffset) : 0.0); # else diff --git a/package/Shaders/VR/CASCS.hlsl b/package/Shaders/VR/CASCS.hlsl index 9c379e6792..6ed2768e43 100644 --- a/package/Shaders/VR/CASCS.hlsl +++ b/package/Shaders/VR/CASCS.hlsl @@ -57,7 +57,7 @@ RWTexture2D Dest : register(u0); mxRGB += mxRGB2; // Adaptive sharpening amount - float3 ampRGB = saturate(min(mnRGB, 2.0 - mxRGB) * rcp(mxRGB)); + float3 ampRGB = saturate(min(mnRGB, 2.0 - mxRGB) * rcp(max(mxRGB, 1e-4))); ampRGB = rsqrt(ampRGB); // Peak controls sharpening strength: diff --git a/package/Shaders/VR/StereoBlendCS.hlsl b/package/Shaders/VR/StereoBlendCS.hlsl index 961dd01ad5..c443d04d1b 100644 --- a/package/Shaders/VR/StereoBlendCS.hlsl +++ b/package/Shaders/VR/StereoBlendCS.hlsl @@ -162,7 +162,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (pomOffsetFB > 1e-2 && POMDepthScale > 0) { float linDepthFB = SharedData::GetScreenDepth(centerDepth); float depthCorrectionFB = (0.5 - pomOffsetFB) * POMDepthScale; - float newLinDepthFB = linDepthFB + depthCorrectionFB; + float newLinDepthFB = max(linDepthFB + depthCorrectionFB, 1e-4); reprojDepthFB = (SharedData::CameraData.x - SharedData::CameraData.w / newLinDepthFB) / SharedData::CameraData.z; } @@ -227,7 +227,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) // Re-reproject with POM-adjusted depth centered at geometry plane float linearDepth = SharedData::GetScreenDepth(centerDepth); float depthCorrection = (0.5 - pomOffset) * POMDepthScale; - float newLinearDepth = linearDepth + depthCorrection; + float newLinearDepth = max(linearDepth + depthCorrection, 1e-4); reprojDepth = (SharedData::CameraData.x - SharedData::CameraData.w / newLinearDepth) / SharedData::CameraData.z; r = Stereo::ReprojectToOtherEye(uv, reprojDepth, eyeIndex, FrameDim); if (!r.valid) diff --git a/package/Shaders/VRStereoOptimizations/StencilCS.hlsl b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl index 7510c7d75c..6ff7b13376 100644 --- 
a/package/Shaders/VRStereoOptimizations/StencilCS.hlsl +++ b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl @@ -16,8 +16,6 @@ Texture2D DepthTexture : register(t0); RWTexture2D ModeTextureRW : register(u0); -static const float kDisocclusionThreshold = 0.015; - [numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { if (any(dtid >= uint2(FrameDim))) return; @@ -78,7 +76,7 @@ static const float kDisocclusionThreshold = 0.015; float otherDepth = DepthTexture[reproj.otherPx]; float maxDepth = max(max(centerDepth, otherDepth), 1e-5); float relativeDepthDiff = abs(centerDepth - otherDepth) / maxDepth; - isDisoccluded = (relativeDepthDiff > kDisocclusionThreshold); + isDisoccluded = (relativeDepthDiff > DisocclusionThreshold); } if (isDisoccluded) { diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index e2215848f8..604f49e986 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -1,10 +1,8 @@ #include "Upscaling.h" #include "Deferred.h" -#include "Features/VRStereoOptimizations.h" #include "Hooks.h" #include "State.h" -#include "TAAReorder.h" #include "Upscaling/DX12SwapChain.h" #include "Upscaling/FidelityFX.h" #include "Upscaling/Streamline.h" @@ -27,9 +25,7 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( sharpnessFSR, sharpnessDLSS, presetDLSS, - useGatherWideKernel, - vrDlssViewportScale, - vrPeripheryTAA); + useGatherWideKernel); decltype(&D3D11CreateDeviceAndSwapChain) ptrD3D11CreateDeviceAndSwapChainUpscaling; @@ -65,9 +61,7 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( upscaling.CheckBackendFeatures(pAdapter); // Use better swap effect to prevent tearing and improve performance - // But don't change it for VR as it can affect frame pacing with the VR compositor - if (!globals::game::isVR) - pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; bool shouldProxy = !globals::game::isVR; if (shouldProxy) @@ -147,14 +141,8 
@@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( ppImmediateContext); if (upscaling.IsBackendInitialized()) { - // Skip Streamline interface wrapping for VR — slUpgradeInterface wraps the D3D - // device and swap chain with Streamline proxy objects, which disrupts VR compositor - // frame pacing (causes judder/stuttering). DLSS still functions without wrapped - // interfaces; only frame generation requires them (and that's already VR-gated above). - if (!globals::game::isVR) { - upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); - upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); - } + upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); + upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); upscaling.SetBackendD3DDevice(*ppDevice); upscaling.PostBackendDevice(); } @@ -250,30 +238,6 @@ void Upscaling::DrawSettings() ImGui::Text("Set to 'Default' for automatic selection based on your Upscale Preset and hardware."); ImGui::Text("Changing this setting requires a restart to take effect."); } - - if (globals::game::isVR) { - if (ImGui::TreeNodeEx("VR Viewport Scaling", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::SliderFloat("DLSS Viewport Scale", &settings.vrDlssViewportScale, 0.5f, 1.0f, "%.2f"); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Controls the fraction of each eye's view that DLSS processes."); - ImGui::Text("Lower values = better performance, less visible area upscaled by DLSS."); - ImGui::Text("The VR lens hides the periphery, so 0.7-0.85 is recommended."); - ImGui::Text("The periphery outside the DLSS region is filled with a bilinear upscale."); - } - - if (settings.vrDlssViewportScale < 1.0f) { - bool peripheryTAA = settings.vrPeripheryTAA != 0; - if (ImGui::Checkbox("Periphery TAA", &peripheryTAA)) - settings.vrPeripheryTAA = peripheryTAA ? 
1 : 0; - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Applies temporal anti-aliasing to the bilinear-upscaled periphery."); - ImGui::Text("Reduces shimmer and improves peripheral quality."); - ImGui::Text("The DLSS center region passes through unchanged."); - } - } - ImGui::TreePop(); - } - } } if (globals::game::isVR) { @@ -496,7 +460,6 @@ void Upscaling::LoadSettings(json& o_json) logger::warn("[Upscaling] Loaded useGatherWideKernel {} out of range, clamping to 1", settings.useGatherWideKernel); settings.useGatherWideKernel = 1; } - settings.vrDlssViewportScale = std::clamp(settings.vrDlssViewportScale, 0.5f, 1.0f); auto iniSettingCollection = globals::game::iniPrefSettingCollection; if (iniSettingCollection) { auto setting = iniSettingCollection->GetSetting("bUseTAA:Display"); @@ -524,10 +487,6 @@ void Upscaling::DataLoaded() void Upscaling::Load() { *(uintptr_t*)&ptrD3D11CreateDeviceAndSwapChainUpscaling = SKSE::PatchIAT(hk_D3D11CreateDeviceAndSwapChainUpscaling, "d3d11.dll", "D3D11CreateDeviceAndSwapChain"); - - // Install depth/stencil registration hook early (before renderer creates targets) - if (globals::game::isVR) - TAAReorder::InitEarly(); } struct BSImageSpace_Init_FXAA @@ -571,10 +530,6 @@ void Upscaling::PostPostLoad() stl::detour_thunk(REL::RelocationID(98974, 105626)); logger::info("[Upscaling] Installed hooks"); - - // Install TAA reordering hooks for VR periphery TAA - if (globals::game::isVR) - TAAReorder::Init(); } Upscaling::UpscaleMethod Upscaling::GetUpscaleMethod() const @@ -684,13 +639,6 @@ void Upscaling::DestroyUpscalingTextureResources(UpscaleMethod a_upscalemethod) } } - // VR periphery TAA textures - only needed for DLSS with viewport scaling - if (a_upscalemethod != UpscaleMethod::kDLSS) { - vrPreTAACopy = nullptr; - for (int i = 0; i < 2; i++) - vrTAAdPerEye[i].reset(); - } - // Motion vector copy texture is only needed for DLSS - destroy when switching away from DLSS if (a_upscalemethod != UpscaleMethod::kDLSS) { if 
(motionVectorCopyTexture) { @@ -744,8 +692,6 @@ void Upscaling::CheckResources(UpscaleMethod a_upscalemethod) vrIntermediateMotionVectors[i].reset(); vrIntermediateReactiveMask[i].reset(); vrIntermediateTransparencyMask[i].reset(); - vrFinalOutput[i].reset(); - vrCropColorIn[i].reset(); } } } @@ -798,8 +744,6 @@ ID3D11PixelShader* Upscaling::GetDepthRefractionUpscalePS() if (!depthRefractionUpscalePS) { logger::debug("Compiling DepthRefractionUpscalePS.hlsl"); std::vector> defines = { { "PSHADER", "" } }; - if (globals::game::isVR) - defines.push_back({ "VR", "" }); depthRefractionUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DepthRefractionUpscalePS.hlsl", defines, "ps_5_0")); } @@ -827,37 +771,6 @@ ID3D11VertexShader* Upscaling::GetUpscaleVS() return upscaleVS.get(); } -ID3D11PixelShader* Upscaling::GetDlssCompositePS() -{ - if (!vrDlssCompositePS) { - logger::debug("Compiling DLSSCompositePS.hlsl"); - vrDlssCompositePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0")); - } - - return vrDlssCompositePS.get(); -} - -ID3D11PixelShader* Upscaling::GetDlssUpscalePS() -{ - if (!vrDlssUpscalePS) { - logger::debug("Compiling DLSSCompositePS.hlsl (BILINEAR_UPSCALE)"); - vrDlssUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader( - L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", - { { "PSHADER", "" }, { "BILINEAR_UPSCALE", "" } }, "ps_5_0")); - } - - if (!vrDlssUpscaleCB) { - D3D11_BUFFER_DESC cbDesc = {}; - cbDesc.ByteWidth = sizeof(DlssCompositeCB); - cbDesc.Usage = D3D11_USAGE_DYNAMIC; - cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrDlssUpscaleCB.put())); - } - - return vrDlssUpscalePS.get(); -} - eastl::unique_ptr Upscaling::CreateTextureFromSource(ID3D11Resource* src, uint32_t width, uint32_t height, bool copyBindFlags, bool 
createSRV, bool createUAV, const char* name) { @@ -907,7 +820,7 @@ void Upscaling::CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight std::string suffix = (i == 0) ? "Left" : "Right"; vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, inWidth, inHeight, false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); - vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, false, ("Upscale_ColorOut_" + suffix).c_str()); // Depth: R32_TYPELESS base (matches kMAIN), with R32_FLOAT SRV for ClearHMDMaskCS. // CopySubresourceRegion requires matching typeless formats; SRV reinterprets as R32_FLOAT. @@ -960,185 +873,41 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; - float vpScale = settings.vrDlssViewportScale; - auto upscaleMethod = GetUpscaleMethod(); - bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); - - if (viewportScaling) { - // Viewport scaling: physically crop all DLSS inputs to eliminate non-zero subrect offsets. - // vrIntermediateColorIn stays at FULL render-res (for ClearHMDMask + FillPeriphery). - // All other DLSS inputs (depth, mvec, masks) are CROP-sized. - // vrCropColorIn is CROP-sized (DLSS color input, extracted from masked full color). - // This ensures DLSS sees all inputs at {0,0} with no subrect base offsets, - // which is critical for correct temporal reprojection during camera motion. 
- uint32_t cropWidthIn = (uint32_t)(eyeWidthIn * vpScale); - uint32_t cropHeightIn = (uint32_t)(eyeHeightIn * vpScale); - uint32_t cropWidthOut = (uint32_t)(eyeWidthOut * vpScale); - uint32_t cropHeightOut = (uint32_t)(eyeHeightOut * vpScale); - - bool needsRecreate = !vrIntermediateColorIn[0] || !vrCropColorIn[0] || !vrIntermediateDepth[0] || - !vrIntermediateColorOut[0] || !vrFinalOutput[0]; - if (!needsRecreate) { - // Check format too — periphery TAA feeds R8G8B8A8 post-PP intermediate, - // while normal DLSS feeds R11G11B10 kMAIN. Must recreate on format change. - D3D11_TEXTURE2D_DESC srcDesc; - ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); - needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || - vrCropColorIn[0]->desc.Width != cropWidthIn || - vrCropColorIn[0]->desc.Height != cropHeightIn || - vrIntermediateDepth[0]->desc.Width != cropWidthIn || - vrIntermediateDepth[0]->desc.Height != cropHeightIn || - vrIntermediateColorOut[0]->desc.Width != cropWidthOut || - vrIntermediateColorOut[0]->desc.Height != cropHeightOut || - vrFinalOutput[0]->desc.Width != eyeWidthOut || - vrFinalOutput[0]->desc.Height != eyeHeightOut); - } - - if (needsRecreate) { - logger::info("[Upscaling] (Re)creating VR viewport-scaled intermediates: full {}x{}, crop in {}x{}, crop out {}x{}", - eyeWidthIn, eyeHeightIn, cropWidthIn, cropHeightIn, cropWidthOut, cropHeightOut); - - for (int i = 0; i < 2; i++) { - std::string suffix = (i == 0) ? 
"Left" : "Right"; - - // Full-size color for ClearHMDMask + FillPeriphery - vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, eyeWidthIn, eyeHeightIn, - false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); - - // Crop-sized DLSS color input (needs UAV for ClearHMDMask) - vrCropColorIn[i] = CreateTextureFromSource(colorSrc, cropWidthIn, cropHeightIn, - false, true, true, ("Upscale_CropColorIn_" + suffix).c_str()); - - // Crop-sized DLSS output - vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, cropWidthOut, cropHeightOut, - false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); - - // Crop-sized depth (R32_TYPELESS with R32_FLOAT SRV) - { - D3D11_TEXTURE2D_DESC depthDesc = {}; - depthDesc.Width = cropWidthIn; - depthDesc.Height = cropHeightIn; - depthDesc.MipLevels = 1; - depthDesc.ArraySize = 1; - depthDesc.Format = DXGI_FORMAT_R32_TYPELESS; - depthDesc.SampleDesc.Count = 1; - depthDesc.Usage = D3D11_USAGE_DEFAULT; - depthDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - vrIntermediateDepth[i] = eastl::make_unique(depthDesc); - Util::SetResourceName(vrIntermediateDepth[i]->resource.get(), ("Upscale_Depth_" + suffix).c_str()); - - D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.Format = DXGI_FORMAT_R32_FLOAT; - srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; - srvDesc.Texture2D.MipLevels = 1; - vrIntermediateDepth[i]->CreateSRV(srvDesc); - } - - // Crop-sized motion vectors, reactive mask, transparency mask - vrIntermediateMotionVectors[i] = CreateTextureFromSource(mvecSrc, cropWidthIn, cropHeightIn, - false, true, false, ("Upscale_MVec_" + suffix).c_str()); - vrIntermediateReactiveMask[i] = CreateTextureFromSource(reactiveSrc, cropWidthIn, cropHeightIn, - false, true, false, ("Upscale_Reactive_" + suffix).c_str()); - vrIntermediateTransparencyMask[i] = CreateTextureFromSource(transparencySrc, cropWidthIn, cropHeightIn, - false, true, false, ("Upscale_Transparency_" + suffix).c_str()); - - // Full display-res 
composition target - vrFinalOutput[i] = CreateTextureFromSource(colorSrc, eyeWidthOut, eyeHeightOut, - false, true, true, ("Upscale_FinalOutput_" + suffix).c_str()); - } - } - - // Copy full eye to full-size vrIntermediateColorIn (raw render-res, no HMD mask yet) - for (uint32_t i = 0; i < 2; ++i) { - uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; - D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; - context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); - } - - uint32_t cropOffsetX = (eyeWidthIn - cropWidthIn) / 2; - uint32_t cropOffsetY = (eyeHeightIn - cropHeightIn) / 2; - auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; - - // Extract DLSS crop from raw buffer (before TAA or HMD mask), then mask the crop directly. - for (uint32_t i = 0; i < 2; ++i) { - uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; - - // Crop color from raw (unmasked, non-TAA'd) full-size buffer - D3D11_BOX cropBox = { cropOffsetX, cropOffsetY, 0, cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; - context->CopySubresourceRegion(vrCropColorIn[i]->resource.get(), 0, 0, 0, 0, - vrIntermediateColorIn[i]->resource.get(), 0, &cropBox); - - // ClearHMDMask directly on the crop (depth offset accounts for eye + crop position in stereo buffer) - ClearHMDMask(vrCropColorIn[i]->uav.get(), depthTexture.depthSRV, - cropWidthIn, cropHeightIn, offsetXIn + cropOffsetX, 0, cropOffsetY); - - // Crop depth/mvec/reactive/transparency directly from stereo buffers - D3D11_BOX stereoCropBox = { offsetXIn + cropOffsetX, cropOffsetY, 0, - offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; - context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, - depthSrc, 0, &stereoCropBox); - context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, - mvecSrc, 0, &stereoCropBox); - 
context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, - reactiveSrc, 0, &stereoCropBox); - context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, - transparencySrc, 0, &stereoCropBox); - } + bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; + if (!needsRecreate) { + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || + vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); + } + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", + eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, + colorSrc, mvecSrc, reactiveSrc, transparencySrc); + } - // ClearHMDMask on full-size buffer (for FillPeriphery) - for (uint32_t i = 0; i < 2; ++i) { - uint32_t depthOffset = (i == 1) ? 
eyeWidthIn : 0; - ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, - eyeWidthIn, eyeHeightIn, depthOffset, 0); - } - } else { - // Non-viewport-scaling path: all textures at full per-eye dimensions - bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; - if (!needsRecreate) { - D3D11_TEXTURE2D_DESC srcDesc; - ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); - needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || - vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || - vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); - } - if (needsRecreate) { - logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", - eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); - CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, - colorSrc, mvecSrc, reactiveSrc, transparencySrc); - } + // Extract both eyes' inputs from combined stereo buffers + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? 
eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); + } - // Release viewport-scaling-specific textures - for (int i = 0; i < 2; i++) { - vrCropColorIn[i].reset(); - vrFinalOutput[i].reset(); - vrTAAdPerEye[i].reset(); - } - vrPreTAACopy = nullptr; - - // Copy full eye to per-eye intermediates - for (uint32_t i = 0; i < 2; ++i) { - uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; - D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; - - context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); - } + // Zero color where depth == 0 (HMD hidden area) in each per-eye buffer. + // Depth is read from the combined stereo SRV at the per-eye offset; color is written + // to the isolated per-eye UAV (ColorOffsetX = 0). 
+ auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; - // Zero color where depth == 0 (HMD hidden area) in each per-eye buffer - auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; - for (uint32_t i = 0; i < 2; ++i) { - uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; - ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, - eyeWidthIn, eyeHeightIn, depthOffset, 0); - } + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); } if (state->frameAnnotations) @@ -1160,34 +929,11 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); uint32_t eyeHeightOut = (uint32_t)screenSize.y; - float vpScale = settings.vrDlssViewportScale; - auto upscaleMethod = GetUpscaleMethod(); - bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); - + // Write upscaled outputs back for (uint32_t i = 0; i < 2; ++i) { uint32_t offsetXOut = (i == 1) ? 
eyeWidthOut : 0; - - if (viewportScaling && vrFinalOutput[i]) { - // Paste crop-sized DLSS output into center of full-size composition target - uint32_t dlssWidthOut = vrIntermediateColorOut[i]->desc.Width; - uint32_t dlssHeightOut = vrIntermediateColorOut[i]->desc.Height; - uint32_t pasteX = (eyeWidthOut - dlssWidthOut) / 2; - uint32_t pasteY = (eyeHeightOut - dlssHeightOut) / 2; - - D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; - context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, - vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); - - // Copy composition target to stereo buffer - D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; - context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, - vrFinalOutput[i]->resource.get(), 0, &outBox); - } else { - // Direct copy DLSS output to stereo buffer - D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; - context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, - vrIntermediateColorOut[i]->resource.get(), 0, &outBox); - } + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, vrIntermediateColorOut[i]->resource.get(), 0, &outBox); } if (state->frameAnnotations) @@ -1195,11 +941,7 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) } void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, - uint32_t depthOffsetY, - uint32_t depthWidth, uint32_t depthHeight, - uint32_t colorWidth, uint32_t colorHeight, - ID3D11ShaderResourceView* fallbackSRV, uint32_t fallbackOffsetX) + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX) { if (!globals::game::isVR) return; @@ -1210,7 +952,7 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe 
vrClearHMDMaskCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/ClearHMDMaskCS.hlsl", {}, "cs_5_0")); D3D11_BUFFER_DESC cbDesc = {}; - cbDesc.ByteWidth = 32; // 8 uints (offsets + optional scaling dimensions) + cbDesc.ByteWidth = 16; // 4 uints cbDesc.Usage = D3D11_USAGE_DYNAMIC; cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; @@ -1223,9 +965,8 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->CSSetShader(vrClearHMDMaskCS.get(), nullptr, 0); - // t0 = depth, t1 = fallback (nullptr → unbound → reads return (0,0,0,0) → black) - ID3D11ShaderResourceView* srvs[2] = { depthSRV, fallbackSRV }; - context->CSSetShaderResources(0, 2, srvs); + ID3D11ShaderResourceView* srvs[1] = { depthSRV }; + context->CSSetShaderResources(0, 1, srvs); ID3D11UnorderedAccessView* uavs[1] = { colorUAV }; context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); @@ -1233,10 +974,9 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe D3D11_MAPPED_SUBRESOURCE mapped{}; context->Map(vrClearHMDMaskCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - uint32_t cbData[8] = { depthOffsetX, colorOffsetX, depthOffsetY, fallbackOffsetX, - depthWidth, depthHeight, colorWidth, colorHeight }; + uint32_t offsets[4] = { depthOffsetX, colorOffsetX, 0, 0 }; - memcpy(mapped.pData, cbData, sizeof(cbData)); + memcpy(mapped.pData, offsets, sizeof(offsets)); context->Unmap(vrClearHMDMaskCB.get(), 0); ID3D11Buffer* cbs[1] = { vrClearHMDMaskCB.get() }; @@ -1244,81 +984,13 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->Dispatch(dispatchX, dispatchY, 1); - // Unbind - ID3D11ShaderResourceView* nullSRVs[2] = { nullptr, nullptr }; - ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; - ID3D11Buffer* nullCB[1] = { nullptr }; - context->CSSetShaderResources(0, 2, nullSRVs); - context->CSSetUnorderedAccessViews(0, 1, nullUAV, 
nullptr); - context->CSSetConstantBuffers(0, 1, nullCB); - context->CSSetShader(nullptr, nullptr, 0); - } -} - -void Upscaling::FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, - uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV) -{ - if (!globals::game::isVR || !vrFinalOutput[eyeIndex]) - return; - if (!overrideSRV && !vrIntermediateColorIn[eyeIndex]) - return; - - auto context = globals::d3d::context; - - if (!vrPeripheryFillCS) { - vrPeripheryFillCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/VRPeripheryFillCS.hlsl", {}, "cs_5_0")); - - D3D11_BUFFER_DESC cbDesc = {}; - cbDesc.ByteWidth = 16; // 4 uints - cbDesc.Usage = D3D11_USAGE_DYNAMIC; - cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrPeripheryFillCB.put())); - - D3D11_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&samplerDesc, vrLinearSampler.put())); - } - - if (vrPeripheryFillCS) { - auto dispatchX = (dstWidth + 7) / 8; - auto dispatchY = (dstHeight + 7) / 8; - - context->CSSetShader(vrPeripheryFillCS.get(), nullptr, 0); - - // Read from overrideSRV (e.g. TAA texture) or default render-res per-eye input. - ID3D11ShaderResourceView* srvs[1] = { overrideSRV ? 
overrideSRV : vrIntermediateColorIn[eyeIndex]->srv.get() }; - context->CSSetShaderResources(0, 1, srvs); - - ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[eyeIndex]->uav.get() }; - context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - - ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; - context->CSSetSamplers(0, 1, samplers); - - D3D11_MAPPED_SUBRESOURCE mapped{}; - context->Map(vrPeripheryFillCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - uint32_t cbData[4] = { srcWidth, srcHeight, dstWidth, dstHeight }; - memcpy(mapped.pData, cbData, sizeof(cbData)); - context->Unmap(vrPeripheryFillCB.get(), 0); - - ID3D11Buffer* cbs[1] = { vrPeripheryFillCB.get() }; - context->CSSetConstantBuffers(0, 1, cbs); - - context->Dispatch(dispatchX, dispatchY, 1); - // Unbind ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; ID3D11Buffer* nullCB[1] = { nullptr }; - ID3D11SamplerState* nullSampler[1] = { nullptr }; context->CSSetShaderResources(0, 1, nullSRV); context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); context->CSSetConstantBuffers(0, 1, nullCB); - context->CSSetSamplers(0, 1, nullSampler); context->CSSetShader(nullptr, nullptr, 0); } } @@ -1429,10 +1101,6 @@ void Upscaling::ConfigureUpscaling(RE::BSGraphics::State* a_viewport) // Disable dynamic resolution unless the game explicitly enables it if (!globals::game::isVR) runtimeData.dynamicResolutionLock = 1; - - // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. - // No longer need to force-disable culling when upscaling is active. - // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs. 
} void Upscaling::SetupResources() @@ -1477,7 +1145,6 @@ void Upscaling::SetupResources() depthStencilDesc.BackFace.StencilDepthFailOp = depthStencilDesc.FrontFace.StencilDepthFailOp; depthStencilDesc.BackFace.StencilPassOp = depthStencilDesc.FrontFace.StencilPassOp; depthStencilDesc.BackFace.StencilFunc = depthStencilDesc.FrontFace.StencilFunc; - } else { depthStencilDesc.StencilEnable = false; // Disable stencil testing } @@ -1531,12 +1198,6 @@ void Upscaling::ClearShaderCache() depthRefractionUpscalePS = nullptr; // com_ptr automatically releases underwaterMaskUpscalePS = nullptr; // com_ptr automatically releases upscaleVS = nullptr; // com_ptr automatically releases - vrClearHMDMaskCS = nullptr; - vrPeripheryFillCS = nullptr; - vrPeripheryFillCB = nullptr; - vrDlssCompositePS = nullptr; - vrDlssUpscalePS = nullptr; - vrDlssUpscaleCB = nullptr; } void Upscaling::CopySharedD3D12Resources() @@ -1747,17 +1408,6 @@ bool Upscaling::IsUpscalingActive() const return resolutionScale.x < .99f; } -std::vector Upscaling::GetActiveConstraints() const -{ - std::vector constraints; - - // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. - // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs, - // so we no longer need to constrain depth buffer culling when upscaling is active. - - return constraints; -} - /** * @brief Retrieves the current frame time for frame generation. * @@ -1869,7 +1519,7 @@ Upscaling::BlurResources Upscaling::GetBlurResources() const return {}; } -void Upscaling::Upscale(ID3D11Texture2D* colorSourceOverride) +void Upscaling::Upscale() { auto upscaleMethod = GetUpscaleMethod(); @@ -1930,11 +1580,8 @@ void Upscaling::Upscale(ID3D11Texture2D* colorSourceOverride) { state->BeginPerfEvent("Upscaling"); - // Use color source override if provided (e.g., post-PP intermediate for periphery TAA) - ID3D11Resource* colorSrc = colorSourceOverride ? 
static_cast(colorSourceOverride) : static_cast(main.texture); - if (upscaleMethod == UpscaleMethod::kDLSS) { - streamline.Upscale(colorSrc, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); + streamline.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); } else if (upscaleMethod == UpscaleMethod::kFSR) { fidelityFX.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVector.texture, settings.sharpnessFSR); } @@ -2174,73 +1821,20 @@ void Upscaling::Main_PostProcessing::thunk(RE::ImageSpaceManager* a_this, uint32 if (upscaling.d3d12SwapChainActive && upscaling.settings.frameGenerationMode) upscaling.CopySharedD3D12Resources(); - // Increment diagnostic counter (rate-limits TAAReorder logging) - if (TAAReorder::g_initialized) { - TAAReorder::g_diagCounter = (TAAReorder::g_diagCounter + 1) % TAAReorder::DIAG_INTERVAL; - if (TAAReorder::g_diagCounter == 0) { - TAAReorder::g_frameSeqCounter = 0; - logger::info("[SEQ] Main_PostProcessing START seq={}", TAAReorder::g_frameSeqCounter++); - } - } - - bool peripheryTAA = TAAReorder::ShouldReorderTAA(); - - if (peripheryTAA) { - // ─── Periphery TAA with post-conductor DLSS (PureDark's approach) ─── - // func() with TAA enabled → conductor runs all passes unimpeded: - // Phase 2A: ExecutePassHook captures post-PP intermediate to g_postPPCopy - // Phase 5: TAA + DRS → submit texture - // After conductor: ConductorCallHook evaluates DLSS on g_postPPCopy, - // then pastes DLSS center onto submit texture - - auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); - GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); - - // Reset per-frame flags - TAAReorder::g_postPPReady = false; - TAAReorder::g_dlssReady = false; - TAAReorder::g_dlssPasteComplete = false; - 
TAAReorder::g_phase5Complete = false; - TAAReorder::g_bsHookCallCount = 0; + if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) + upscaling.PerformUpscaling(); - if (TAAReorder::g_diagCounter == 0) - logger::info("[TAAReorder] peripheryTAA: running func() with TAA enabled..."); + if (upscaleMethod == UpscaleMethod::kDLSS) + upscaling.ApplySharpening(); - // func() with TAA ENABLED — DLSS eval + paste in ConductorCallHook (post-conductor) - BSImagespaceShaderISTemporalAA->taaEnabled = true; - func(a_this, a3, a_target, a_4, a_5); - - // Lock DRS + update camera (after conductor completes) - auto& runtimeData = globals::game::graphicsState->GetRuntimeData(); - runtimeData.dynamicResolutionLock = 1; - UpdateCameraData(); - - // Disable TAA for remainder of frame - BSImagespaceShaderISTemporalAA->taaEnabled = false; - } else { - // ─── Normal flow (no periphery TAA) ─── - if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) - upscaling.PerformUpscaling(); - - if (upscaleMethod == UpscaleMethod::kDLSS) - upscaling.ApplySharpening(); - - auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); - GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); - - BSImagespaceShaderISTemporalAA->taaEnabled = (upscaleMethod == UpscaleMethod::kTAA); - - if (TAAReorder::g_diagCounter == 0 && TAAReorder::g_initialized) - logger::info("[DIAG] Normal DLSS flow: taaEnabled={}, running func()...", BSImagespaceShaderISTemporalAA->taaEnabled); + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); - func(a_this, a3, a_target, a_4, a_5); + BSImagespaceShaderISTemporalAA->taaEnabled = upscaleMethod == UpscaleMethod::kTAA; - BSImagespaceShaderISTemporalAA->taaEnabled = false; - } + func(a_this, a3, a_target, a_4, a_5); - // VR CAS sharpening (after TAA) - if (REL::Module::IsVR() && 
globals::features::vrStereoOptimizations.loaded) - globals::features::vrStereoOptimizations.ApplyCAS(a_target); + BSImagespaceShaderISTemporalAA->taaEnabled = false; } void Upscaling::SetScissorRect::thunk(RE::BSGraphics::Renderer* This, int a_left, int a_top, int a_right, int a_bottom) diff --git a/src/Features/Upscaling.h b/src/Features/Upscaling.h index eace6aa3c3..6cecf6cbaf 100644 --- a/src/Features/Upscaling.h +++ b/src/Features/Upscaling.h @@ -57,10 +57,8 @@ struct Upscaling : Feature uint streamlineLogLevel = 0; // 0=Off, 1=Default, 2=Verbose float sharpnessFSR = 0.0f; float sharpnessDLSS = 0.0f; - uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M - uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel - float vrDlssViewportScale = 1.0f; // 0.5 to 1.0, fraction of each eye that DLSS processes (VR only) - uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) + uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M + uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel }; Settings settings; @@ -112,7 +110,6 @@ struct Upscaling : Feature virtual void Load() override; virtual void PostPostLoad() override; virtual void SetupResources() override; - virtual std::vector GetActiveConstraints() const override; UpscaleMethod GetUpscaleMethod() const; @@ -141,11 +138,7 @@ struct Upscaling : Feature winrt::com_ptr vrClearHMDMaskCB; // Helper to dispatch mask clearing for a single eye region void ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, - uint32_t depthOffsetY = 0, - uint32_t depthWidth = 0, uint32_t depthHeight = 0, - uint32_t colorWidth = 0, uint32_t colorHeight = 0, - ID3D11ShaderResourceView* fallbackSRV = nullptr, uint32_t fallbackOffsetX = 0); + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX); // Shared VR 
Per-Eye Intermediate Buffers // Owned here so both Streamline (DLSS) and FidelityFX (FSR) can use them. @@ -155,34 +148,6 @@ struct Upscaling : Feature eastl::unique_ptr vrIntermediateMotionVectors[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateReactiveMask[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateTransparencyMask[2]; // per-eye render resolution - eastl::unique_ptr vrFinalOutput[2]; // per-eye display-res composition target (VR viewport scaling) - eastl::unique_ptr vrCropColorIn[2]; // crop-sized DLSS color input (VR viewport scaling only) - - // Periphery TAA (conductor approach) — used by two-call func() flow - winrt::com_ptr vrPreTAACopy; // full stereo kMAIN copy (Phase 1 PP, pre-TAA) - eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) - - // Periphery fill compute shader (bilinear upscale render-res → display-res for VR viewport scaling) - winrt::com_ptr vrPeripheryFillCS; - winrt::com_ptr vrPeripheryFillCB; - winrt::com_ptr vrLinearSampler; - - // DLSS composite pixel shaders (format-converting fullscreen copy for TAAReorder) - winrt::com_ptr vrDlssCompositePS; // point-sample (same-res format conversion) - winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) - winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params - ID3D11PixelShader* GetDlssCompositePS(); - ID3D11PixelShader* GetDlssUpscalePS(); - - struct DlssCompositeCB - { - float2 DynResScale; // renderRes / displayRes per-eye - float2 EyeOffset; // (i * eyeWidth, 0) - float2 SrcTexSize; // full texture dimensions - float2 pad; - }; - void FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, - uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV = nullptr); // Helper to create/resize per-eye buffers matching source formats void CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight, uint32_t outWidth, uint32_t outHeight, @@ 
-199,7 +164,7 @@ struct Upscaling : Feature void ConfigureTAA(); void ConfigureUpscaling(RE::BSGraphics::State* a_state); - void Upscale(ID3D11Texture2D* colorSourceOverride = nullptr); + void Upscale(); // D3D11 textures Texture2D* reactiveMaskTexture = nullptr; diff --git a/src/Features/VR/StereoBlend.cpp b/src/Features/VR/StereoBlend.cpp index cdf945b044..61ce5763f7 100644 --- a/src/Features/VR/StereoBlend.cpp +++ b/src/Features/VR/StereoBlend.cpp @@ -6,6 +6,7 @@ #include "Features/ScreenSpaceShadows.h" #include "Features/VRStereoOptimizations.h" #include "State.h" +#include "Utils/D3D.h" void VR::ClearShaderCache() { @@ -51,9 +52,7 @@ void VR::DrawStereoBlend() auto renderer = globals::game::renderer; auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; - // Use live depth buffer (kMAIN) — at DeferredPasses time this has the correct - // opaque geometry depth matching the composited color buffer. - auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + auto* depthSRV = Util::GetCurrentSceneDepthSRV(); context->CopyResource(stereoBlendCopyTex->resource.get(), main.texture); diff --git a/src/Features/VRStereoOptimizations.cpp b/src/Features/VRStereoOptimizations.cpp index 5a1fe516ef..4b9fb4d481 100644 --- a/src/Features/VRStereoOptimizations.cpp +++ b/src/Features/VRStereoOptimizations.cpp @@ -421,6 +421,10 @@ void VRStereoOptimizations::DispatchStencil() // StencilCS can correctly detect sky-vs-geometry edges in the current frame. 
auto renderer = globals::game::renderer; auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + if (!depthSRV) { + logger::warn("[VRStereoOptimizations] DispatchStencil: depthSRV is null, skipping"); + return; + } // Dispatch classification CS over Eye 1 region // Input: t0 = depth, b1 = params CB @@ -659,10 +663,14 @@ void VRStereoOptimizations::DispatchReprojection() return; if (settings.stereoMode == StereoMode::Off) return; - if (!reprojectionCS || !texPerPixelMode || !paramsCB) + if (!reprojectionCS || !texPerPixelMode || !paramsCB) { + DeactivateStencil(); return; - if (settings.debugSkipMerge) + } + if (settings.debugSkipMerge) { + DeactivateStencil(); return; + } ZoneScoped; TracyD3D11Zone(globals::state->tracyCtx, "VR Stereo Opt - Reprojection"); diff --git a/src/Globals.cpp b/src/Globals.cpp index 483fe10087..907b7dcba0 100644 --- a/src/Globals.cpp +++ b/src/Globals.cpp @@ -314,10 +314,24 @@ namespace globals if (globals::game::isVR) { auto& stereoOpt = globals::features::vrStereoOptimizations; if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { - // Strip stencil clear to preserve our marks; allow depth clear to proceed - ClearFlags &= ~D3D11_CLEAR_STENCIL; - if (ClearFlags == 0) - return; // Nothing left to clear + // Only protect the main scene DSV — allow other DSVs to clear normally + auto renderer = globals::game::renderer; + auto& mainDepth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (mainDepth.views[0]) { + // Compare the DSV being cleared against the main scene DSV + ID3D11Resource* clearRes = nullptr; + ID3D11Resource* mainRes = nullptr; + pDepthStencilView->GetResource(&clearRes); + mainDepth.views[0]->GetResource(&mainRes); + bool isMainDSV = (clearRes == mainRes); + if (clearRes) clearRes->Release(); + if (mainRes) mainRes->Release(); + if (isMainDSV) { + ClearFlags &= ~D3D11_CLEAR_STENCIL; + if (ClearFlags == 0) + return; + } + } 
} } func(This, pDepthStencilView, ClearFlags, Depth, Stencil); From 171a8ac798beaf524d02d7f0d15888e41cb8ad2c Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 15 Mar 2026 23:33:13 -0600 Subject: [PATCH 10/16] feat(vr-dlss): VR upscaling shaders for DLSS viewport scaling pipeline Add HLSL shaders for the VR DLSS periphery pipeline: - ClearHMDMaskCS: Enhanced with fallback texture support and configurable depth/color region offsets for viewport scaling - DLSSCompositePS: Format-converting fullscreen copy (point-sample) for pasting DLSS output onto the submit texture - DepthUpscalePS: Depth buffer upscaling with point sampling to preserve depth discontinuities at object edges - FeatheredCompositeCS/PS: Feathered alpha blend at DLSS crop boundary using smoothstep falloff (CS fallback + PS with hardware blend) - ForceAlphaCS: Sets alpha=1.0 on submit texture to fix Scaleform UI rendering after DLSS writes non-opaque alpha values - VRPeripheryFillCS: Bilinear upscale from render-res to display-res for the peripheral region outside the DLSS crop Based in part on techniques from PureDark's Skyrim-Upscaler VR (MIT license: https://github.com/PureDark/Skyrim-Upscaler) Co-Authored-By: Claude Opus 4.6 --- .../Shaders/Upscaling/ClearHMDMaskCS.hlsl | 45 +++++++++--- .../Shaders/Upscaling/DLSSCompositePS.hlsl | 48 +++++++++++++ .../Shaders/Upscaling/DepthUpscalePS.hlsl | 70 +++++++++++++++++++ .../Upscaling/FeatheredCompositeCS.hlsl | 43 ++++++++++++ .../Upscaling/FeatheredCompositePS.hlsl | 56 +++++++++++++++ .../Shaders/Upscaling/ForceAlphaCS.hlsl | 13 ++++ .../Shaders/Upscaling/VRPeripheryFillCS.hlsl | 26 +++++++ 7 files changed, 293 insertions(+), 8 deletions(-) create mode 100644 features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl create mode 100644 features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl create mode 100644 features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl create mode 100644 features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl 
create mode 100644 features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl create mode 100644 features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl diff --git a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl index df107d9175..c5c60611d4 100644 --- a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl @@ -4,20 +4,49 @@ // depth == 0.0 is the unrendered/hidden area value (Skyrim reversed-Z: far plane = 0). // DepthIn is the combined stereo depth buffer; DepthOffsetX selects the eye's half. // ColorInOut is the isolated per-eye buffer; ColorOffsetX is always 0. +// +// When DepthWidth > 0, coordinate scaling is enabled: depth is at render-res while +// color is at display-res. The shader maps display-res color coordinates to render-res +// depth coordinates for the mask lookup. +// +// FallbackIn (t1): when bound, masked pixels read from this texture instead of writing +// black. When unbound, D3D11 returns (0,0,0,0) — same as clearing to black. +// FallbackOffsetX selects the eye's half in the stereo fallback texture. 
cbuffer ClearHMDMaskCB : register(b0) { - uint DepthOffsetX; // X offset into combined stereo depth (0 = left, eyeWidth = right) - uint ColorOffsetX; // X offset into color target (always 0 for per-eye buffers) - uint pad0; - uint pad1; + uint DepthOffsetX; // X offset into combined stereo depth (0 = left, eyeWidth = right) + uint ColorOffsetX; // X offset into color target (always 0 for per-eye buffers) + uint DepthOffsetY; // Y offset into combined stereo depth (non-zero when viewport scaling crops vertically) + uint FallbackOffsetX; // X offset into FallbackIn for stereo (0 when unused or left eye) + // Optional coordinate scaling (zero = disabled, for backwards compat) + uint DepthWidth; // render-res eye width; if 0, no scaling (1:1 depth/color coords) + uint DepthHeight; // render-res eye height + uint ColorWidth; // display-res eye width + uint ColorHeight; // display-res eye height }; Texture2D DepthIn : register(t0); +Texture2D FallbackIn : register(t1); RWTexture2D ColorInOut : register(u0); -[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { - // Read from stereo depth, write to potentially stereo color - if (DepthIn[dispatchID.xy + uint2(DepthOffsetX, 0)] == 0.0) - ColorInOut[dispatchID.xy + uint2(ColorOffsetX, 0)] = float4(0.0, 0.0, 0.0, 0.0); +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) +{ + uint2 colorPos = dispatchID.xy + uint2(ColorOffsetX, 0); + uint2 depthPos; + + if (DepthWidth > 0) { + // Scale from display-res color coordinates to render-res depth coordinates + depthPos = uint2( + (dispatchID.x * DepthWidth) / ColorWidth, + (dispatchID.y * DepthHeight) / ColorHeight + ) + uint2(DepthOffsetX, DepthOffsetY); + } else { + depthPos = dispatchID.xy + uint2(DepthOffsetX, DepthOffsetY); + } + + if (DepthIn[depthPos] == 0.0) + ColorInOut[colorPos] = FallbackIn[dispatchID.xy + uint2(FallbackOffsetX, 0)]; + // When FallbackIn is unbound (existing callers): returns (0,0,0,0) → clears to black + // When 
FallbackIn is bound (TAA mask restore): returns display RT content } diff --git a/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl new file mode 100644 index 0000000000..da776ae442 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl @@ -0,0 +1,48 @@ +// Format-converting fullscreen pixel shader with optional bilinear upscale. +// Used by TAAReorder to composite between textures of different DXGI formats +// (e.g. R8G8B8A8_UNORM conductor RTs <-> R11G11B10_FLOAT kMAIN). +// The GPU's output merger handles format conversion automatically. +// +// BILINEAR_UPSCALE variant: upscales render-res content to display-res by +// mapping output pixel positions through the dynamic resolution scale, +// like PureDark's dynamicResScale in his blend shader. + +#include "Upscaling/UpscaleVS.hlsl" + +#ifdef PSHADER + +Texture2D Source : register(t0); + +#ifdef BILINEAR_UPSCALE + +cbuffer CompositeCB : register(b0) +{ + float2 DynResScale; // renderRes / displayRes (per-eye) + float2 EyeOffset; // (i * eyeWidth, 0) in texels + float2 SrcTexSize; // full texture dimensions in texels + float2 pad; +}; + +SamplerState LinearSampler : register(s0); + +float4 main(VS_OUTPUT input) : SV_Target +{ + // Map display-res pixel position to render-res source position. + // Subtract eye offset, scale to render-res, add eye offset back. 
+ float2 localPos = input.Position.xy - EyeOffset; + float2 srcLocal = localPos * DynResScale; + float2 srcPos = srcLocal + EyeOffset; + float2 srcUV = srcPos / SrcTexSize; + return Source.SampleLevel(LinearSampler, srcUV, 0); +} + +#else + +float4 main(VS_OUTPUT input) : SV_Target +{ + return Source.Load(int3(input.Position.xy, 0)); +} + +#endif // BILINEAR_UPSCALE + +#endif // PSHADER diff --git a/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl new file mode 100644 index 0000000000..41ade15e2d --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl @@ -0,0 +1,70 @@ +/** + * @file DepthUpscalePS.hlsl + * @brief Point-sampled depth buffer upscaling for VR depth-based culling + * + * When upscaling (FSR/DLSS) is active, the depth buffer is rendered at a lower + * resolution than the display. Skyrim VR's depth-based culling (OBBOcclusionTesting) + * reads from the depth buffer to determine object visibility, but with a mismatched + * resolution, objects may be incorrectly culled (appearing to flicker in/out of view). + * + * This shader upscales the low-resolution depth buffer to full resolution using + * pure point sampling. Previous conservative blending (GatherRed + lerp toward + * min depth) caused HAM mask bleed: depth == 0 values from the hidden area mesh + * leaked into valid depth through the 2x2 neighborhood blend, creating artifacts + * at the mask boundary after DRS upscaling. 
+ * + * Based on depth upscaling approach by vrnord + * https://github.com/vrnord/skyrim-community-shaders-VR-DLSS + */ + +#include "Upscaling/UpscaleVS.hlsl" + +#if defined(PSHADER) +#include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" + +typedef VS_OUTPUT PS_INPUT; + +struct PS_OUTPUT +{ + float Depth : SV_Depth; +}; + +Texture2D DepthLowRes : register(t0); + +cbuffer DepthUpscaleCB : register(b0) +{ + float2 SourceDim; // Full texture dimensions (texels) + float2 InvSourceDim; // 1.0 / SourceDim + float2 Scale; // resolutionScale (render/display ratio) + float2 Pad; +}; + +/** + * @brief Main pixel shader entry point + * + * Pure point-sampled depth upscaling. Maps display-res pixel position to + * render-res texel and loads directly — no blending, no mask bleed. + */ +PS_OUTPUT main(PS_INPUT input) +{ + PS_OUTPUT psout; + + // Map full-res UV to render-res UV (same transform as the engine's + // GetDynamicResolutionAdjustedScreenPosition). + float2 uv = Scale * input.TexCoord; + + // Per-eye clamping for SBS stereo: prevent sampling across the center seam. + bool isRight = input.TexCoord.x >= 0.5; + float halfScale = 0.5 * Scale.x; + uv.x = clamp(uv.x, isRight ? halfScale : 0.0, isRight ? 
Scale.x : halfScale); + uv.y = clamp(uv.y, 0.0, Scale.y); + + // Nearest texel coordinate — pure point sampling, no blending + int2 texel = int2(floor(uv * SourceDim)); + psout.Depth = DepthLowRes.Load(int3(texel, 0)); + + return psout; +} + +#endif diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl new file mode 100644 index 0000000000..4167802e0d --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl @@ -0,0 +1,43 @@ +cbuffer FeatherCB : register(b0) +{ + uint CropX; // paste position X in output space + uint CropY; // paste position Y in output space + uint CropW; // crop width + uint CropH; // crop height + float FeatherWidth; // feather distance in pixels (inward from crop edge) + float3 pad; +}; + +Texture2D CropTexture : register(t0); // DLSS output (crop-sized, at {0,0}) +RWTexture2D OutputTexture : register(u0); // vrFinalOutput (already filled with periphery) + +[numthreads(8, 8, 1)] +void main(uint3 dispatchID : SV_DispatchThreadID) +{ + // dispatchID is in crop-local space (0..CropW-1, 0..CropH-1) + int2 cropLocal = int2(dispatchID.xy); + if (cropLocal.x >= (int)CropW || cropLocal.y >= (int)CropH) + return; + + // Output pixel = crop-local + paste offset + int2 pixel = cropLocal + int2(CropX, CropY); + + // Distance from nearest crop edge (positive = inside) + float distLeft = (float)cropLocal.x; + float distRight = (float)(CropW - 1 - cropLocal.x); + float distTop = (float)cropLocal.y; + float distBottom = (float)(CropH - 1 - cropLocal.y); + float distFromEdge = min(min(distLeft, distRight), min(distTop, distBottom)); + + float4 dlss = CropTexture.Load(int3(cropLocal, 0)); + + if (FeatherWidth <= 0.0 || distFromEdge >= FeatherWidth) { + // Inside crop interior or no feathering: 100% DLSS + OutputTexture[pixel] = dlss; + } else { + // Feather zone: smooth blend from periphery (TAA-stabilized) to DLSS + float blend = smoothstep(0.0, 
FeatherWidth, distFromEdge); + float4 periphery = OutputTexture[pixel]; + OutputTexture[pixel] = lerp(periphery, dlss, blend); + } +} diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl new file mode 100644 index 0000000000..cffececc09 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl @@ -0,0 +1,56 @@ +// Feathered DLSS crop composite using hardware alpha blending. +// Based on PureDark's approach from Skyrim-Upscaler VR (MIT license). +// +// The render target already contains TAA'd periphery content. +// We output float4(DLSSColor, featherAlpha) and let the output merger's +// SrcAlpha/InvSrcAlpha blend preserve the periphery in the feather zone +// and outside the crop rect entirely. + +#include "Upscaling/UpscaleVS.hlsl" + +#ifdef PSHADER + +Texture2D CropTexture : register(t0); +SamplerState LinearSampler : register(s0); + +cbuffer FeatheredCompositeCB : register(b0) +{ + float2 CropOrigin; // paste position (x, y) in output-eye pixel coords + float2 CropSize; // crop width, height in pixels + float FeatherWidth; // feather distance in pixels (inward from crop edge) + float _pad0; + float2 SrcUVOrigin; // UV origin in source texture for this crop region + float2 SrcUVScale; // UV scale: maps [0,1] crop-local UV to source texture UV range +}; + +float4 main(VS_OUTPUT input) : SV_Target +{ + float2 pixelPos = input.Position.xy; + + // Distance from each edge of the crop rect (positive = inside) + float distLeft = pixelPos.x - CropOrigin.x; + float distRight = (CropOrigin.x + CropSize.x) - pixelPos.x; + float distTop = pixelPos.y - CropOrigin.y; + float distBottom = (CropOrigin.y + CropSize.y) - pixelPos.y; + + float minDist = min(min(distLeft, distRight), min(distTop, distBottom)); + + // Outside crop rect: fully transparent (hardware blend preserves TAA'd periphery) + if (minDist <= 0.0) + return float4(0, 0, 0, 0); + + // Feather alpha: 
smoothstep ramp from 0 at edge to 1 at FeatherWidth inside + // (matches the smoothstep from the original CS for visual consistency) + float alpha = (FeatherWidth > 0.0) ? smoothstep(0.0, FeatherWidth, minDist) : 1.0; + + // Map pixel position to crop-local UV [0,1], then remap to source texture UV. + // For per-eye textures: SrcUVOrigin=(0,0), SrcUVScale=(1,1) (identity). + // For SBS textures: SrcUVOrigin/Scale select the correct eye's crop region. + float2 cropUV = (pixelPos - CropOrigin) / CropSize; + float2 srcUV = cropUV * SrcUVScale + SrcUVOrigin; + float3 dlssColor = CropTexture.SampleLevel(LinearSampler, srcUV, 0).rgb; + + return float4(dlssColor, alpha); +} + +#endif // PSHADER diff --git a/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl new file mode 100644 index 0000000000..98cf61143e --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl @@ -0,0 +1,13 @@ +// Forces alpha to 1.0 across the entire texture. +// Used after DLSS center paste onto submit texture to ensure Scaleform UI renders. +// DLSS output may have alpha=0 (from R11G11B10→R8G8B8A8 conversion with no alpha source), +// which can prevent UI compositing in the DLSS center area. + +RWTexture2D ColorInOut : register(u0); + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) +{ + float4 c = ColorInOut[dispatchID.xy]; + c.a = 1.0; + ColorInOut[dispatchID.xy] = c; +} diff --git a/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl new file mode 100644 index 0000000000..291280cdc0 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl @@ -0,0 +1,26 @@ +// Bilinear upscale from render-resolution per-eye buffer to display-resolution per-eye buffer. +// Used for VR viewport scaling: fills the full eye output with a cheap upscale so the +// periphery (outside the DLSS-processed center) is not black/empty. 
+ +cbuffer PeripheryFillCB : register(b0) +{ + uint SrcWidth; + uint SrcHeight; + uint DstWidth; + uint DstHeight; +}; + +Texture2D SrcTexture : register(t0); +SamplerState LinearSampler : register(s0); +RWTexture2D DstTexture : register(u0); + +[numthreads(8, 8, 1)] void main(uint3 dispatchID + : SV_DispatchThreadID) +{ + if (dispatchID.x >= DstWidth || dispatchID.y >= DstHeight) + return; + + // Normalized UV with half-pixel offset for correct bilinear sampling + float2 uv = (float2(dispatchID.xy) + 0.5) / float2(DstWidth, DstHeight); + DstTexture[dispatchID.xy] = SrcTexture.SampleLevel(LinearSampler, uv, 0); +} From d56e72e1634df86d48f00132c9db5b21e04414e4 Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 15 Mar 2026 23:33:51 -0600 Subject: [PATCH 11/16] feat(vr-dlss): viewport scaling, nasal crop offset, and feathered composite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement DLSS viewport scaling with configurable crop region: - Add vrDlssCropOffsetX setting (0.0-0.3) to shift DLSS crop toward the nasal edge where visual acuity is highest - Add vrDlssFeatherWidth setting for smooth blend at crop boundary (currently disabled pending fix, default 0.0) - Implement feathered composite via pixel shader with SrcAlpha blend (preserves periphery TAA quality) with CS fallback path - Create RTV on vrFinalOutput for PS-based compositing - Fix stencil clear value (0xFF → 0x00) for VR depth upscale path - Add eye0Only parameter to FinalizePerEyeOutputs for single-eye mode - Clear stencil marks before periphery TAA to prevent interference - Set TAA high-frequency response (fTAAHighFreq=1.0) during periphery pass for improved sharpness Based in part on techniques from PureDark's Skyrim-Upscaler VR (MIT license: https://github.com/PureDark/Skyrim-Upscaler) Co-Authored-By: Claude Opus 4.6 --- src/Features/Upscaling.cpp | 828 ++++++++++++++++++++++++++++++++++--- src/Features/Upscaling.h | 52 ++- 2 files changed, 817 
insertions(+), 63 deletions(-) diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index 604f49e986..385a5b2c5c 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -1,7 +1,9 @@ #include "Upscaling.h" #include "Deferred.h" +#include "Features/VRStereoOptimizations.h" #include "Hooks.h" +#include "TAAReorder.h" #include "State.h" #include "Upscaling/DX12SwapChain.h" #include "Upscaling/FidelityFX.h" @@ -25,7 +27,11 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( sharpnessFSR, sharpnessDLSS, presetDLSS, - useGatherWideKernel); + useGatherWideKernel, + vrDlssViewportScale, + vrPeripheryTAA, + vrDlssCropOffsetX, + vrDlssFeatherWidth); decltype(&D3D11CreateDeviceAndSwapChain) ptrD3D11CreateDeviceAndSwapChainUpscaling; @@ -61,7 +67,9 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( upscaling.CheckBackendFeatures(pAdapter); // Use better swap effect to prevent tearing and improve performance - pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + // But don't change it for VR as it can affect frame pacing with the VR compositor + if (!globals::game::isVR) + pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; bool shouldProxy = !globals::game::isVR; if (shouldProxy) @@ -141,8 +149,14 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( ppImmediateContext); if (upscaling.IsBackendInitialized()) { - upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); - upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + // Skip Streamline interface wrapping for VR — slUpgradeInterface wraps the D3D + // device and swap chain with Streamline proxy objects, which disrupts VR compositor + // frame pacing (causes judder/stuttering). DLSS still functions without wrapped + // interfaces; only frame generation requires them (and that's already VR-gated above). 
+ if (!globals::game::isVR) { + upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); + upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + } upscaling.SetBackendD3DDevice(*ppDevice); upscaling.PostBackendDevice(); } @@ -238,6 +252,41 @@ void Upscaling::DrawSettings() ImGui::Text("Set to 'Default' for automatic selection based on your Upscale Preset and hardware."); ImGui::Text("Changing this setting requires a restart to take effect."); } + + if (globals::game::isVR) { + if (ImGui::TreeNodeEx("VR Viewport Scaling", ImGuiTreeNodeFlags_DefaultOpen)) { + ImGui::SliderFloat("DLSS Viewport Scale", &settings.vrDlssViewportScale, 0.5f, 1.0f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Controls the fraction of each eye's view that DLSS processes."); + ImGui::Text("Lower values = better performance, less visible area upscaled by DLSS."); + ImGui::Text("The VR lens hides the periphery, so 0.7-0.85 is recommended."); + ImGui::Text("The periphery outside the DLSS region is filled with a bilinear upscale."); + } + + if (settings.vrDlssViewportScale < 1.0f) { + bool peripheryTAA = settings.vrPeripheryTAA != 0; + if (ImGui::Checkbox("Periphery TAA", &peripheryTAA)) + settings.vrPeripheryTAA = peripheryTAA ? 
1 : 0; + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Applies temporal anti-aliasing to the bilinear-upscaled periphery."); + ImGui::Text("Reduces shimmer and improves peripheral quality."); + ImGui::Text("The DLSS center region passes through unchanged."); + } + + ImGui::SliderFloat("Nasal Crop Offset", &settings.vrDlssCropOffsetX, 0.0f, 0.3f, "%.3f"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Shifts the DLSS crop region toward the nose."); + ImGui::Text("Higher values = more of the nasal view processed by DLSS."); + ImGui::Text("0.0 = centered, 0.1-0.2 recommended."); + } + + // Feathered edge disabled pending fix — hidden from UI + // ImGui::SliderFloat("Crop Edge Feather", &settings.vrDlssFeatherWidth, 0.0f, 0.1f, "%.3f"); + } + + ImGui::TreePop(); + } + } } if (globals::game::isVR) { @@ -460,6 +509,9 @@ void Upscaling::LoadSettings(json& o_json) logger::warn("[Upscaling] Loaded useGatherWideKernel {} out of range, clamping to 1", settings.useGatherWideKernel); settings.useGatherWideKernel = 1; } + settings.vrDlssViewportScale = std::clamp(settings.vrDlssViewportScale, 0.5f, 1.0f); + settings.vrDlssCropOffsetX = std::clamp(settings.vrDlssCropOffsetX, 0.0f, 0.3f); + settings.vrDlssFeatherWidth = std::clamp(settings.vrDlssFeatherWidth, 0.0f, 0.1f); auto iniSettingCollection = globals::game::iniPrefSettingCollection; if (iniSettingCollection) { auto setting = iniSettingCollection->GetSetting("bUseTAA:Display"); @@ -487,6 +539,10 @@ void Upscaling::DataLoaded() void Upscaling::Load() { *(uintptr_t*)&ptrD3D11CreateDeviceAndSwapChainUpscaling = SKSE::PatchIAT(hk_D3D11CreateDeviceAndSwapChainUpscaling, "d3d11.dll", "D3D11CreateDeviceAndSwapChain"); + + // Install depth/stencil registration hook early (before renderer creates targets) + if (globals::game::isVR) + TAAReorder::InitEarly(); } struct BSImageSpace_Init_FXAA @@ -530,6 +586,10 @@ void Upscaling::PostPostLoad() stl::detour_thunk(REL::RelocationID(98974, 105626)); 
logger::info("[Upscaling] Installed hooks"); + + // Install TAA reordering hooks for VR periphery TAA + if (globals::game::isVR) + TAAReorder::Init(); } Upscaling::UpscaleMethod Upscaling::GetUpscaleMethod() const @@ -639,6 +699,13 @@ void Upscaling::DestroyUpscalingTextureResources(UpscaleMethod a_upscalemethod) } } + // VR periphery TAA textures - only needed for DLSS with viewport scaling + if (a_upscalemethod != UpscaleMethod::kDLSS) { + vrPreTAACopy = nullptr; + for (int i = 0; i < 2; i++) + vrTAAdPerEye[i].reset(); + } + // Motion vector copy texture is only needed for DLSS - destroy when switching away from DLSS if (a_upscalemethod != UpscaleMethod::kDLSS) { if (motionVectorCopyTexture) { @@ -692,6 +759,8 @@ void Upscaling::CheckResources(UpscaleMethod a_upscalemethod) vrIntermediateMotionVectors[i].reset(); vrIntermediateReactiveMask[i].reset(); vrIntermediateTransparencyMask[i].reset(); + vrFinalOutput[i].reset(); + vrCropColorIn[i].reset(); } } } @@ -744,6 +813,8 @@ ID3D11PixelShader* Upscaling::GetDepthRefractionUpscalePS() if (!depthRefractionUpscalePS) { logger::debug("Compiling DepthRefractionUpscalePS.hlsl"); std::vector> defines = { { "PSHADER", "" } }; + if (globals::game::isVR) + defines.push_back({ "VR", "" }); depthRefractionUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DepthRefractionUpscalePS.hlsl", defines, "ps_5_0")); } @@ -771,6 +842,37 @@ ID3D11VertexShader* Upscaling::GetUpscaleVS() return upscaleVS.get(); } +ID3D11PixelShader* Upscaling::GetDlssCompositePS() +{ + if (!vrDlssCompositePS) { + logger::debug("Compiling DLSSCompositePS.hlsl"); + vrDlssCompositePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0")); + } + + return vrDlssCompositePS.get(); +} + +ID3D11PixelShader* Upscaling::GetDlssUpscalePS() +{ + if (!vrDlssUpscalePS) { + logger::debug("Compiling DLSSCompositePS.hlsl (BILINEAR_UPSCALE)"); + 
vrDlssUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader( + L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", + { { "PSHADER", "" }, { "BILINEAR_UPSCALE", "" } }, "ps_5_0")); + } + + if (!vrDlssUpscaleCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = sizeof(DlssCompositeCB); + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrDlssUpscaleCB.put())); + } + + return vrDlssUpscalePS.get(); +} + eastl::unique_ptr Upscaling::CreateTextureFromSource(ID3D11Resource* src, uint32_t width, uint32_t height, bool copyBindFlags, bool createSRV, bool createUAV, const char* name) { @@ -820,7 +922,7 @@ void Upscaling::CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight std::string suffix = (i == 0) ? "Left" : "Right"; vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, inWidth, inHeight, false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); - vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, false, ("Upscale_ColorOut_" + suffix).c_str()); + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); // Depth: R32_TYPELESS base (matches kMAIN), with R32_FLOAT SRV for ClearHMDMaskCS. // CopySubresourceRegion requires matching typeless formats; SRV reinterprets as R32_FLOAT. 
@@ -873,48 +975,228 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; - bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; - if (!needsRecreate) { - needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || - vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); - } - if (needsRecreate) { - logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", - eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); - CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, - colorSrc, mvecSrc, reactiveSrc, transparencySrc); - } + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + + if (viewportScaling) { + // Viewport scaling: physically crop all DLSS inputs to eliminate non-zero subrect offsets. + // vrIntermediateColorIn stays at FULL render-res (for ClearHMDMask + FillPeriphery). + // All other DLSS inputs (depth, mvec, masks) are CROP-sized. + // vrCropColorIn is CROP-sized (DLSS color input, extracted from masked full color). + // This ensures DLSS sees all inputs at {0,0} with no subrect base offsets, + // which is critical for correct temporal reprojection during camera motion. 
+ uint32_t cropWidthIn = (uint32_t)(eyeWidthIn * vpScale); + uint32_t cropHeightIn = (uint32_t)(eyeHeightIn * vpScale); + uint32_t cropWidthOut = (uint32_t)(eyeWidthOut * vpScale); + uint32_t cropHeightOut = (uint32_t)(eyeHeightOut * vpScale); + + bool needsRecreate = !vrIntermediateColorIn[0] || !vrCropColorIn[0] || !vrIntermediateDepth[0] || + !vrIntermediateColorOut[0] || !vrFinalOutput[0]; + if (!needsRecreate) { + // Check format too — periphery TAA feeds R8G8B8A8 post-PP intermediate, + // while normal DLSS feeds R11G11B10 kMAIN. Must recreate on format change. + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrCropColorIn[0]->desc.Width != cropWidthIn || + vrCropColorIn[0]->desc.Height != cropHeightIn || + vrIntermediateDepth[0]->desc.Width != cropWidthIn || + vrIntermediateDepth[0]->desc.Height != cropHeightIn || + vrIntermediateColorOut[0]->desc.Width != cropWidthOut || + vrIntermediateColorOut[0]->desc.Height != cropHeightOut || + vrFinalOutput[0]->desc.Width != eyeWidthOut || + vrFinalOutput[0]->desc.Height != eyeHeightOut); + } - // Extract both eyes' inputs from combined stereo buffers - for (uint32_t i = 0; i < 2; ++i) { - uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; - D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR viewport-scaled intermediates: full {}x{}, crop in {}x{}, crop out {}x{}", + eyeWidthIn, eyeHeightIn, cropWidthIn, cropHeightIn, cropWidthOut, cropHeightOut); + + for (int i = 0; i < 2; i++) { + std::string suffix = (i == 0) ? 
"Left" : "Right"; + + // Full-size color for ClearHMDMask + FillPeriphery + vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, eyeWidthIn, eyeHeightIn, + false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); + + // Crop-sized DLSS color input (needs UAV for ClearHMDMask) + vrCropColorIn[i] = CreateTextureFromSource(colorSrc, cropWidthIn, cropHeightIn, + false, true, true, ("Upscale_CropColorIn_" + suffix).c_str()); + + // Crop-sized DLSS output + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, cropWidthOut, cropHeightOut, + false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); + + // Crop-sized depth (R32_TYPELESS with R32_FLOAT SRV) + { + D3D11_TEXTURE2D_DESC depthDesc = {}; + depthDesc.Width = cropWidthIn; + depthDesc.Height = cropHeightIn; + depthDesc.MipLevels = 1; + depthDesc.ArraySize = 1; + depthDesc.Format = DXGI_FORMAT_R32_TYPELESS; + depthDesc.SampleDesc.Count = 1; + depthDesc.Usage = D3D11_USAGE_DEFAULT; + depthDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + vrIntermediateDepth[i] = eastl::make_unique(depthDesc); + Util::SetResourceName(vrIntermediateDepth[i]->resource.get(), ("Upscale_Depth_" + suffix).c_str()); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + vrIntermediateDepth[i]->CreateSRV(srvDesc); + } - context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); - } + // 
Crop-sized motion vectors, reactive mask, transparency mask + vrIntermediateMotionVectors[i] = CreateTextureFromSource(mvecSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_MVec_" + suffix).c_str()); + vrIntermediateReactiveMask[i] = CreateTextureFromSource(reactiveSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Reactive_" + suffix).c_str()); + vrIntermediateTransparencyMask[i] = CreateTextureFromSource(transparencySrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Transparency_" + suffix).c_str()); + + // Full display-res composition target (needs RTV for PS feathered composite) + vrFinalOutput[i] = CreateTextureFromSource(colorSrc, eyeWidthOut, eyeHeightOut, + false, true, true, ("Upscale_FinalOutput_" + suffix).c_str()); + // Add render target bind flag and create RTV for pixel shader composite + { + D3D11_TEXTURE2D_DESC finalDesc; + vrFinalOutput[i]->resource->GetDesc(&finalDesc); + if (!(finalDesc.BindFlags & D3D11_BIND_RENDER_TARGET)) { + // Recreate with render target support + finalDesc.BindFlags |= D3D11_BIND_RENDER_TARGET; + vrFinalOutput[i] = eastl::make_unique(finalDesc); + Util::SetResourceName(vrFinalOutput[i]->resource.get(), ("Upscale_FinalOutput_" + suffix).c_str()); + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = finalDesc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = 1; + vrFinalOutput[i]->CreateSRV(srvDesc); + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = finalDesc.Format; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + vrFinalOutput[i]->CreateUAV(uavDesc); + } + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = finalDesc.Format; + rtvDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + rtvDesc.Texture2D.MipSlice = 0; + vrFinalOutput[i]->CreateRTV(rtvDesc); + } + } + } - // Zero color where depth == 0 (HMD hidden 
area) in each per-eye buffer. - // Depth is read from the combined stereo SRV at the per-eye offset; color is written - // to the isolated per-eye UAV (ColorOffsetX = 0). - auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + // Copy full eye to full-size vrIntermediateColorIn (raw render-res, no HMD mask yet) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + } + + // Nasal offset: shift crop toward nose + // Eye 0 (left): nose is right -> shift right (+) + // Eye 1 (right): nose is left -> shift left (-) + float nasalShiftFrac = settings.vrDlssCropOffsetX; + uint32_t baseCropOffsetX = (eyeWidthIn - cropWidthIn) / 2; + uint32_t cropOffsetY = (eyeHeightIn - cropHeightIn) / 2; + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + + // Extract DLSS crop from raw buffer (before TAA or HMD mask), then mask the crop directly. + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + + // Per-eye nasal offset in render resolution space + int32_t nasalShift = (int32_t)(nasalShiftFrac * eyeWidthIn); + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t cropOffsetX = (uint32_t)std::clamp((int32_t)baseCropOffsetX + eyeNasalShift, 0, (int32_t)(eyeWidthIn - cropWidthIn)); + + // Crop color from raw (unmasked, non-TAA'd) full-size buffer + D3D11_BOX cropBox = { cropOffsetX, cropOffsetY, 0, cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrCropColorIn[i]->resource.get(), 0, 0, 0, 0, + vrIntermediateColorIn[i]->resource.get(), 0, &cropBox); + + // ClearHMDMask directly on the crop (depth offset accounts for eye + crop position in stereo buffer) + ClearHMDMask(vrCropColorIn[i]->uav.get(), depthTexture.depthSRV, + cropWidthIn, cropHeightIn, offsetXIn + cropOffsetX, 0, cropOffsetY); + + // Crop depth/mvec/reactive/transparency directly from stereo buffers + D3D11_BOX stereoCropBox = { offsetXIn + cropOffsetX, cropOffsetY, 0, + offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, + depthSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, + mvecSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, + reactiveSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, + transparencySrc, 0, &stereoCropBox); + } - for (uint32_t i = 0; i < 2; ++i) { - uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; - ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, - eyeWidthIn, eyeHeightIn, depthOffset, 0); + // ClearHMDMask on full-size buffer (for FillPeriphery) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? 
eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } + } else { + // Non-viewport-scaling path: all textures at full per-eye dimensions + bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; + if (!needsRecreate) { + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || + vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); + } + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", + eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, + colorSrc, mvecSrc, reactiveSrc, transparencySrc); + } + + // Release viewport-scaling-specific textures + for (int i = 0; i < 2; i++) { + vrCropColorIn[i].reset(); + vrFinalOutput[i].reset(); + vrTAAdPerEye[i].reset(); + } + vrPreTAACopy = nullptr; + + // Copy full eye to per-eye intermediates + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? 
eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); + } + + // Zero color where depth == 0 (HMD hidden area) in each per-eye buffer + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } } if (state->frameAnnotations) state->EndPerfEvent(); } -void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) +void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst, bool eye0Only) { if (!globals::game::isVR) return; @@ -929,11 +1211,258 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); uint32_t eyeHeightOut = (uint32_t)screenSize.y; - // Write upscaled outputs back - for (uint32_t i = 0; i < 2; ++i) { + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + + uint32_t eyeCount = eye0Only ? 1 : 2; + for (uint32_t i = 0; i < eyeCount; ++i) { uint32_t offsetXOut = (i == 1) ? 
eyeWidthOut : 0; - D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; - context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + + if (viewportScaling && vrFinalOutput[i]) { + // Paste crop-sized DLSS output into center of full-size composition target + uint32_t dlssWidthOut = vrIntermediateColorOut[i]->desc.Width; + uint32_t dlssHeightOut = vrIntermediateColorOut[i]->desc.Height; + + // Calculate paste position with nasal offset (in display resolution space) + int32_t nasalShift = (int32_t)(settings.vrDlssCropOffsetX * eyeWidthOut); + int32_t eyeNasalShift = (i == 0) ? nasalShift : -nasalShift; + uint32_t pasteX = (uint32_t)std::clamp((int32_t)((eyeWidthOut - dlssWidthOut) / 2) + eyeNasalShift, 0, (int32_t)(eyeWidthOut - dlssWidthOut)); + uint32_t pasteY = (eyeHeightOut - dlssHeightOut) / 2; + + float featherPixels = settings.vrDlssFeatherWidth * eyeWidthOut; + + static uint32_t featherLogCount = 0; + if (featherLogCount < 3) { + logger::info("[Upscaling] FinalizePerEyeOutputs eye{}: featherPixels={:.1f} (width={:.4f} * eyeW={}), dlss={}x{}, paste=({},{})", + i, featherPixels, settings.vrDlssFeatherWidth, eyeWidthOut, dlssWidthOut, dlssHeightOut, pasteX, pasteY); + } + + if (featherPixels > 0.0f && vrFinalOutput[i]->rtv) { + // Pixel shader feathered composite (preserves periphery TAA via hardware alpha blend) + // Based on PureDark's technique from Skyrim-Upscaler VR (MIT license) + + // Lazy-compile PS and create blend state + if (!vrFeatheredCompositePS) { + vrFeatheredCompositePS.attach(reinterpret_cast<ID3D11PixelShader*>(Util::CompileShader( + L"Data/Shaders/Upscaling/FeatheredCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0"))); + if (vrFeatheredCompositePS) + logger::info("[Upscaling] FeatheredCompositePS compiled successfully"); + else + logger::error("[Upscaling] FeatheredCompositePS FAILED to compile!"); + + // Create SrcAlpha/InvSrcAlpha blend state + D3D11_BLEND_DESC blendDesc = {}; + 
blendDesc.RenderTarget[0].BlendEnable = TRUE; + blendDesc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; + blendDesc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; + blendDesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; + blendDesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; + blendDesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; + blendDesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; + blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + if (!vrFeatheredCompositeBlendState) DX::ThrowIfFailed(globals::d3d::device->CreateBlendState(&blendDesc, vrFeatheredCompositeBlendState.put())); + + // Create constant buffer only if missing: it is shared with the CS fallback path, and this block re-runs while the PS is uncompiled (48 bytes: float2 origin, float2 size, float feather, pad, float2 srcUVOrigin, float2 srcUVScale) + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + if (!vrFeatheredCompositeCB) DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrFeatheredCompositeCB.put())); + } + + if (vrFeatheredCompositePS) { + // Save current OM state + ID3D11BlendState* oldBlendState = nullptr; + float oldBlendFactor[4]; + UINT oldSampleMask; + context->OMGetBlendState(&oldBlendState, oldBlendFactor, &oldSampleMask); + + ID3D11RenderTargetView* oldRTV = nullptr; + ID3D11DepthStencilView* oldDSV = nullptr; + context->OMGetRenderTargets(1, &oldRTV, &oldDSV); + + // Update constant buffer + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrFeatheredCompositeCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + float originX, originY; + float sizeX, sizeY; + float featherWidth; + float pad0; + float srcUVOriginX, srcUVOriginY; + float srcUVScaleX, srcUVScaleY; + float pad1[2]; + } cbData = { + (float)pasteX, (float)pasteY, + (float)dlssWidthOut, (float)dlssHeightOut, + featherPixels, 0.0f, + 0.0f, 0.0f, // SrcUVOrigin: (0,0) for per-eye textures (identity) + 1.0f, 1.0f, // SrcUVScale: (1,1) for per-eye textures 
(identity) + {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(vrFeatheredCompositeCB.get(), 0); + + // Set viewport to match the per-eye final output + D3D11_VIEWPORT vp = {}; + vp.Width = (float)eyeWidthOut; + vp.Height = (float)eyeHeightOut; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + + // Set blend state for feathered alpha compositing + float blendFactor[4] = { 0, 0, 0, 0 }; + context->OMSetBlendState(vrFeatheredCompositeBlendState.get(), blendFactor, 0xFFFFFFFF); + + // Set render target to vrFinalOutput (contains TAA'd periphery) + ID3D11RenderTargetView* rtvs[1] = { vrFinalOutput[i]->rtv.get() }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + // Set shaders + context->VSSetShader(GetUpscaleVS(), nullptr, 0); + context->PSSetShader(vrFeatheredCompositePS.get(), nullptr, 0); + + // Set input assembler for bufferless fullscreen triangle + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + // Bind rasterizer state (no culling) + context->RSSetState(upscaleRasterizerState.get()); + + // Bind crop texture SRV and sampler + ID3D11ShaderResourceView* srvs[1] = { vrIntermediateColorOut[i]->srv.get() }; + context->PSSetShaderResources(0, 1, srvs); + + if (!vrLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&sampDesc, vrLinearSampler.put())); + } + ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; + context->PSSetSamplers(0, 1, samplers); + + // Bind constant buffer + ID3D11Buffer* cbs[1] = { vrFeatheredCompositeCB.get() }; + 
context->PSSetConstantBuffers(0, 1, cbs); + + // Draw fullscreen triangle + context->Draw(3, 0); + + if (featherLogCount < 3) { + logger::info("[Upscaling] FeatheredCompositePS drawn: eye={}, crop=({},{}) {}x{}, feather={:.1f}", + i, pasteX, pasteY, dlssWidthOut, dlssHeightOut, featherPixels); + featherLogCount++; + } + + // Cleanup PS state + ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; + context->PSSetShaderResources(0, 1, nullSRV); + context->PSSetShader(nullptr, nullptr, 0); + context->VSSetShader(nullptr, nullptr, 0); + + // Restore OM state + context->OMSetBlendState(oldBlendState, oldBlendFactor, oldSampleMask); + if (oldBlendState) + oldBlendState->Release(); + context->OMSetRenderTargets(1, &oldRTV, oldDSV); + if (oldRTV) + oldRTV->Release(); + if (oldDSV) + oldDSV->Release(); + } else { + // PS compilation failed; fall back to hard paste + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + } + } else if (featherPixels > 0.0f) { + // Fallback: RTV not available, use legacy CS path + if (!vrFeatheredCompositeCS) { + vrFeatheredCompositeCS.attach((ID3D11ComputeShader*)Util::CompileShader( + L"Data/Shaders/Upscaling/FeatheredCompositeCS.hlsl", {}, "cs_5_0")); + if (vrFeatheredCompositeCS) + logger::info("[Upscaling] FeatheredCompositeCS compiled (fallback)"); + else + logger::error("[Upscaling] FeatheredCompositeCS FAILED to compile!"); + if (!vrFeatheredCompositeCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrFeatheredCompositeCB.put())); + } + } + + if (vrFeatheredCompositeCS) { + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrFeatheredCompositeCB.get(), 0, 
D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + uint32_t cx, cy, cw, ch; + float feather; + float pad[3]; + } cbData = { + pasteX, pasteY, dlssWidthOut, dlssHeightOut, featherPixels, {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(vrFeatheredCompositeCB.get(), 0); + + ID3D11Buffer* cbs[1] = { vrFeatheredCompositeCB.get() }; + context->CSSetConstantBuffers(0, 1, cbs); + ID3D11ShaderResourceView* srvs[1] = { vrIntermediateColorOut[i]->srv.get() }; + context->CSSetShaderResources(0, 1, srvs); + ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[i]->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + context->CSSetShader(vrFeatheredCompositeCS.get(), nullptr, 0); + context->Dispatch((dlssWidthOut + 7) / 8, (dlssHeightOut + 7) / 8, 1); + + if (featherLogCount < 3) { + logger::info("[Upscaling] FeatheredCompositeCS dispatched (fallback): groups=({},{})", + (dlssWidthOut + 7) / 8, (dlssHeightOut + 7) / 8); + featherLogCount++; + } + + ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; + context->CSSetShaderResources(0, 1, nullSRV); + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetShader(nullptr, nullptr, 0); + } else { + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + } + } else { + // Hard paste (no feathering) + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + if (featherLogCount < 3) { + logger::info("[Upscaling] FinalizePerEyeOutputs eye{}: using HARD PASTE (no feathering)", i); + featherLogCount++; + } + } + + // Copy composition target to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, 
eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrFinalOutput[i]->resource.get(), 0, &outBox); + } else { + // Direct copy DLSS output to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + } } if (state->frameAnnotations) @@ -941,7 +1470,11 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) } void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX) + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY, + uint32_t depthWidth, uint32_t depthHeight, + uint32_t colorWidth, uint32_t colorHeight, + ID3D11ShaderResourceView* fallbackSRV, uint32_t fallbackOffsetX) { if (!globals::game::isVR) return; @@ -952,7 +1485,7 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe vrClearHMDMaskCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/ClearHMDMaskCS.hlsl", {}, "cs_5_0")); D3D11_BUFFER_DESC cbDesc = {}; - cbDesc.ByteWidth = 16; // 4 uints + cbDesc.ByteWidth = 32; // 8 uints (offsets + optional scaling dimensions) cbDesc.Usage = D3D11_USAGE_DYNAMIC; cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; @@ -965,8 +1498,9 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->CSSetShader(vrClearHMDMaskCS.get(), nullptr, 0); - ID3D11ShaderResourceView* srvs[1] = { depthSRV }; - context->CSSetShaderResources(0, 1, srvs); + // t0 = depth, t1 = fallback (nullptr → unbound → reads return (0,0,0,0) → black) + ID3D11ShaderResourceView* srvs[2] = { depthSRV, fallbackSRV }; + context->CSSetShaderResources(0, 2, srvs); ID3D11UnorderedAccessView* uavs[1] = { colorUAV }; 
context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); @@ -974,9 +1508,10 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe D3D11_MAPPED_SUBRESOURCE mapped{}; context->Map(vrClearHMDMaskCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - uint32_t offsets[4] = { depthOffsetX, colorOffsetX, 0, 0 }; + uint32_t cbData[8] = { depthOffsetX, colorOffsetX, depthOffsetY, fallbackOffsetX, + depthWidth, depthHeight, colorWidth, colorHeight }; - memcpy(mapped.pData, offsets, sizeof(offsets)); + memcpy(mapped.pData, cbData, sizeof(cbData)); context->Unmap(vrClearHMDMaskCB.get(), 0); ID3D11Buffer* cbs[1] = { vrClearHMDMaskCB.get() }; @@ -984,13 +1519,81 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->Dispatch(dispatchX, dispatchY, 1); + // Unbind + ID3D11ShaderResourceView* nullSRVs[2] = { nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + ID3D11Buffer* nullCB[1] = { nullptr }; + context->CSSetShaderResources(0, 2, nullSRVs); + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetShader(nullptr, nullptr, 0); + } +} + +void Upscaling::FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV) +{ + if (!globals::game::isVR || !vrFinalOutput[eyeIndex]) + return; + if (!overrideSRV && !vrIntermediateColorIn[eyeIndex]) + return; + + auto context = globals::d3d::context; + + if (!vrPeripheryFillCS) { + vrPeripheryFillCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/VRPeripheryFillCS.hlsl", {}, "cs_5_0")); + + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 16; // 4 uints + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, 
vrPeripheryFillCB.put())); + + D3D11_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&samplerDesc, vrLinearSampler.put())); + } + + if (vrPeripheryFillCS) { + auto dispatchX = (dstWidth + 7) / 8; + auto dispatchY = (dstHeight + 7) / 8; + + context->CSSetShader(vrPeripheryFillCS.get(), nullptr, 0); + + // Read from overrideSRV (e.g. TAA texture) or default render-res per-eye input. + ID3D11ShaderResourceView* srvs[1] = { overrideSRV ? overrideSRV : vrIntermediateColorIn[eyeIndex]->srv.get() }; + context->CSSetShaderResources(0, 1, srvs); + + ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[eyeIndex]->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; + context->CSSetSamplers(0, 1, samplers); + + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrPeripheryFillCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + uint32_t cbData[4] = { srcWidth, srcHeight, dstWidth, dstHeight }; + memcpy(mapped.pData, cbData, sizeof(cbData)); + context->Unmap(vrPeripheryFillCB.get(), 0); + + ID3D11Buffer* cbs[1] = { vrPeripheryFillCB.get() }; + context->CSSetConstantBuffers(0, 1, cbs); + + context->Dispatch(dispatchX, dispatchY, 1); + // Unbind ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; ID3D11Buffer* nullCB[1] = { nullptr }; + ID3D11SamplerState* nullSampler[1] = { nullptr }; context->CSSetShaderResources(0, 1, nullSRV); context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetSamplers(0, 1, nullSampler); context->CSSetShader(nullptr, nullptr, 0); } } @@ -1101,6 +1704,10 @@ void 
Upscaling::ConfigureUpscaling(RE::BSGraphics::State* a_viewport) // Disable dynamic resolution unless the game explicitly enables it if (!globals::game::isVR) runtimeData.dynamicResolutionLock = 1; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // No longer need to force-disable culling when upscaling is active. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs. } void Upscaling::SetupResources() @@ -1145,6 +1752,7 @@ void Upscaling::SetupResources() depthStencilDesc.BackFace.StencilDepthFailOp = depthStencilDesc.FrontFace.StencilDepthFailOp; depthStencilDesc.BackFace.StencilPassOp = depthStencilDesc.FrontFace.StencilPassOp; depthStencilDesc.BackFace.StencilFunc = depthStencilDesc.FrontFace.StencilFunc; + } else { depthStencilDesc.StencilEnable = false; // Disable stencil testing } @@ -1198,6 +1806,16 @@ void Upscaling::ClearShaderCache() depthRefractionUpscalePS = nullptr; // com_ptr automatically releases underwaterMaskUpscalePS = nullptr; // com_ptr automatically releases upscaleVS = nullptr; // com_ptr automatically releases + vrClearHMDMaskCS = nullptr; + vrPeripheryFillCS = nullptr; + vrPeripheryFillCB = nullptr; + vrFeatheredCompositeCS = nullptr; + vrFeatheredCompositeCB = nullptr; + vrFeatheredCompositePS = nullptr; + vrFeatheredCompositeBlendState = nullptr; + vrDlssCompositePS = nullptr; + vrDlssUpscalePS = nullptr; + vrDlssUpscaleCB = nullptr; } void Upscaling::CopySharedD3D12Resources() @@ -1408,6 +2026,18 @@ bool Upscaling::IsUpscalingActive() const return resolutionScale.x < .99f; } +std::vector Upscaling::GetActiveConstraints() const +{ + std::vector constraints; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs, + // so we no longer need to constrain depth buffer culling when upscaling is active. 
+ + return constraints; +} + + /** * @brief Retrieves the current frame time for frame generation. * @@ -1519,7 +2149,7 @@ Upscaling::BlurResources Upscaling::GetBlurResources() const return {}; } -void Upscaling::Upscale() +void Upscaling::Upscale(ID3D11Texture2D* colorSourceOverride) { auto upscaleMethod = GetUpscaleMethod(); @@ -1580,8 +2210,13 @@ void Upscaling::Upscale() { state->BeginPerfEvent("Upscaling"); + // Use color source override if provided (e.g., post-PP intermediate for periphery TAA) + ID3D11Resource* colorSrc = colorSourceOverride + ? static_cast<ID3D11Resource*>(colorSourceOverride) + : static_cast<ID3D11Resource*>(main.texture); + if (upscaleMethod == UpscaleMethod::kDLSS) { - streamline.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); + streamline.Upscale(colorSrc, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); } else if (upscaleMethod == UpscaleMethod::kFSR) { fidelityFX.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVector.texture, settings.sharpnessFSR); } @@ -1719,9 +2354,10 @@ void Upscaling::UpscaleDepth() // Skip alias copies to reduce unnecessary copy churn. copyIfNonAliased(depthCopy.texture, depth.texture); - // Clear stencil to be 0xFF + // Clear stencil to 0x00 for VR — the VR shader path discards pixels with + // stencil > 0x00, so 0x00 means "all pixels valid" (process entire display-res).
if (globals::game::isVR) { - context->ClearDepthStencilView(depthCopy.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0xFF); + context->ClearDepthStencilView(depthCopy.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0x00); } // Set depth stencil state to write 0x00 @@ -1821,20 +2457,91 @@ void Upscaling::Main_PostProcessing::thunk(RE::ImageSpaceManager* a_this, uint32 if (upscaling.d3d12SwapChainActive && upscaling.settings.frameGenerationMode) upscaling.CopySharedD3D12Resources(); - if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) - upscaling.PerformUpscaling(); + // Increment diagnostic counter (rate-limits TAAReorder logging) + if (TAAReorder::g_initialized) { + TAAReorder::g_diagCounter = (TAAReorder::g_diagCounter + 1) % TAAReorder::DIAG_INTERVAL; + if (TAAReorder::g_diagCounter == 0) { + TAAReorder::g_frameSeqCounter = 0; + logger::info("[SEQ] Main_PostProcessing START seq={}", TAAReorder::g_frameSeqCounter++); + } + } - if (upscaleMethod == UpscaleMethod::kDLSS) - upscaling.ApplySharpening(); + bool peripheryTAA = TAAReorder::ShouldReorderTAA(); + + if (peripheryTAA) { + // ─── Periphery TAA with post-conductor DLSS (PureDark's approach) ─── + // func() with TAA enabled → conductor runs all passes unimpeded: + // Phase 2A: ExecutePassHook captures post-PP intermediate to g_postPPCopy + // Phase 5: TAA + DRS → submit texture + // After conductor: ConductorCallHook evaluates DLSS on g_postPPCopy, + // then pastes DLSS center onto submit texture + + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + + // Reset per-frame flags + TAAReorder::g_postPPReady = false; + TAAReorder::g_dlssReady = false; + TAAReorder::g_dlssPasteComplete = false; + TAAReorder::g_phase5Complete = false; + TAAReorder::g_bsHookCallCount = 0; + + if (TAAReorder::g_diagCounter == 0) + logger::info("[TAAReorder] peripheryTAA: running func() with TAA enabled..."); + + // Clear stencil marks 
left by VRStereoOptimizations to prevent TAA interference + if (globals::features::vrStereoOptimizations.loaded) { + auto renderer = globals::game::renderer; + auto& depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (depth.views[0]) + globals::d3d::context->ClearDepthStencilView(depth.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0); + } - auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); - GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + // Set TAA high-frequency response for periphery quality + auto fTAAHighFreq = RE::GetINISetting("fTAAHighFreq:Display"); + float savedHF = fTAAHighFreq ? fTAAHighFreq->data.f : 0.0f; + if (fTAAHighFreq) + fTAAHighFreq->data.f = 1.0f; + + // func() with TAA ENABLED — DLSS eval + paste in ConductorCallHook (post-conductor) + BSImagespaceShaderISTemporalAA->taaEnabled = true; + func(a_this, a3, a_target, a_4, a_5); + + // Restore original TAA HF value + if (fTAAHighFreq) + fTAAHighFreq->data.f = savedHF; + + // Lock DRS + update camera (after conductor completes) + auto& runtimeData = globals::game::graphicsState->GetRuntimeData(); + runtimeData.dynamicResolutionLock = 1; + UpdateCameraData(); + + // Disable TAA for remainder of frame + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } else { + // ─── Normal flow (no periphery TAA) ─── + if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) + upscaling.PerformUpscaling(); + + if (upscaleMethod == UpscaleMethod::kDLSS) + upscaling.ApplySharpening(); + + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); - BSImagespaceShaderISTemporalAA->taaEnabled = upscaleMethod == UpscaleMethod::kTAA; + BSImagespaceShaderISTemporalAA->taaEnabled = (upscaleMethod == UpscaleMethod::kTAA); - func(a_this, a3, a_target, a_4, a_5); + if (TAAReorder::g_diagCounter == 0 && TAAReorder::g_initialized) + 
logger::info("[DIAG] Normal DLSS flow: taaEnabled={}, running func()...", BSImagespaceShaderISTemporalAA->taaEnabled); - BSImagespaceShaderISTemporalAA->taaEnabled = false; + func(a_this, a3, a_target, a_4, a_5); + + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } + + // VR CAS sharpening (after TAA) + if (REL::Module::IsVR() && globals::features::vrStereoOptimizations.loaded) + globals::features::vrStereoOptimizations.ApplyCAS(a_target); } void Upscaling::SetScissorRect::thunk(RE::BSGraphics::Renderer* This, int a_left, int a_top, int a_right, int a_bottom) @@ -1868,3 +2575,4 @@ void Upscaling::BSFaceGenManager_UpdatePendingCustomizationTextures::thunk() func(); runtimeData.dynamicResolutionLock = 0; } + diff --git a/src/Features/Upscaling.h b/src/Features/Upscaling.h index 6cecf6cbaf..0cf39ee719 100644 --- a/src/Features/Upscaling.h +++ b/src/Features/Upscaling.h @@ -59,6 +59,10 @@ struct Upscaling : Feature float sharpnessDLSS = 0.0f; uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + float vrDlssViewportScale = 1.0f; // 0.5 to 1.0, fraction of each eye that DLSS processes (VR only) + uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) + float vrDlssCropOffsetX = 0.0f; // 0.0-0.3, nasal offset fraction for DLSS crop position + float vrDlssFeatherWidth = 0.0f; // 0.0-0.1, feather width fraction at DLSS crop boundary (disabled pending fix) }; Settings settings; @@ -110,6 +114,7 @@ struct Upscaling : Feature virtual void Load() override; virtual void PostPostLoad() override; virtual void SetupResources() override; + virtual std::vector GetActiveConstraints() const override; UpscaleMethod GetUpscaleMethod() const; @@ -138,7 +143,11 @@ struct Upscaling : Feature winrt::com_ptr vrClearHMDMaskCB; // Helper to dispatch mask clearing for a single eye region void ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, 
ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX); + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY = 0, + uint32_t depthWidth = 0, uint32_t depthHeight = 0, + uint32_t colorWidth = 0, uint32_t colorHeight = 0, + ID3D11ShaderResourceView* fallbackSRV = nullptr, uint32_t fallbackOffsetX = 0); // Shared VR Per-Eye Intermediate Buffers // Owned here so both Streamline (DLSS) and FidelityFX (FSR) can use them. @@ -148,6 +157,43 @@ struct Upscaling : Feature eastl::unique_ptr vrIntermediateMotionVectors[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateReactiveMask[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateTransparencyMask[2]; // per-eye render resolution + eastl::unique_ptr vrFinalOutput[2]; // per-eye display-res composition target (VR viewport scaling) + eastl::unique_ptr vrCropColorIn[2]; // crop-sized DLSS color input (VR viewport scaling only) + + // Periphery TAA (conductor approach) — used by two-call func() flow + winrt::com_ptr vrPreTAACopy; // full stereo kMAIN copy (Phase 1 PP, pre-TAA) + eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) + + // Periphery fill compute shader (bilinear upscale render-res → display-res for VR viewport scaling) + winrt::com_ptr vrPeripheryFillCS; + winrt::com_ptr vrPeripheryFillCB; + winrt::com_ptr vrLinearSampler; + + // Feathered composite compute shader (legacy, kept as fallback) + winrt::com_ptr vrFeatheredCompositeCS; + winrt::com_ptr vrFeatheredCompositeCB; + + // Feathered composite pixel shader approach (replaces CS to preserve periphery TAA) + // Based on PureDark's technique from Skyrim-Upscaler VR (MIT license) + winrt::com_ptr vrFeatheredCompositePS; + winrt::com_ptr vrFeatheredCompositeBlendState; + + // DLSS composite pixel shaders (format-converting fullscreen copy for TAAReorder) + winrt::com_ptr 
vrDlssCompositePS; // point-sample (same-res format conversion) + winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) + winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params + ID3D11PixelShader* GetDlssCompositePS(); + ID3D11PixelShader* GetDlssUpscalePS(); + + struct DlssCompositeCB + { + float2 DynResScale; // renderRes / displayRes per-eye + float2 EyeOffset; // (i * eyeWidth, 0) + float2 SrcTexSize; // full texture dimensions + float2 pad; + }; + void FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV = nullptr); // Helper to create/resize per-eye buffers matching source formats void CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight, uint32_t outWidth, uint32_t outHeight, @@ -160,11 +206,11 @@ struct Upscaling : Feature // Shared Pipeline Steps void PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* depthSrc, ID3D11Resource* mvecSrc, ID3D11Resource* reactiveSrc, ID3D11Resource* transparencySrc); - void FinalizePerEyeOutputs(ID3D11Resource* colorDst); + void FinalizePerEyeOutputs(ID3D11Resource* colorDst, bool eye0Only = false); void ConfigureTAA(); void ConfigureUpscaling(RE::BSGraphics::State* a_state); - void Upscale(); + void Upscale(ID3D11Texture2D* colorSourceOverride = nullptr); // D3D11 textures Texture2D* reactiveMaskTexture = nullptr; From 5d46b9840156ae277117ac184847edb0a2bc1188 Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 15 Mar 2026 23:34:04 -0600 Subject: [PATCH 12/16] feat(vr-dlss): Streamline per-eye DLSS evaluation for VR Refactor Streamline DLSS to support per-eye evaluation in VR: - Split CheckFrameConstants into per-eye loop with viewport[i] offsets - Add crop-aware subrect calculation when viewport scaling is active - Compute cameraPinholeOffset for asymmetric crop (nasal offset) - Support per-eye intermediate textures for color, depth, motion vectors, reactive mask, and 
transparency mask inputs - Add colorSourceOverride parameter for TAAReorder post-conductor flow - Fix jitter scaling to use crop dimensions when viewport scaling Co-Authored-By: Claude Opus 4.6 --- src/Features/Upscaling/Streamline.cpp | 204 +++++++++++++++++++++++--- src/Features/Upscaling/Streamline.h | 6 +- 2 files changed, 184 insertions(+), 26 deletions(-) diff --git a/src/Features/Upscaling/Streamline.cpp b/src/Features/Upscaling/Streamline.cpp index 71eb3a3542..304883e588 100644 --- a/src/Features/Upscaling/Streamline.cpp +++ b/src/Features/Upscaling/Streamline.cpp @@ -7,6 +7,7 @@ #include "../../Hooks.h" #include "../../State.h" #include "../../Util.h" +#include "../TAAReorder.h" #include "../Upscaling.h" #include "DX12SwapChain.h" @@ -237,6 +238,20 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.cameraMotionIncluded = sl::Boolean::eTrue; slConstants.cameraPinholeOffset = { 0.f, 0.f }; + + // VR nasal offset: when the crop is shifted, tell DLSS the optical center is offset + if (globals::game::isVR) { + float nasalFrac = globals::features::upscaling.settings.vrDlssCropOffsetX; + float vpScale = globals::features::upscaling.settings.vrDlssViewportScale; + if (nasalFrac > 0.0f && vpScale < 1.0f) { + // Pinhole offset in NDC: how far the crop center is from the eye's optical axis + // Eye 0: shifted right (+X), Eye 1: shifted left (-X) + float shiftNDC = nasalFrac / vpScale; // normalized to crop width + float sign = (eyeIndex == 0) ? 
1.0f : -1.0f; + slConstants.cameraPinholeOffset = { sign * shiftNDC, 0.f }; + } + } + slConstants.cameraRight = { viewMatrix._11, viewMatrix._12, viewMatrix._13 }; slConstants.cameraUp = { viewMatrix._21, viewMatrix._22, viewMatrix._23 }; slConstants.cameraFwd = { viewMatrix._31, viewMatrix._32, viewMatrix._33 }; @@ -245,6 +260,28 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.depthInverted = sl::Boolean::eFalse; if (globals::game::isVR) { + // When VR viewport scaling is active, DLSS processes a centered sub-region of each eye. + // The projection matrix must be adjusted to describe only the crop's FOV, not the full eye. + // Without this, DLSS's temporal reprojection maps pixels to wrong world positions, + // destroying temporal accumulation (causing aliasing and instability). + // Scaling rows 0 and 1 of the projection by 1/vpScale narrows the clip-space X/Y + // to match the crop region. clipToPrevClip must also be conjugated (see below). + float vpScale = globals::features::upscaling.settings.vrDlssViewportScale; + if (vpScale < 1.0f) { + float invScale = 1.0f / vpScale; + // Row 0 → clip.x, Row 1 → clip.y (Streamline row-major, P * pos convention) + slConstants.cameraViewToClip[0].x *= invScale; + slConstants.cameraViewToClip[0].y *= invScale; + slConstants.cameraViewToClip[0].z *= invScale; + slConstants.cameraViewToClip[0].w *= invScale; + slConstants.cameraViewToClip[1].x *= invScale; + slConstants.cameraViewToClip[1].y *= invScale; + slConstants.cameraViewToClip[1].z *= invScale; + slConstants.cameraViewToClip[1].w *= invScale; + // Narrow the reported FOV to match the crop + slConstants.cameraFOV = 2.0f * atanf(vpScale * tanf(slConstants.cameraFOV * 0.5f)); + } + // VR: compute clipToCameraView / clipToPrevClip / prevClipToClip from Skyrim's per-eye matrices. // recalculateCameraMatrices() uses a single static prev-frame slot -- unusable for two viewports. 
sl::matrixFullInvert(slConstants.clipToCameraView, slConstants.cameraViewToClip); @@ -258,7 +295,62 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye sl::float4x4 invCurrViewProj; sl::matrixFullInvert(invCurrViewProj, currViewProjSL); sl::matrixMul(slConstants.clipToPrevClip, invCurrViewProj, prevViewProjSL); + + // When viewport scaling is active, cameraViewToClip is adjusted (narrower FOV), + // changing the clip space. clipToPrevClip (computed from unadjusted VP) maps between + // unadjusted clip spaces. We must conjugate it to map between adjusted clip spaces: + // CTP_adj = inv(S) * CTP * S + // where S = diag(invScale, invScale, 1, 1), inv(S) = diag(vpScale, vpScale, 1, 1). + // + // Derivation (row-vector convention: clip = view * P): + // clip_adj = clip_unadj * S (scaling rows 0,1 of P scales clip x,y by invScale) + // clip_prev_adj = clip_prev_unadj * S + // clip_prev_unadj = clip_curr_unadj * CTP + // clip_prev_adj = (clip_curr_adj * inv(S)) * CTP * S = clip_curr_adj * (inv(S) * CTP * S) + // + // Element-wise: CTP_adj[i][j] = inv(S)[i] * CTP[i][j] * S[j] + // Rows 0,1, cols 0,1: vpScale * invScale = 1 (unchanged) + // Rows 0,1, cols 2,3: vpScale * 1 = vpScale + // Rows 2,3, cols 0,1: 1 * invScale = invScale + // Rows 2,3, cols 2,3: unchanged + // + // This ensures clipToPrevClip agrees with per-pixel motion vectors. + // Without correct conjugation, DLSS sees disagreement between the camera-predicted + // motion and per-pixel motion vectors, causing it to reject temporal accumulation + // during camera motion. (When still, CTP ≈ I, and inv(S)*I*S = I → no mismatch.) 
+ if (vpScale < 1.0f) { + float invScale = 1.0f / vpScale; + // Rows 0,1 cols 2,3: multiply by vpScale (from left-multiply by inv(S)) + slConstants.clipToPrevClip[0].z *= vpScale; + slConstants.clipToPrevClip[0].w *= vpScale; + slConstants.clipToPrevClip[1].z *= vpScale; + slConstants.clipToPrevClip[1].w *= vpScale; + // Rows 2,3 cols 0,1: multiply by invScale (from right-multiply by S) + slConstants.clipToPrevClip[2].x *= invScale; + slConstants.clipToPrevClip[2].y *= invScale; + slConstants.clipToPrevClip[3].x *= invScale; + slConstants.clipToPrevClip[3].y *= invScale; + } + sl::matrixFullInvert(slConstants.prevClipToClip, slConstants.clipToPrevClip); + + // Per-eye diagnostic logging for temporal quality investigation + { + static uint32_t ctpDiagCounter = 0; + bool ctpDiag = (ctpDiagCounter++ % 300 == 0) || (TAAReorder::g_diagCounter == 0 && vpScale < 1.0f); + if (ctpDiag) { + auto& ctp = slConstants.clipToPrevClip; + logger::info("[DLSS-CTP] Eye {} clipToPrevClip diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, ctp[0].x, ctp[1].y, ctp[2].z, ctp[3].w); + logger::info("[DLSS-CTP] Eye {} prevVP diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, prevViewProjSL[0].x, prevViewProjSL[1].y, prevViewProjSL[2].z, prevViewProjSL[3].w); + logger::info("[DLSS-CTP] Eye {} currVP diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, currViewProjSL[0].x, currViewProjSL[1].y, currViewProjSL[2].z, currViewProjSL[3].w); + logger::info("[DLSS-CTP] Eye {} cameraPos=({:.2f},{:.2f},{:.2f}) fov={:.4f} mvecScale=({:.4f},{:.4f})", + eyeIndex, slConstants.cameraPos.x, slConstants.cameraPos.y, slConstants.cameraPos.z, + slConstants.cameraFOV, slConstants.mvecScale.x, slConstants.mvecScale.y); + } + } } else { recalculateCameraMatrices(slConstants); } @@ -268,7 +360,26 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.jitterOffset = { -jitter.x, -jitter.y }; slConstants.reset = sl::Boolean::eFalse; - slConstants.mvecScale = { 1.0f, 1.0f }; + // 
mvecScale normalizes motion vectors to [-1,1] range. The Streamline DLSS plugin + // then multiplies by the input render dimensions to get pixel displacement: + // MV_Scale = mvecScale * renderWidth + // The game's motion vectors are in [-1,1] normalized to the FULL per-eye dimensions. + // Without viewport scaling, renderWidth = eyeWidthIn → MV_Scale = eyeWidthIn → correct. + // With viewport scaling, renderWidth = cropWidthIn = eyeWidthIn * vpScale, so DLSS + // underestimates motion by vpScale. Compensate by scaling mvecScale by 1/vpScale. + if (globals::game::isVR && globals::features::upscaling.settings.vrDlssViewportScale < 1.0f) { + float invScale = 1.0f / globals::features::upscaling.settings.vrDlssViewportScale; + slConstants.mvecScale = { invScale, invScale }; + } else { + slConstants.mvecScale = { 1.0f, 1.0f }; + } + // Log mvecScale after assignment (was previously logged before assignment, showing uninitialized values) + if (globals::game::isVR && TAAReorder::g_diagCounter == 0 && globals::features::upscaling.settings.vrDlssViewportScale < 1.0f) { + logger::info("[TAAReorder] Eye {} mvecScale=({:.4f},{:.4f}) jitter=({:.4f},{:.4f})", + eyeIndex, slConstants.mvecScale.x, slConstants.mvecScale.y, + slConstants.jitterOffset.x, slConstants.jitterOffset.y); + } + slConstants.motionVectors3D = sl::Boolean::eFalse; slConstants.motionVectorsInvalidValue = FLT_MIN; slConstants.orthographicProjection = sl::Boolean::eFalse; @@ -277,6 +388,13 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye if (SL_FAILED(res, slSetConstants(slConstants, *frameToken, p_viewport))) { logger::error("[Streamline] Could not set constants for eye {}", eyeIndex); + } else { + static uint32_t constDiagCounter = 0; + if (constDiagCounter++ % 300 == 0) { + logger::info("[Streamline] slSetConstants OK eye={} jitter=({:.4f},{:.4f}) fov={:.4f}", + eyeIndex, slConstants.jitterOffset.x, slConstants.jitterOffset.y, + slConstants.cameraFOV); + } } } @@ -304,7 
+422,7 @@ bool Streamline::IsRTXAndBelow40Series(IDXGIAdapter* a_adapter) return false; } -void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) +void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width, uint32_t height) { sl::DLSSOptions dlssOptions{}; @@ -328,10 +446,8 @@ void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) break; } - auto state = globals::state; - dlssOptions.outputWidth = width; - dlssOptions.outputHeight = (uint)state->screenSize.y; + dlssOptions.outputHeight = height; // Detect HDR from kMAIN format at runtime -- VR kMAIN may be 8-bit while SE is FP16 { @@ -394,7 +510,7 @@ void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, ID3D11Resource* colorIn, ID3D11Resource* colorOut, ID3D11Resource* depth, ID3D11Resource* mvec, ID3D11Resource* reactiveMask, ID3D11Resource* transparencyMask, - const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth) + const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth, uint32_t outputHeight) { auto context = globals::d3d::context; @@ -406,7 +522,7 @@ void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, sl::Resource transparencyMaskRes = { sl::ResourceType::eTex2d, transparencyMask, 0 }; CheckFrameConstants(vp, eyeIndex); - SetDLSSOptions(vp, outputWidth); + SetDLSSOptions(vp, outputWidth, outputHeight); sl::ResourceTag tags[] = { { &colorInRes, sl::kBufferTypeScalingInputColor, sl::ResourceLifecycle::eOnlyValidNow, &extentIn }, @@ -438,17 +554,24 @@ void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, if (state->frameAnnotations) state->EndPerfEvent(); - if (evalResult != sl::Result::eOk) { - static bool evalErrorLogged[2] = { false, false }; + // Rate-limited diagnostic logging for DLSS evaluation results + { + static uint32_t evalDiagCounter[2] = { 0, 0 }; uint32_t 
logIdx = globals::game::isVR ? eyeIndex : 0; - if (!evalErrorLogged[logIdx]) { - evalErrorLogged[logIdx] = true; - logger::error("[Streamline] slEvaluateFeature failed{} result={}", globals::game::isVR ? std::format(" for eye {}", eyeIndex) : "", (int)evalResult); + bool diagLog = (evalDiagCounter[logIdx]++ % 300 == 0); + + if (evalResult != sl::Result::eOk) { + if (diagLog) { + logger::error("[Streamline] slEvaluateFeature FAILED eye={} result={} (frame {})", + eyeIndex, (int)evalResult, evalDiagCounter[logIdx]); + } + } else if (diagLog) { + logger::info("[Streamline] slEvaluateFeature OK eye={} (frame {})", eyeIndex, evalDiagCounter[logIdx]); } } } -void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors) +void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors, bool eye0Only) { auto state = globals::state; @@ -458,9 +581,15 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r auto screenSize = state->screenSize; auto renderSize = Util::ConvertToDynamic(screenSize); - // VR: Combined-buffer mode with extent offsets causes temporal ghosting on the right eye - // because DLSS's internal history buffers use extent offsets as indices. - // Per-eye isolation with extents at {0,0} is required. + // VR: Per-eye isolation is required. Each eye uses a separate per-eye texture + // with its own viewport handle, avoiding cross-eye history contamination. + // When viewport scaling is active (vrDlssViewportScale < 1.0): + // - All DLSS inputs are physically cropped to the center sub-region at {0,0}. + // This eliminates non-zero subrect base offsets which break temporal reprojection. + // - Camera matrices are adjusted in CheckFrameConstants to match the crop's FOV. 
+ // - FillPeriphery bilinear-upscales the full render-res input to vrFinalOutput, + // then FinalizePerEyeOutputs pastes the DLSS crop output into the center. + // When viewport scaling is off (scale == 1.0), all textures are full-size at {0,0}. if (globals::game::isVR) { auto& upscaling = globals::features::upscaling; uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); @@ -468,21 +597,50 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; + float vpScale = upscaling.settings.vrDlssViewportScale; + bool viewportScaling = vpScale < 1.0f; + + uint32_t dlssWidthIn = viewportScaling ? (uint32_t)(eyeWidthIn * vpScale) : eyeWidthIn; + uint32_t dlssHeightIn = viewportScaling ? (uint32_t)(eyeHeightIn * vpScale) : eyeHeightIn; + uint32_t dlssWidthOut = viewportScaling ? (uint32_t)(eyeWidthOut * vpScale) : eyeWidthOut; + uint32_t dlssHeightOut = viewportScaling ? (uint32_t)(eyeHeightOut * vpScale) : eyeHeightOut; + upscaling.PreparePerEyeInputs(a_upscalingTexture, depthTexture.texture, a_motionVectors, a_reactiveMask, a_transparencyCompositionMask); - for (uint32_t i = 0; i < 2; ++i) { + // Periphery TAA diagnostic + if (TAAReorder::g_diagCounter == 0 && viewportScaling && upscaling.settings.vrPeripheryTAA) { + logger::info("[TAAReorder] Periphery TAA: vrTAAdPerEye[0]={}, g_initialized={} (TAA injected at display RT level)", + (void*)upscaling.vrTAAdPerEye[0].get(), TAAReorder::g_initialized); + } + + uint32_t eyeCount = eye0Only ? 1 : 2; + for (uint32_t i = 0; i < eyeCount; ++i) { sl::ViewportHandle vp = (i == 1) ? viewportRight : viewport; - sl::Extent extentIn{ 0, 0, eyeWidthIn, eyeHeightIn }; - sl::Extent extentOut{ 0, 0, eyeWidthOut, eyeHeightOut }; + + if (viewportScaling) { + // Pre-fill composition target with bilinear upscale of full render-res eye. + // DLSS output is pasted on top in FinalizePerEyeOutputs. 
+ upscaling.FillPeriphery(i, eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + } + + // All extents are {0,0} - inputs are physically crop-sized (or full-sized when not scaling). + // No non-zero subrect base offsets, which is critical for DLSS temporal reprojection. + sl::Extent extentIn = { 0, 0, dlssWidthIn, dlssHeightIn }; + sl::Extent extentOut = { 0, 0, dlssWidthOut, dlssHeightOut }; + + // When viewport scaling, use crop-sized vrCropColorIn; otherwise use full vrIntermediateColorIn + ID3D11Resource* colorInput = viewportScaling ? + upscaling.vrCropColorIn[i]->resource.get() : + upscaling.vrIntermediateColorIn[i]->resource.get(); EvaluateDLSS(vp, i, - upscaling.vrIntermediateColorIn[i]->resource.get(), upscaling.vrIntermediateColorOut[i]->resource.get(), + colorInput, upscaling.vrIntermediateColorOut[i]->resource.get(), upscaling.vrIntermediateDepth[i]->resource.get(), upscaling.vrIntermediateMotionVectors[i]->resource.get(), upscaling.vrIntermediateReactiveMask[i]->resource.get(), upscaling.vrIntermediateTransparencyMask[i]->resource.get(), - extentIn, extentOut, eyeWidthOut); + extentIn, extentOut, dlssWidthOut, dlssHeightOut); } - upscaling.FinalizePerEyeOutputs(a_upscalingTexture); + upscaling.FinalizePerEyeOutputs(a_upscalingTexture, eye0Only); } else { // Non-VR: Simple full-texture upscale sl::Extent extentIn{ 0, 0, (uint)renderSize.x, (uint)renderSize.y }; @@ -491,7 +649,7 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r EvaluateDLSS(viewport, 0, a_upscalingTexture, a_upscalingTexture, depthTexture.texture, a_motionVectors, a_reactiveMask, a_transparencyCompositionMask, - extentIn, extentOut, (uint)screenSize.x); + extentIn, extentOut, (uint)screenSize.x, (uint)screenSize.y); } } /** diff --git a/src/Features/Upscaling/Streamline.h b/src/Features/Upscaling/Streamline.h index 0f771fb9a0..348f0b6849 100644 --- a/src/Features/Upscaling/Streamline.h +++ b/src/Features/Upscaling/Streamline.h @@ -70,7 +70,7 @@ class 
Streamline void EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, ID3D11Resource* colorIn, ID3D11Resource* colorOut, ID3D11Resource* depth, ID3D11Resource* mvec, ID3D11Resource* reactiveMask, ID3D11Resource* transparencyMask, - const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth); + const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth, uint32_t outputHeight); // Cached DLL version info for Streamline plugin directory static std::vector> dllVersions; @@ -85,9 +85,9 @@ class Streamline bool IsRTXAndBelow40Series(IDXGIAdapter* a_adapter); - void SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width); + void SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width, uint32_t height); - void Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors); + void Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors, bool eye0Only = false); void DestroyDLSSResources(); }; From 78e003c2367420bd116287c88ba47346fd2106c6 Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 15 Mar 2026 23:34:28 -0600 Subject: [PATCH 13/16] feat(vr-dlss): periphery TAA via TAAReorder post-conductor hooks Implement periphery TAA for VR DLSS viewport scaling by hooking into the BSImagespaceShader render pipeline at the post-conductor level. When DLSS processes only a center crop of each eye, the peripheral region needs temporal anti-aliasing from the native TAA pass. This module reorders the pipeline so TAA runs on the full frame while DLSS processes the crop, then composites the DLSS center onto the TAA'd periphery. 
Hook architecture (all RVAs are SkyrimVR.exe): - BSImagespaceShaderHook (0x132C827): wraps conductor + Phase 5 TAA, evaluates DLSS on Phase 2A output copy after func() returns - ExecutePassHook (0x012D2540): captures Phase 2A output to g_postPPCopy for DLSS color source - ConductorCallHook (0x1325086): tracks conductor state - ForceTAASetter/TAAStateMachine: pass-through TAA control hooks - DepthStencilRegHook (0x00DC79D0): diagnostic logging for depth target registration - SubmitHook (0x00C53920): diagnostic logging for VR frame submission Based on PureDark's BSImagespaceShader_Hook_VR approach from Skyrim-Upscaler VR (MIT license: https://github.com/PureDark/Skyrim-Upscaler). Co-Authored-By: Claude Opus 4.6 --- src/Features/TAAReorder.cpp | 569 ++++++++++++++++++++++++++++++++++++ src/Features/TAAReorder.h | 174 +++++++++++ 2 files changed, 743 insertions(+) create mode 100644 src/Features/TAAReorder.cpp create mode 100644 src/Features/TAAReorder.h diff --git a/src/Features/TAAReorder.cpp b/src/Features/TAAReorder.cpp new file mode 100644 index 0000000000..a8e0e0b9b3 --- /dev/null +++ b/src/Features/TAAReorder.cpp @@ -0,0 +1,569 @@ +/** + * @brief TAA Periphery Reordering for VR DLSS Viewport Scaling + * + * This implementation follows the approach pioneered by PureDark's Skyrim Upscaler + * (https://github.com/PureDark/Skyrim-Upscaler/tree/VR), which demonstrated how to + * reorder Skyrim's post-processing pipeline to run vanilla TAA on the periphery while + * DLSS processes a cropped center region. No code was copied; the approach was used as + * a reference for the conductor/hook architecture. 
+ * + * PureDark's Skyrim Upscaler is licensed under the MIT License: + * Copyright (c) 2022 PureDark + * https://github.com/PureDark/Skyrim-Upscaler/blob/VR/LICENSE + */ +#include "TAAReorder.h" + +#include "Globals.h" +#include "Upscaling.h" +#include +#include + +namespace TAAReorder +{ + bool ShouldReorderTAA() + { + if (!g_initialized) + return false; + auto& upscaling = globals::features::upscaling; + return globals::game::isVR && + upscaling.settings.vrPeripheryTAA && + upscaling.settings.vrDlssViewportScale < 1.0f && + upscaling.GetUpscaleMethod() == Upscaling::UpscaleMethod::kDLSS; + } + + // ─── Setter A: Force TAA (pass-through) ─── + void ForceTAASetter::thunk() + { + func(); + } + + // ─── Setter B: TAA State Machine (pass-through) ─── + void TAAStateMachine::thunk() + { + func(); + } + + // ─── EnsurePostPPCopy: create/resize staging texture matching source ─── + void EnsurePostPPCopy(ID3D11Texture2D* sourceTex) + { + D3D11_TEXTURE2D_DESC srcDesc; + sourceTex->GetDesc(&srcDesc); + + if (g_postPPCopy) { + D3D11_TEXTURE2D_DESC existingDesc; + g_postPPCopy->GetDesc(&existingDesc); + if (existingDesc.Width == srcDesc.Width && existingDesc.Height == srcDesc.Height && + existingDesc.Format == srcDesc.Format) + return; + } + + D3D11_TEXTURE2D_DESC desc = srcDesc; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.MiscFlags = 0; + g_postPPCopy = nullptr; + g_postPPCopySRV = nullptr; + globals::d3d::device->CreateTexture2D(&desc, nullptr, g_postPPCopy.put()); + + if (g_postPPCopy) { + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = desc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + globals::d3d::device->CreateShaderResourceView(g_postPPCopy.get(), &srvDesc, g_postPPCopySRV.put()); + Util::SetResourceName(g_postPPCopy.get(), "TAAReorder_PostPPCopy"); + } + } + + // ─── Helper: set up common fullscreen rendering state ─── + static void SetupFullscreenState(ID3D11DeviceContext* context, 
float vpX, float vpY, float vpW, float vpH) + { + D3D11_VIEWPORT viewport = {}; + viewport.TopLeftX = vpX; + viewport.TopLeftY = vpY; + viewport.Width = vpW; + viewport.Height = vpH; + viewport.MaxDepth = 1.0f; + + auto& upscaling = globals::features::upscaling; + context->RSSetViewports(1, &viewport); + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context->VSSetShader(upscaling.GetUpscaleVS(), nullptr, 0); + context->RSSetState(upscaling.upscaleRasterizerState.get()); + context->OMSetBlendState(upscaling.upscaleBlendState.get(), nullptr, 0xffffffff); + } + + // ─── Helper: draw fullscreen triangle (point-sample format-converting copy) ─── + void DrawFullscreenCopy(ID3D11ShaderResourceView* srcSRV, ID3D11RenderTargetView* dstRTV, + float vpX, float vpY, float vpW, float vpH) + { + auto& upscaling = globals::features::upscaling; + auto context = globals::d3d::context; + + SetupFullscreenState(context, vpX, vpY, vpW, vpH); + context->PSSetShader(upscaling.GetDlssCompositePS(), nullptr, 0); + + ID3D11ShaderResourceView* srvs[] = { srcSRV }; + context->PSSetShaderResources(0, 1, srvs); + + ID3D11RenderTargetView* rtvs[] = { dstRTV }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + context->Draw(3, 0); + } + + // ─── ExecutePass hook: capture Phase 2A output, detect Phase 5 ─── + void ExecutePassHook::thunk(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag) + { + bool isPeripheryTAA = ShouldReorderTAA(); + bool shouldLog = (g_diagCounter == 0); + + // Compute pass index for Phase 2A / Phase 5 detection + int passIndex = -1; + if (isPeripheryTAA || shouldLog) { + uintptr_t managerAddr = (uintptr_t)manager; + uintptr_t passArrayBase = *(uintptr_t*)(managerAddr + 0x28); + if (passArrayBase) { + for (int i = 0; i < 40; i++) { + if (*(uintptr_t*)(passArrayBase + i * 8) == (uintptr_t)passObj) { + passIndex 
= i; + break; + } + } + } + } + + if (shouldLog) + logger::info("[TAAReorder] ExecutePass: src=0x{:X} dst=0x{:X} flag={} passIdx={}", + srcTech, dstTech, flag, passIndex); + + // Execute the original pass + func(manager, passObj, srcTech, dstTech, extraData, flag); + + // After Phase 2A: copy output RT to g_postPPCopy for DLSS to process + if (isPeripheryTAA && passIndex == 30 && dstTech == 0x29) { + ID3D11RenderTargetView* postRTV = nullptr; + globals::d3d::context->OMGetRenderTargets(1, &postRTV, nullptr); + if (postRTV) { + ID3D11Resource* res = nullptr; + postRTV->GetResource(&res); + if (res) { + ID3D11Texture2D* postTex = nullptr; + res->QueryInterface(__uuidof(ID3D11Texture2D), (void**)&postTex); + if (postTex) { + EnsurePostPPCopy(postTex); + globals::d3d::context->CopyResource(g_postPPCopy.get(), postTex); + g_postPPReady = true; + if (shouldLog) { + D3D11_TEXTURE2D_DESC desc; + postTex->GetDesc(&desc); + logger::info("[TAAReorder] Phase 2A output: {}x{} fmt={} → copied to g_postPPCopy", + desc.Width, desc.Height, (uint32_t)desc.Format); + } + postTex->Release(); + } + res->Release(); + } + postRTV->Release(); + } + } + + // Detect Phase 5 completion + if (isPeripheryTAA && passIndex == 35) { + g_phase5Complete = true; + if (shouldLog) + logger::info("[TAAReorder] Phase 5 complete (passIdx=35)"); + } + } + + // ─── BSImagespaceShader hook: DLSS eval + paste after pipeline completes ─── + // Wraps call at 0x132C827 (write_thunk_call). func() encompasses the + // conductor (Phase 2A) but NOT Phase 5 (TAA+DRS) — Phase 5 runs after us. + // We evaluate DLSS on the captured Phase 2A output and paste the center + // via CopySubresourceRegion onto the submit texture. 
+ void BSImagespaceShaderHook::thunk(void* a_this, uint64_t a_param) + { + func(a_this, a_param); + + if (!ShouldReorderTAA()) + return; + + bool shouldLog = (g_diagCounter == 0); + auto context = globals::d3d::context; + auto& upscaling = globals::features::upscaling; + + // Get submit texture from bound RT after pipeline stage completes + ID3D11RenderTargetView* submitRTV = nullptr; + context->OMGetRenderTargets(1, &submitRTV, nullptr); + ID3D11Texture2D* submitTex = nullptr; + if (submitRTV) { + ID3D11Resource* res = nullptr; + submitRTV->GetResource(&res); + if (res) { + res->QueryInterface(__uuidof(ID3D11Texture2D), (void**)&submitTex); + res->Release(); + } + submitRTV->Release(); + } + + if (shouldLog) { + if (submitTex) { + D3D11_TEXTURE2D_DESC desc; + submitTex->GetDesc(&desc); + logger::info("[TAAReorder] BSImagespaceShaderHook: submitTex=0x{:X} {}x{} fmt={} bind=0x{:X} postPPReady={} phase5={}", + (uintptr_t)submitTex, desc.Width, desc.Height, (uint32_t)desc.Format, + desc.BindFlags, g_postPPReady, g_phase5Complete); + } else { + logger::info("[TAAReorder] BSImagespaceShaderHook: no submitTex bound"); + } + } + + // Step 1: Evaluate DLSS on the captured post-PP intermediate + if (g_postPPReady && g_postPPCopy) { + if (shouldLog) + logger::info("[TAAReorder] BSImagespaceShaderHook: evaluating DLSS on g_postPPCopy..."); + + upscaling.Upscale(g_postPPCopy.get()); + g_dlssReady = true; + + if (shouldLog) + logger::info("[TAAReorder] BSImagespaceShaderHook: DLSS evaluation complete"); + } else if (shouldLog) { + logger::info("[TAAReorder] BSImagespaceShaderHook: skip DLSS (postPPReady={} postPPCopy={})", + g_postPPReady, (void*)g_postPPCopy.get()); + } + + // Step 2: Paste DLSS center from g_postPPCopy onto submit texture per-eye + if (g_dlssReady && submitTex && g_postPPCopy) { + auto screenSize = globals::state->screenSize; + uint32_t eyeW = (uint32_t)(screenSize.x / 2); + uint32_t eyeH = (uint32_t)screenSize.y; + float vpScale = 
upscaling.settings.vrDlssViewportScale; + uint32_t centerW = (uint32_t)(eyeW * vpScale); + uint32_t centerH = (uint32_t)(eyeH * vpScale); + uint32_t baseCenterX = (eyeW - centerW) / 2; + uint32_t centerY = (eyeH - centerH) / 2; + + // Apply nasal offset (in display resolution space, matching FinalizePerEyeOutputs) + int32_t nasalShift = (int32_t)(upscaling.settings.vrDlssCropOffsetX * eyeW); + + float featherWidth = upscaling.settings.vrDlssFeatherWidth; + float featherPixels = featherWidth * eyeW; + + // Feathered blend path: use FeatheredCompositePS with hardware alpha blending + bool useFeathered = featherPixels > 0.0f && upscaling.vrFeatheredCompositePS && upscaling.vrFeatheredCompositeBlendState; + if (useFeathered) { + // Re-acquire submitRTV (we released it above, need it for render target binding) + ID3D11RenderTargetView* pasteRTV = nullptr; + context->OMGetRenderTargets(1, &pasteRTV, nullptr); + + if (pasteRTV) { + // Save current pipeline state + ID3D11BlendState* oldBlendState = nullptr; + float oldBlendFactor[4]; + UINT oldSampleMask; + context->OMGetBlendState(&oldBlendState, oldBlendFactor, &oldSampleMask); + + ID3D11VertexShader* oldVS = nullptr; + context->VSGetShader(&oldVS, nullptr, nullptr); + ID3D11PixelShader* oldPS = nullptr; + context->PSGetShader(&oldPS, nullptr, nullptr); + + UINT oldNumVPs = 1; + D3D11_VIEWPORT oldVP; + context->RSGetViewports(&oldNumVPs, &oldVP); + + ID3D11ShaderResourceView* oldPSSRV = nullptr; + context->PSGetShaderResources(0, 1, &oldPSSRV); + ID3D11SamplerState* oldPSSampler = nullptr; + context->PSGetSamplers(0, 1, &oldPSSampler); + ID3D11Buffer* oldPSCB = nullptr; + context->PSGetConstantBuffers(0, 1, &oldPSCB); + + // Ensure CB exists (lazy create, matching Upscaling.cpp pattern) + if (!upscaling.vrFeatheredCompositeCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; 
+ DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, upscaling.vrFeatheredCompositeCB.put())); + } + + // Set shared state: VS, PS, IA, blend + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context->VSSetShader(upscaling.GetUpscaleVS(), nullptr, 0); + context->PSSetShader(upscaling.vrFeatheredCompositePS.get(), nullptr, 0); + context->RSSetState(upscaling.upscaleRasterizerState.get()); + + float blendFactor[4] = { 0, 0, 0, 0 }; + context->OMSetBlendState(upscaling.vrFeatheredCompositeBlendState.get(), blendFactor, 0xFFFFFFFF); + + // Bind g_postPPCopy SRV as crop source at t0 + ID3D11ShaderResourceView* srvs[1] = { g_postPPCopySRV.get() }; + context->PSSetShaderResources(0, 1, srvs); + + // Bind render target + ID3D11RenderTargetView* rtvs[1] = { pasteRTV }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + // Create/use linear sampler (use Upscaling's if available) + if (!upscaling.vrLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + globals::d3d::device->CreateSamplerState(&sampDesc, upscaling.vrLinearSampler.put()); + } + ID3D11SamplerState* samplers[1] = { upscaling.vrLinearSampler.get() }; + context->PSSetSamplers(0, 1, samplers); + + for (uint32_t i = 0; i < 2; i++) { + uint32_t eyeOffset = i * eyeW; + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t offsetCenterX = (uint32_t)std::clamp((int32_t)baseCenterX + eyeNasalShift, 0, (int32_t)(eyeW - centerW)); + + // Set viewport to this eye region within the SBS submit texture + D3D11_VIEWPORT vp = {}; + vp.TopLeftX = (float)eyeOffset; + vp.TopLeftY = 0.0f; + vp.Width = (float)eyeW; + vp.Height = (float)eyeH; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + + // Update constant buffer with crop rect in SCREEN-SPACE pixel coordinates. + // SV_Position in the pixel shader is in screen space (not viewport-relative): + // for eye 0, x ranges [0, eyeW); for eye 1, x ranges [eyeW, 2*eyeW). + // CropOrigin must therefore include the eye offset so distance calculations + // in FeatheredCompositePS work correctly for both eyes. + // SrcUVOrigin/Scale remap crop-local [0,1] UV to the correct eye region + // within the full SBS g_postPPCopy texture. + uint32_t fullW = eyeW * 2; + uint32_t fullH = eyeH; + float srcUVOriginX = (float)(eyeOffset + offsetCenterX) / (float)fullW; + float srcUVOriginY = (float)centerY / (float)fullH; + float srcUVScaleX = (float)centerW / (float)fullW; + float srcUVScaleY = (float)centerH / (float)fullH; + + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(upscaling.vrFeatheredCompositeCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + float originX, originY; + float sizeX, sizeY; + float featherWidth; + float pad0; + float srcUVOriginX, srcUVOriginY; + float srcUVScaleX, srcUVScaleY; + float pad1[2]; + } cbData = { + (float)(eyeOffset + offsetCenterX), (float)centerY, + (float)centerW, (float)centerH, + featherPixels, 0.0f, + srcUVOriginX, srcUVOriginY, + srcUVScaleX, srcUVScaleY, + {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(upscaling.vrFeatheredCompositeCB.get(), 0); + + ID3D11Buffer* cbs[1] = { upscaling.vrFeatheredCompositeCB.get() }; + context->PSSetConstantBuffers(0, 1, cbs); + + context->Draw(3, 0); + } + + if (shouldLog) + 
logger::info("[TAAReorder] BSImagespaceShaderHook: feathered composite {}x{} at ({},{}) feather={:.1f}px nasalShift={} per-eye onto submit", + centerW, centerH, baseCenterX, centerY, featherPixels, nasalShift); + + // Restore pipeline state + context->OMSetBlendState(oldBlendState, oldBlendFactor, oldSampleMask); + context->RSSetViewports(1, &oldVP); + context->VSSetShader(oldVS, nullptr, 0); + context->PSSetShader(oldPS, nullptr, 0); + context->PSSetShaderResources(0, 1, &oldPSSRV); + context->PSSetSamplers(0, 1, &oldPSSampler); + context->PSSetConstantBuffers(0, 1, &oldPSCB); + + if (oldBlendState) oldBlendState->Release(); + if (oldVS) oldVS->Release(); + if (oldPS) oldPS->Release(); + if (oldPSSRV) oldPSSRV->Release(); + if (oldPSSampler) oldPSSampler->Release(); + if (oldPSCB) oldPSCB->Release(); + + pasteRTV->Release(); + } else { + useFeathered = false; // fall through to hard copy + if (shouldLog) + logger::warn("[TAAReorder] BSImagespaceShaderHook: feathered path - could not re-acquire submitRTV, falling back to hard copy"); + } + } + if (!useFeathered) { + // Hard edge path: CopySubresourceRegion (feather disabled or resources not ready) + for (uint32_t i = 0; i < 2; i++) { + uint32_t eyeOffset = i * eyeW; + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t offsetCenterX = (uint32_t)std::clamp((int32_t)baseCenterX + eyeNasalShift, 0, (int32_t)(eyeW - centerW)); + + D3D11_BOX srcBox = { + eyeOffset + offsetCenterX, centerY, 0, + eyeOffset + offsetCenterX + centerW, centerY + centerH, 1 + }; + context->CopySubresourceRegion(submitTex, 0, + eyeOffset + offsetCenterX, centerY, 0, + g_postPPCopy.get(), 0, &srcBox); + } + + if (shouldLog) + logger::info("[TAAReorder] BSImagespaceShaderHook: hard-copy pasted DLSS crop {}x{} at ({},{}) nasalShift={} per-eye onto submit", + centerW, centerH, baseCenterX, centerY, nasalShift); + } + + g_dlssPasteComplete = true; + } else if (shouldLog) { + logger::info("[TAAReorder] BSImagespaceShaderHook: skip paste (dlssReady={} submitTex={} postPPCopy={})", + g_dlssReady, (void*)submitTex, (void*)g_postPPCopy.get()); + } + + if (submitTex) + submitTex->Release(); + } + + // ─── Depth/stencil registration hook: diagnostic logging ─── + // Tracks dimensions per slot and logs whenever they change. + // data[0]=width, data[1]=height based on initial analysis. + void DepthStencilRegHook::thunk(void* manager, uint32_t slot, void* desc) + { + if (desc && slot < 32) { + auto* data = reinterpret_cast(desc); + static uint32_t lastWidth[32] = {}; + static uint32_t lastHeight[32] = {}; + static uint32_t callCount[32] = {}; + + callCount[slot]++; + bool dimsChanged = (data[0] != lastWidth[slot] || data[1] != lastHeight[slot]); + if (dimsChanged) { + logger::info("[TAAReorder] DepthStencilReg: slot={} {}x{} → {}x{} (call #{}) data[2..7]= {} {} {} {} {} {}", + slot, lastWidth[slot], lastHeight[slot], data[0], data[1], callCount[slot], + data[2], data[3], data[4], data[5], data[6], data[7]); + lastWidth[slot] = data[0]; + lastHeight[slot] = data[1]; + } + } + + func(manager, slot, desc); + } + + // ─── Hidden area mesh render hook: pass-through ─── + // HAM renders normally. 
Previous "frozen frame" artifacts at the HAM boundary + // were caused by the depth upscaler's conservative blending (GatherRed + lerp) + // leaking depth=0 mask values into valid depth. Fixed in DepthUpscalePS.hlsl + // by switching to pure point sampling. + // HiddenAreaMeshHook removed — the passthrough hook was breaking HAM + // by corrupting the original function via Detours on an unverified RVA. + + // ─── BSOpenVR::Submit hook: diagnostic logging ─── + void SubmitHook::thunk(void* thisPtr, void* textureHandle) + { + if (g_diagCounter == 0 && textureHandle) { + auto tex2d = static_cast(textureHandle); + D3D11_TEXTURE2D_DESC desc = {}; + tex2d->GetDesc(&desc); + auto base = REL::Module::get().base(); + auto retAddr = reinterpret_cast(_ReturnAddress()); + logger::info("[TAAReorder] Submit: tex=0x{:X} {}x{} fmt={} dlssPasted={} callerRVA=0x{:X}", + (uintptr_t)textureHandle, desc.Width, desc.Height, (uint32_t)desc.Format, + g_dlssPasteComplete, retAddr - base); + } + + func(thisPtr, textureHandle); + } + + // ─── Post-processing conductor call hook: pass-through (tracking only) ─── + // Inner conductor call at 0x1325086 inside BSImagespaceShader::Render. + // Only tracks g_insideConductor state. DLSS logic is in BSImagespaceShaderHook. + void ConductorCallHook::thunk(void* a1, void* a2, void* a3, void* a4) + { + g_insideConductor = true; + func(a1, a2, a3, a4); + g_insideConductor = false; + } + + void InitEarly() + { + auto base = REL::Module::get().base(); + + // ─── Hook: DepthStencilRegistration (RVA 0x00DC79D0) ─── + // Must be installed before renderer initialization (which registers depth/stencil targets). + // Called from Upscaling::Load(), before D3D device creation. 
+ DepthStencilRegHook::func = reinterpret_cast(base + 0x00DC79D0); + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&DepthStencilRegHook::func), reinterpret_cast(DepthStencilRegHook::thunk)); + DetourTransactionCommit(); + + logger::info("[TAAReorder] InitEarly: DepthStencil registration hooked at RVA 0x00DC79D0"); + } + + void Init() + { + auto base = REL::Module::get().base(); + + // ─── Core pointers ─── + g_pRendererSingleton = reinterpret_cast(base + 0x034234C0); + + // ─── Hook: ForceTAASetter (RVA 0x005C8EE0) ─── + ForceTAASetter::func = base + 0x005C8EE0; + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&ForceTAASetter::func), reinterpret_cast(ForceTAASetter::thunk)); + DetourTransactionCommit(); + + // ─── Hook: TAAStateMachine (RVA 0x005C8F10) ─── + TAAStateMachine::func = base + 0x005C8F10; + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&TAAStateMachine::func), reinterpret_cast(TAAStateMachine::thunk)); + DetourTransactionCommit(); + + // ─── Hook: ExecutePass (RVA 0x012D2540) ─── + ExecutePassHook::func = reinterpret_cast(base + 0x012D2540); + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&ExecutePassHook::func), reinterpret_cast(ExecutePassHook::thunk)); + DetourTransactionCommit(); + + // ─── Hook: BSOpenVR::Submit (RVA 0x00C53920) ─── + SubmitHook::func = reinterpret_cast(base + 0x00C53920); + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&SubmitHook::func), reinterpret_cast(SubmitHook::thunk)); + DetourTransactionCommit(); + + // ─── Hook: BSImagespaceShader via write_thunk_call at RVA 0x132C827 ─── + // Wraps BSImagespaceShader::Render from the Orchestrator level. + // func() encompasses conductor (Phase 2A) + Phase 5 (TAA+DRS) + Submit. + // After func(): DLSS eval + paste. 
Matches PureDark's BSImagespaceShader_Hook_VR. + stl::write_thunk_call(base + 0x132C827); + + // ─── Hook: Inner conductor call via write_thunk_call at RVA 0x1325086 ─── + // Pass-through, only tracks g_insideConductor state. + stl::write_thunk_call(base + 0x1325086); + + g_initialized = true; + + logger::info("[TAAReorder] Initialized — base=0x{:X}", base); + logger::info("[TAAReorder] Post-pipeline DLSS mode (periphery TAA)"); + logger::info("[TAAReorder] BSImagespaceShader hooked via write_thunk_call at RVA 0x132C827 (DLSS eval + paste)"); + logger::info("[TAAReorder] Inner conductor hooked via write_thunk_call at RVA 0x1325086 (tracking only)"); + logger::info("[TAAReorder] BSOpenVR::Submit hooked at RVA 0x00C53920"); + } +} diff --git a/src/Features/TAAReorder.h b/src/Features/TAAReorder.h new file mode 100644 index 0000000000..9720cd2e5b --- /dev/null +++ b/src/Features/TAAReorder.h @@ -0,0 +1,174 @@ +#pragma once + +// TAA Reordering for VR DLSS Viewport Scaling (Post-Conductor DLSS) +// +// PureDark's approach: DLSS is evaluated AFTER BSImagespaceShader::Render +// completes (which includes the conductor + Phase 5 TAA + DRS). +// +// Flow: +// 1. BSImagespaceShaderHook wraps the call at 0x132C827: +// func() runs → conductor executes Phase 2A → Phase 5 (TAA + DRS) +// a. ExecutePassHook captures Phase 2A output to g_postPPCopy +// 2. After func() returns in BSImagespaceShaderHook: +// a. Gets submit texture from bound RT (now has TAA-upscaled content) +// b. Evaluates DLSS on g_postPPCopy (post-PP intermediate) +// c. Pastes DLSS center from g_postPPCopy onto submit texture +// 3. Engine continues: Orchestrator → Scaleform Display (UI) → Submit +// 4. Lock DRS + UpdateCameraData (in Main_PostProcessing::thunk after func()) +// +// Both DLSS and TAA get Phase 2A's PP applied: +// - TAA: naturally (Phase 2A runs before Phase 5 in conductor) +// - DLSS: processes the Phase 2A output copy (g_postPPCopy) +// +// All RVAs are VR-specific (SkyrimVR.exe). 
+ +#include +#include +#include +#include + +struct Upscaling; + +namespace TAAReorder +{ + // ─── Function pointer types ─── + using ExecutePass_t = void (*)(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag); + using BSOpenVRSubmit_t = void (*)(void* thisPtr, void* textureHandle); + + // ─── Resolved global data pointers ─── + inline uintptr_t* g_pRendererSingleton = nullptr; + inline bool g_initialized = false; + + // ─── Diagnostics (rate-limited logging) ─── + inline int g_diagCounter = 0; + inline constexpr int DIAG_INTERVAL = 300; + + // ─── Per-frame sequence counter (for verifying call ordering) ─── + inline int g_frameSeqCounter = 0; + + // ─── ExecutePass hook (conductor interposition) ─── + // RVA: 0x012D2540 — called by the conductor for each render pass. + // Copies Phase 2A output RT to g_postPPCopy for DLSS to process. + struct ExecutePassHook + { + static void thunk(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag); + static inline ExecutePass_t func = nullptr; + }; + + // ─── BSImagespaceShader hook (DLSS eval + paste after pipeline completes) ─── + // RVA: 0x132C827 — write_thunk_call wrapping BSImagespaceShader::Render. + // This is the OUTER call that encompasses the conductor + Phase 5 (TAA+DRS). + // After func() returns: submit texture has TAA-upscaled content. + // We evaluate DLSS on g_postPPCopy and paste the center onto submit texture. + // (Matches PureDark's BSImagespaceShader_Hook_VR) + struct BSImagespaceShaderHook + { + static void thunk(void* a_this, uint64_t a_param); + static inline REL::Relocation func; + }; + + // ─── Post-processing conductor call hook (pass-through, tracking only) ─── + // RVA: 0x1325086 — inner conductor call inside BSImagespaceShader::Render. + // Only used for g_insideConductor tracking. 
+ struct ConductorCallHook + { + static void thunk(void* a1, void* a2, void* a3, void* a4); + static inline REL::Relocation func; + }; + + // ─── Post-PP copy (Phase 2A output, DLSS color source) ─── + // After Phase 2A completes, ExecutePassHook copies the bound RT here. + // BSImagespaceShaderHook passes this to Upscale() as colorSourceOverride. + // After DLSS, FinalizePerEyeOutputs writes DLSS center back into this texture. + inline winrt::com_ptr g_postPPCopy; + inline winrt::com_ptr g_postPPCopySRV; + inline bool g_postPPReady = false; + + // ─── DLSS evaluation complete flag ─── + // Set after BSImagespaceShaderHook evaluates DLSS on g_postPPCopy. + // Used to gate the DLSS center paste step. + inline bool g_dlssReady = false; + + // ─── DLSS paste complete flag ─── + // Set after BSImagespaceShaderHook pastes DLSS center onto submit texture. + inline bool g_dlssPasteComplete = false; + + // ─── Phase 5 tracking ─── + inline bool g_phase5Complete = false; + + // ─── Conductor state tracking ─── + inline bool g_insideConductor = false; + inline int g_bsHookCallCount = 0; + + // ─── RGB-only blend state (may be useful for future feathering) ─── + inline winrt::com_ptr g_rgbOnlyBlendState; + + // ─── Stencil state for HAM-aware compositing ─── + // DepthEnable=false, StencilEnable=true, StencilFunc=EQUAL, StencilRef=0. + // Only writes to pixels where stencil==0 (visible, non-HAM pixels). + // Matches PureDark's approach in Evaluate()/RenderTexture(). + inline winrt::com_ptr g_hamStencilState; + + // ─── Cached UAV for submit texture (ClearHMDMask + ForceAlpha on submit after DLSS paste) ─── + inline winrt::com_ptr g_submitTexUAV; + inline ID3D11Texture2D* g_submitTexUAVOwner = nullptr; // track which texture the UAV belongs to + + // ─── ForceAlpha compute shader (sets alpha=1.0 to fix Scaleform UI rendering) ─── + inline winrt::com_ptr g_forceAlphaCS; + + // ─── Setter hook: Setter A (Force TAA) ─── + // RVA: 0x005C8EE0 — unconditional TAA enable.
+ // Pass-through (we want TAA to run natively). + struct ForceTAASetter + { + static void thunk(); + static inline REL::Relocation func; + }; + + // ─── Setter hook: Setter B (TAA State Machine) ─── + // RVA: 0x005C8F10 — conditional TAA enable/disable. + // Pass-through (we want TAA to run natively). + struct TAAStateMachine + { + static void thunk(); + static inline REL::Relocation func; + }; + + // ─── Depth/stencil registration hook ─── + // RVA: 0x00DC79D0 — registers depth/stencil targets in the RT manager (+0x1388). + // Separate path from color RTs (registered via sub_417980 at +0x1350). + // Hook intercepts registration to log descriptor layout and scale dimensions + // for slots 0, 1, 7 to match display resolution (fixes HAM not being upscaled). + using RegisterDepthStencil_t = void (*)(void* manager, uint32_t slot, void* desc); + struct DepthStencilRegHook + { + static void thunk(void* manager, uint32_t slot, void* desc); + static inline RegisterDepthStencil_t func = nullptr; + }; + + // ─── BSOpenVR::Submit hook (VR frame submission interception) ─── + // RVA: 0x00C53920 — BSOpenVR::Submit, vtable[3]. + // Diagnostic logging only. + struct SubmitHook + { + static void thunk(void* thisPtr, void* textureHandle); + static inline BSOpenVRSubmit_t func = nullptr; + }; + + // Check if TAA reordering should be active based on current settings + bool ShouldReorderTAA(); + + // Ensure g_postPPCopy matches the source texture dimensions/format + void EnsurePostPPCopy(ID3D11Texture2D* sourceTex); + + // Helper: draw fullscreen format-converting copy (Load-based, 1:1 pixel copy). + void DrawFullscreenCopy(ID3D11ShaderResourceView* srcSRV, ID3D11RenderTargetView* dstRTV, + float vpX, float vpY, float vpW, float vpH); + + // Install hooks that must be in place before renderer initialization (depth/stencil reg). + // Call from Upscaling::Load() (VR only). + void InitEarly(); + + // Initialize all pointers and install hooks. Call once from PostPostLoad (VR only). 
+ void Init(); +} From 59ec3bec3f93e078580dc93bf96221592ddf935a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Mar 2026 16:34:19 +0000 Subject: [PATCH 14/16] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20pre-commi?= =?UTF-8?q?t.ci=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated formatting by clang-format, prettier, and other hooks. See https://pre-commit.ci for details. --- .../Shaders/Upscaling/ClearHMDMaskCS.hlsl | 21 +++++----- .../Shaders/Upscaling/DLSSCompositePS.hlsl | 10 ++--- .../Shaders/Upscaling/DepthUpscalePS.hlsl | 12 +++--- .../Upscaling/FeatheredCompositeCS.hlsl | 18 ++++---- .../Upscaling/FeatheredCompositePS.hlsl | 16 +++---- .../Shaders/Upscaling/ForceAlphaCS.hlsl | 3 +- .../Shaders/Upscaling/VRPeripheryFillCS.hlsl | 4 +- src/Features/TAAReorder.cpp | 18 +++++--- src/Features/Upscaling.cpp | 42 +++++++++---------- src/Features/Upscaling.h | 20 ++++----- src/Features/Upscaling/Streamline.cpp | 4 +- src/Globals.cpp | 6 ++- 12 files changed, 86 insertions(+), 88 deletions(-) diff --git a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl index c5c60611d4..dc5d38d4b2 100644 --- a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl @@ -20,33 +20,32 @@ cbuffer ClearHMDMaskCB : register(b0) uint DepthOffsetY; // Y offset into combined stereo depth (non-zero when viewport scaling crops vertically) uint FallbackOffsetX; // X offset into FallbackIn for stereo (0 when unused or left eye) // Optional coordinate scaling (zero = disabled, for backwards compat) - uint DepthWidth; // render-res eye width; if 0, no scaling (1:1 depth/color coords) - uint DepthHeight; // render-res eye height - uint ColorWidth; // display-res eye width - uint ColorHeight; // display-res eye height + uint DepthWidth; 
// render-res eye width; if 0, no scaling (1:1 depth/color coords) + uint DepthHeight; // render-res eye height + uint ColorWidth; // display-res eye width + uint ColorHeight; // display-res eye height }; Texture2D DepthIn : register(t0); Texture2D FallbackIn : register(t1); RWTexture2D ColorInOut : register(u0); -[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) -{ +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { uint2 colorPos = dispatchID.xy + uint2(ColorOffsetX, 0); uint2 depthPos; if (DepthWidth > 0) { // Scale from display-res color coordinates to render-res depth coordinates depthPos = uint2( - (dispatchID.x * DepthWidth) / ColorWidth, - (dispatchID.y * DepthHeight) / ColorHeight - ) + uint2(DepthOffsetX, DepthOffsetY); + (dispatchID.x * DepthWidth) / ColorWidth, + (dispatchID.y * DepthHeight) / ColorHeight) + + uint2(DepthOffsetX, DepthOffsetY); } else { depthPos = dispatchID.xy + uint2(DepthOffsetX, DepthOffsetY); } if (DepthIn[depthPos] == 0.0) ColorInOut[colorPos] = FallbackIn[dispatchID.xy + uint2(FallbackOffsetX, 0)]; - // When FallbackIn is unbound (existing callers): returns (0,0,0,0) → clears to black - // When FallbackIn is bound (TAA mask restore): returns display RT content + // When FallbackIn is unbound (existing callers): returns (0,0,0,0) → clears to black + // When FallbackIn is bound (TAA mask restore): returns display RT content } diff --git a/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl index da776ae442..8cf6b900d3 100644 --- a/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl @@ -13,13 +13,13 @@ Texture2D Source : register(t0); -#ifdef BILINEAR_UPSCALE +# ifdef BILINEAR_UPSCALE cbuffer CompositeCB : register(b0) { float2 DynResScale; // renderRes / displayRes (per-eye) - float2 EyeOffset; // (i * eyeWidth, 0) in texels - float2 SrcTexSize; // full 
texture dimensions in texels + float2 EyeOffset; // (i * eyeWidth, 0) in texels + float2 SrcTexSize; // full texture dimensions in texels float2 pad; }; @@ -36,13 +36,13 @@ float4 main(VS_OUTPUT input) : SV_Target return Source.SampleLevel(LinearSampler, srcUV, 0); } -#else +# else float4 main(VS_OUTPUT input) : SV_Target { return Source.Load(int3(input.Position.xy, 0)); } -#endif // BILINEAR_UPSCALE +# endif // BILINEAR_UPSCALE #endif // PSHADER diff --git a/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl index 41ade15e2d..e5650af665 100644 --- a/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl @@ -20,23 +20,23 @@ #include "Upscaling/UpscaleVS.hlsl" #if defined(PSHADER) -#include "Common/FrameBuffer.hlsli" -#include "Common/SharedData.hlsli" +# include "Common/FrameBuffer.hlsli" +# include "Common/SharedData.hlsli" typedef VS_OUTPUT PS_INPUT; struct PS_OUTPUT { - float Depth : SV_Depth; + float Depth: SV_Depth; }; Texture2D DepthLowRes : register(t0); cbuffer DepthUpscaleCB : register(b0) { - float2 SourceDim; // Full texture dimensions (texels) - float2 InvSourceDim; // 1.0 / SourceDim - float2 Scale; // resolutionScale (render/display ratio) + float2 SourceDim; // Full texture dimensions (texels) + float2 InvSourceDim; // 1.0 / SourceDim + float2 Scale; // resolutionScale (render/display ratio) float2 Pad; }; diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl index 4167802e0d..16116fb4e0 100644 --- a/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl @@ -1,19 +1,17 @@ cbuffer FeatherCB : register(b0) { - uint CropX; // paste position X in output space - uint CropY; // paste position Y in output space - uint CropW; // crop width - uint CropH; // crop height - 
float FeatherWidth; // feather distance in pixels (inward from crop edge) + uint CropX; // paste position X in output space + uint CropY; // paste position Y in output space + uint CropW; // crop width + uint CropH; // crop height + float FeatherWidth; // feather distance in pixels (inward from crop edge) float3 pad; }; -Texture2D CropTexture : register(t0); // DLSS output (crop-sized, at {0,0}) -RWTexture2D OutputTexture : register(u0); // vrFinalOutput (already filled with periphery) +Texture2D CropTexture : register(t0); // DLSS output (crop-sized, at {0,0}) +RWTexture2D OutputTexture : register(u0); // vrFinalOutput (already filled with periphery) -[numthreads(8, 8, 1)] -void main(uint3 dispatchID : SV_DispatchThreadID) -{ +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { // dispatchID is in crop-local space (0..CropW-1, 0..CropH-1) int2 cropLocal = int2(dispatchID.xy); if (cropLocal.x >= (int)CropW || cropLocal.y >= (int)CropH) diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl index cffececc09..fb6ae3f277 100644 --- a/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl @@ -15,12 +15,12 @@ SamplerState LinearSampler : register(s0); cbuffer FeatheredCompositeCB : register(b0) { - float2 CropOrigin; // paste position (x, y) in output-eye pixel coords - float2 CropSize; // crop width, height in pixels - float FeatherWidth; // feather distance in pixels (inward from crop edge) + float2 CropOrigin; // paste position (x, y) in output-eye pixel coords + float2 CropSize; // crop width, height in pixels + float FeatherWidth; // feather distance in pixels (inward from crop edge) float _pad0; - float2 SrcUVOrigin; // UV origin in source texture for this crop region - float2 SrcUVScale; // UV scale: maps [0,1] crop-local UV to source texture UV range + float2 SrcUVOrigin; // UV 
origin in source texture for this crop region + float2 SrcUVScale; // UV scale: maps [0,1] crop-local UV to source texture UV range }; float4 main(VS_OUTPUT input) : SV_Target @@ -28,9 +28,9 @@ float4 main(VS_OUTPUT input) : SV_Target float2 pixelPos = input.Position.xy; // Distance from each edge of the crop rect (positive = inside) - float distLeft = pixelPos.x - CropOrigin.x; - float distRight = (CropOrigin.x + CropSize.x) - pixelPos.x; - float distTop = pixelPos.y - CropOrigin.y; + float distLeft = pixelPos.x - CropOrigin.x; + float distRight = (CropOrigin.x + CropSize.x) - pixelPos.x; + float distTop = pixelPos.y - CropOrigin.y; float distBottom = (CropOrigin.y + CropSize.y) - pixelPos.y; float minDist = min(min(distLeft, distRight), min(distTop, distBottom)); diff --git a/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl index 98cf61143e..b7c3272b83 100644 --- a/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl @@ -5,8 +5,7 @@ RWTexture2D ColorInOut : register(u0); -[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) -{ +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { float4 c = ColorInOut[dispatchID.xy]; c.a = 1.0; ColorInOut[dispatchID.xy] = c; diff --git a/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl index 291280cdc0..315541e76d 100644 --- a/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl @@ -14,9 +14,7 @@ Texture2D SrcTexture : register(t0); SamplerState LinearSampler : register(s0); RWTexture2D DstTexture : register(u0); -[numthreads(8, 8, 1)] void main(uint3 dispatchID - : SV_DispatchThreadID) -{ +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { if (dispatchID.x >= DstWidth || dispatchID.y >= DstHeight) 
return; diff --git a/src/Features/TAAReorder.cpp b/src/Features/TAAReorder.cpp index a8e0e0b9b3..386d127a38 100644 --- a/src/Features/TAAReorder.cpp +++ b/src/Features/TAAReorder.cpp @@ -392,12 +392,18 @@ namespace TAAReorder context->PSSetSamplers(0, 1, &oldPSSampler); context->PSSetConstantBuffers(0, 1, &oldPSCB); - if (oldBlendState) oldBlendState->Release(); - if (oldVS) oldVS->Release(); - if (oldPS) oldPS->Release(); - if (oldPSSRV) oldPSSRV->Release(); - if (oldPSSampler) oldPSSampler->Release(); - if (oldPSCB) oldPSCB->Release(); + if (oldBlendState) + oldBlendState->Release(); + if (oldVS) + oldVS->Release(); + if (oldPS) + oldPS->Release(); + if (oldPSSRV) + oldPSSRV->Release(); + if (oldPSSampler) + oldPSSampler->Release(); + if (oldPSCB) + oldPSCB->Release(); pasteRTV->Release(); } else { diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index 385a5b2c5c..fbc97118e6 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -3,8 +3,8 @@ #include "Deferred.h" #include "Features/VRStereoOptimizations.h" #include "Hooks.h" -#include "TAAReorder.h" #include "State.h" +#include "TAAReorder.h" #include "Upscaling/DX12SwapChain.h" #include "Upscaling/FidelityFX.h" #include "Upscaling/Streamline.h" @@ -999,16 +999,16 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de D3D11_TEXTURE2D_DESC srcDesc; ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || - vrCropColorIn[0]->desc.Width != cropWidthIn || - vrCropColorIn[0]->desc.Height != cropHeightIn || - vrIntermediateDepth[0]->desc.Width != cropWidthIn || - vrIntermediateDepth[0]->desc.Height != cropHeightIn || - vrIntermediateColorOut[0]->desc.Width != cropWidthOut || - vrIntermediateColorOut[0]->desc.Height != cropHeightOut || - vrFinalOutput[0]->desc.Width != 
eyeWidthOut || - vrFinalOutput[0]->desc.Height != eyeHeightOut); + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrCropColorIn[0]->desc.Width != cropWidthIn || + vrCropColorIn[0]->desc.Height != cropHeightIn || + vrIntermediateDepth[0]->desc.Width != cropWidthIn || + vrIntermediateDepth[0]->desc.Height != cropHeightIn || + vrIntermediateColorOut[0]->desc.Width != cropWidthOut || + vrIntermediateColorOut[0]->desc.Height != cropHeightOut || + vrFinalOutput[0]->desc.Width != eyeWidthOut || + vrFinalOutput[0]->desc.Height != eyeHeightOut); } if (needsRecreate) { @@ -1127,7 +1127,7 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de // Crop depth/mvec/reactive/transparency directly from stereo buffers D3D11_BOX stereoCropBox = { offsetXIn + cropOffsetX, cropOffsetY, 0, - offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &stereoCropBox); context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, @@ -1151,10 +1151,10 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de D3D11_TEXTURE2D_DESC srcDesc; ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || - vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || - vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || + vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); } if (needsRecreate) { 
logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", @@ -1299,8 +1299,8 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst, bool eye0Only) (float)pasteX, (float)pasteY, (float)dlssWidthOut, (float)dlssHeightOut, featherPixels, 0.0f, - 0.0f, 0.0f, // SrcUVOrigin: (0,0) for per-eye textures (identity) - 1.0f, 1.0f, // SrcUVScale: (1,1) for per-eye textures (identity) + 0.0f, 0.0f, // SrcUVOrigin: (0,0) for per-eye textures (identity) + 1.0f, 1.0f, // SrcUVScale: (1,1) for per-eye textures (identity) {} }; memcpy(mapped.pData, &cbData, sizeof(cbData)); @@ -2037,7 +2037,6 @@ std::vector Upscaling::GetActiveConstraints() co return constraints; } - /** * @brief Retrieves the current frame time for frame generation. * @@ -2211,9 +2210,7 @@ void Upscaling::Upscale(ID3D11Texture2D* colorSourceOverride) state->BeginPerfEvent("Upscaling"); // Use color source override if provided (e.g., post-PP intermediate for periphery TAA) - ID3D11Resource* colorSrc = colorSourceOverride - ? static_cast(colorSourceOverride) - : static_cast(main.texture); + ID3D11Resource* colorSrc = colorSourceOverride ? 
static_cast(colorSourceOverride) : static_cast(main.texture); if (upscaleMethod == UpscaleMethod::kDLSS) { streamline.Upscale(colorSrc, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); @@ -2575,4 +2572,3 @@ void Upscaling::BSFaceGenManager_UpdatePendingCustomizationTextures::thunk() func(); runtimeData.dynamicResolutionLock = 0; } - diff --git a/src/Features/Upscaling.h b/src/Features/Upscaling.h index 0cf39ee719..a57bb28b6f 100644 --- a/src/Features/Upscaling.h +++ b/src/Features/Upscaling.h @@ -57,12 +57,12 @@ struct Upscaling : Feature uint streamlineLogLevel = 0; // 0=Off, 1=Default, 2=Verbose float sharpnessFSR = 0.0f; float sharpnessDLSS = 0.0f; - uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M - uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M + uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel float vrDlssViewportScale = 1.0f; // 0.5 to 1.0, fraction of each eye that DLSS processes (VR only) - uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) - float vrDlssCropOffsetX = 0.0f; // 0.0-0.3, nasal offset fraction for DLSS crop position - float vrDlssFeatherWidth = 0.0f; // 0.0-0.1, feather width fraction at DLSS crop boundary (disabled pending fix) + uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) + float vrDlssCropOffsetX = 0.0f; // 0.0-0.3, nasal offset fraction for DLSS crop position + float vrDlssFeatherWidth = 0.0f; // 0.0-0.1, feather width fraction at DLSS crop boundary (disabled pending fix) }; Settings settings; @@ -161,8 +161,8 @@ struct Upscaling : Feature eastl::unique_ptr vrCropColorIn[2]; // crop-sized DLSS color input (VR viewport scaling only) // Periphery TAA (conductor approach) — used by two-call func() flow - winrt::com_ptr vrPreTAACopy; // 
full stereo kMAIN copy (Phase 1 PP, pre-TAA) - eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) + winrt::com_ptr vrPreTAACopy; // full stereo kMAIN copy (Phase 1 PP, pre-TAA) + eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) // Periphery fill compute shader (bilinear upscale render-res → display-res for VR viewport scaling) winrt::com_ptr vrPeripheryFillCS; @@ -179,9 +179,9 @@ struct Upscaling : Feature winrt::com_ptr vrFeatheredCompositeBlendState; // DLSS composite pixel shaders (format-converting fullscreen copy for TAAReorder) - winrt::com_ptr vrDlssCompositePS; // point-sample (same-res format conversion) - winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) - winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params + winrt::com_ptr vrDlssCompositePS; // point-sample (same-res format conversion) + winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) + winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params ID3D11PixelShader* GetDlssCompositePS(); ID3D11PixelShader* GetDlssUpscalePS(); diff --git a/src/Features/Upscaling/Streamline.cpp b/src/Features/Upscaling/Streamline.cpp index 304883e588..832f5714ae 100644 --- a/src/Features/Upscaling/Streamline.cpp +++ b/src/Features/Upscaling/Streamline.cpp @@ -630,8 +630,8 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r // When viewport scaling, use crop-sized vrCropColorIn; otherwise use full vrIntermediateColorIn ID3D11Resource* colorInput = viewportScaling ? 
- upscaling.vrCropColorIn[i]->resource.get() : - upscaling.vrIntermediateColorIn[i]->resource.get(); + upscaling.vrCropColorIn[i]->resource.get() : + upscaling.vrIntermediateColorIn[i]->resource.get(); EvaluateDLSS(vp, i, colorInput, upscaling.vrIntermediateColorOut[i]->resource.get(), diff --git a/src/Globals.cpp b/src/Globals.cpp index 907b7dcba0..f4c245e3a4 100644 --- a/src/Globals.cpp +++ b/src/Globals.cpp @@ -324,8 +324,10 @@ namespace globals pDepthStencilView->GetResource(&clearRes); mainDepth.views[0]->GetResource(&mainRes); bool isMainDSV = (clearRes == mainRes); - if (clearRes) clearRes->Release(); - if (mainRes) mainRes->Release(); + if (clearRes) + clearRes->Release(); + if (mainRes) + mainRes->Release(); if (isMainDSV) { ClearFlags &= ~D3D11_CLEAR_STENCIL; if (ClearFlags == 0) From be768a65607a9dde2f6c1eb6af64aff469289956 Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 22 Mar 2026 11:50:29 -0600 Subject: [PATCH 15/16] feat(vr): SSS/SSGI VR optimizations + remove dead CAS shader Screen Space Shadows: reduced sample count for right eye when VRStereoOptimizations is active. StereoSync toggle exposed in UI. SSGI: bypass stereoSync when VRStereoOptimizations is active to prevent eye 0 contamination from sparse eye 1 data. Mode texture early-out in compute passes for culled eye 1 pixels. Remove dead CASCS.hlsl (CAS feature fully removed). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Shaders/ScreenSpaceGI/blur.cs.hlsl | 18 +++ .../Shaders/ScreenSpaceGI/gi.cs.hlsl | 13 +++ .../ScreenSpaceGI/radianceDisocc.cs.hlsl | 14 +++ .../Shaders/ScreenSpaceGI/stereoSync.cs.hlsl | 15 +++ .../Shaders/ScreenSpaceGI/upsample.cs.hlsl | 18 +++ .../ScreenSpaceShadows/RaymarchCS.hlsl | 5 +- .../ScreenSpaceShadows.hlsli | 76 ++++++++++++- .../ScreenSpaceShadows/StereoSyncCS.hlsl | 16 +++ .../ScreenSpaceShadows/bend_sss_gpu.hlsli | 58 +++++----- package/Shaders/VR/CASCS.hlsl | 74 ------------ src/Features/ScreenSpaceGI.cpp | 24 +++- src/Features/ScreenSpaceShadows.cpp | 107 +++++++++++++----- src/Features/ScreenSpaceShadows.h | 9 +- 13 files changed, 310 insertions(+), 137 deletions(-) delete mode 100644 package/Shaders/VR/CASCS.hlsl diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl index 46e34b175e..357f9efd28 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl @@ -19,6 +19,10 @@ RWTexture2D outAccumFrames : register(u0); RWTexture2D outIlY : register(u1); RWTexture2D outIlCoCg : register(u2); +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + // samples = 8, min distance = 0.5, average samples on radius = 2 static const float3 g_Poisson8[8] = { float3(-0.4706069, -0.4427112, +0.6461146), @@ -88,6 +92,20 @@ float2x2 getRotationMatrix(float noise) // Early exit if dispatch thread is outside frame bounds if (any(dtid >= uint2(OUT_FRAME_DIM))) return; + +#if defined(VR_STEREO_OPT) + { + float2 uv = (dtid + .5) * RCP_OUT_FRAME_DIM; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + if (eyeIndex == 1) { + uint2 fullResPx = uint2(uv * FrameDim); + uint mode = StereoOptModeTexture[fullResPx]; + if (mode == 1 || mode == 2) + return; + } + } +#endif + const float2 frameScale = FrameDim * RcpTexDim; float radius = 
BlurRadius; diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl index ebc8b08956..0800601eae 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl @@ -46,6 +46,10 @@ Texture2D srcPrevY : register(t6); // maybe half-res Texture2D srcPrevCoCg : register(t7); // maybe half-res Texture2D srcPrevGISpecular : register(t8); // maybe half-res +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + RWTexture2D outAo : register(u0); RWTexture2D outY : register(u1); RWTexture2D outCoCg : register(u2); @@ -343,6 +347,15 @@ void CalculateGI( float2 uv = (pxCoord + .5) * RCP_OUT_FRAME_DIM; uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint2 fullResPx = uint2(uv * FrameDim); + uint mode = StereoOptModeTexture[fullResPx]; + if (mode == 1 || mode == 2) + return; + } +#endif + float viewspaceZ = READ_DEPTH(srcWorkingDepth, pxCoord); float2 normalSample = FULLRES_LOAD(srcNormalRoughness, pxCoord, uv * frameScale, samplerLinearClamp).xy; diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl index 47317a1c87..af14abf417 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl @@ -15,6 +15,10 @@ Texture2D srcPrevIlY : register(t7); // maybe half-res Texture2D srcPrevIlCoCg : register(t8); // maybe half-res Texture2D srcPrevGISpecular : register(t9); // maybe half-res +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + RWTexture2D outRadianceDisocc : register(u0); RWTexture2D outAccumFrames : register(u1); RWTexture2D outRemappedAo : register(u2); @@ -75,6 +79,16 @@ void readHistory( const float2 uv = 
(pixCoord + .5) * RCP_OUT_FRAME_DIM; const uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint2 fullResPx = uint2(uv * FrameDim); + uint mode = StereoOptModeTexture[fullResPx]; + if (mode == 1 || mode == 2) + return; + } +#endif + const float2 screen_pos = Stereo::ConvertFromStereoUV(uv, eyeIndex); float2 prev_screen_pos = screen_pos; diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl index 365e50236f..e3c8966df8 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl @@ -17,6 +17,10 @@ Texture2D srcAo : register(t1); Texture2D srcIlY : register(t2); Texture2D srcIlCoCg : register(t3); +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + RWTexture2D outAo : register(u0); RWTexture2D outIlY : register(u1); RWTexture2D outIlCoCg : register(u2); @@ -59,6 +63,17 @@ float4 SampleCrossDepths(float2 centerUV, float2 step, float2 texScale, uint eye uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint2 fullResPx = uint2(uv * FrameDim); + uint mode = StereoOptModeTexture[fullResPx]; + if (mode == 1 || mode == 2) { + Passthrough(dtid); + return; + } + } +#endif + // SSGI working depth is linear view-space Z. // 0.0 = mask (outside lens area). FP_Z = first-person hands threshold (~18.0). 
float depth = srcDepth.SampleLevel(samplerPointClamp, uv * frameScale, RES_MIP); diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl index 9a7015db59..6e4913399d 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl @@ -1,6 +1,7 @@ // depth-aware upsampling: https://gist.github.com/pixelmager/a4364ea18305ed5ca707d89ddc5f8743 #include "Common/FastMath.hlsli" +#include "Common/VR.hlsli" #include "ScreenSpaceGI/common.hlsli" Texture2D srcDepth : register(t0); @@ -9,6 +10,10 @@ Texture2D srcIlY : register(t2); // half-res Texture2D srcIlCoCg : register(t3); // half-res Texture2D srcGiSpecular : register(t4); // half-res +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + RWTexture2D outAo : register(u0); RWTexture2D outIlY : register(u1); RWTexture2D outIlCoCg : register(u2); @@ -23,6 +28,19 @@ RWTexture2D outGiSpecular : register(u3); // Early exit if dispatch thread is outside frame bounds if (any(dtid >= uint2(FrameDim))) return; + +#if defined(VR_STEREO_OPT) + { + float2 uv = (dtid + .5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + if (eyeIndex == 1) { + uint mode = StereoOptModeTexture[dtid]; + if (mode == 1 || mode == 2) + return; + } + } +#endif + #ifdef HALF_RES int2 px00 = (dtid >> 1) + (dtid & 1) - 1; #else // QUARTER_RES diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl index 132ad940b1..19982b474b 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl @@ -50,7 +50,10 @@ cbuffer PerFrame : register(b1) parameters.DynamicRes = DynamicRes; - parameters.UsePrecisionOffset = true; + // VR note: precision 
offset adds a depth bias that can cause subtle shadow + // shifting. Disabled to match the old (stable) SSS implementation. + // See: docs/development/Old code/RaymarchCS.hlsl + parameters.UsePrecisionOffset = false; WriteScreenSpaceShadow(parameters, groupID, groupThreadID); } \ No newline at end of file diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ScreenSpaceShadows.hlsli b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ScreenSpaceShadows.hlsli index 0d1f221726..b4a70a1fdf 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ScreenSpaceShadows.hlsli +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ScreenSpaceShadows.hlsli @@ -1,10 +1,82 @@ +// Screen Space Shadows consumption helper. +// Non-VR: depth-weighted 4-sample Poisson blur for spatial denoising. +// VR: direct Load — the Poisson blur's per-pixel noise rotation is +// screen-position-dependent, causing shadows to shift on camera movement. +// Without TAA to average out the rotation noise, the instability hits +// the final output directly. Matches the stable v1.2 VR implementation. + +#include "Common/Math.hlsli" namespace ScreenSpaceShadows { Texture2D ScreenSpaceShadowsTexture : register(t45); + float4 GetBlurWeights(float4 depths, float centerDepth) + { + centerDepth += 1.0; + float depthSharpness = saturate((1024.0 * 1024.0) / (centerDepth * centerDepth)); + float4 depthDifference = (depths - centerDepth) * depthSharpness; + return exp2(-depthDifference * depthDifference); + } + float GetScreenSpaceShadow(float3 screenPosition, float2 uv, float noise, uint eyeIndex) { - return ScreenSpaceShadowsTexture.Load(int3(int2(screenPosition.xy + 0.5f), 0)).x; +#if defined(VR) + // VR: direct sample, no spatial blur. The Poisson blur's per-pixel noise + // rotation is screen-position-dependent — camera movement changes the + // rotation angle for the same world surface, causing shadows to visually + // shift. 
Without TAA to average out the rotation noise, the per-frame + // instability hits the final output directly. Direct Load avoids this. + // Matches the stable v1.2 VR implementation. + return ScreenSpaceShadowsTexture.Load(int3(screenPosition.xy, 0)); +#else + // Flat: depth-weighted 4-sample Poisson blur for spatial denoising. + // Rotated per-pixel by screen-space noise to break structured patterns. + // TAA averages out the rotation noise across frames. + noise *= Math::TAU; + + half2x2 rotationMatrix = half2x2(cos(noise), sin(noise), -sin(noise), cos(noise)); + + float4 shadowSamples = 0; + float4 depthSamples = 0; + +# if defined(DEFERRED) && !defined(DO_ALPHA_TEST) + depthSamples[0] = screenPosition.z; +# else + depthSamples[0] = SharedData::DepthTexture.Load(int3(screenPosition.xy, 0)); +# endif + + shadowSamples[0] = ScreenSpaceShadowsTexture.Load(int3(screenPosition.xy, 0)); + + static const float2 BlurOffsets[3] = { + float2(-0.6720635096678028f, 0.6601738628451107f), + float2(0.6110340335380645f, 0.5269905984201742f), + float2(0.20239029763403027f, -0.7841160574831084f), + }; + + [unroll] for (uint i = 1; i < 4; i++) + { + float2 offset = mul(BlurOffsets[i - 1], rotationMatrix) * 0.0025; + + float2 sampleUV = uv + offset; + sampleUV = saturate(sampleUV); + + int3 sampleCoord = SharedData::ConvertUVToSampleCoord(sampleUV, eyeIndex); + + depthSamples[i] = SharedData::DepthTexture.Load(sampleCoord).x; + shadowSamples[i] = ScreenSpaceShadowsTexture.Load(sampleCoord); + } + + depthSamples = SharedData::GetScreenDepths(depthSamples); + + float4 blurWeights = GetBlurWeights(depthSamples, depthSamples[0]); + float shadow = dot(shadowSamples, blurWeights); + + float blurWeightsTotal = dot(blurWeights, 1.0); + [flatten] if (blurWeightsTotal > 0.0) + shadow = shadow / blurWeightsTotal; + + return shadow; +#endif } -} \ No newline at end of file +} diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl b/features/Screen-Space 
Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl index 92f0066261..3079a390eb 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl @@ -15,6 +15,10 @@ Texture2D SrcDepthTexture : register(t0); Texture2D SrcShadowTexture : register(t1); +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + RWTexture2D OutShadowTexture : register(u0); cbuffer StereoSyncCB : register(b1) @@ -90,6 +94,18 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); +#if defined(VR_STEREO_OPT) + // Eye 1 pixels with mode 1 (edge) or 2 (main) will be overwritten by StereoBlend + // reprojection, so skip the expensive stereo sync work and write neutral (unshadowed). + if (eyeIndex == 1) { + uint mode = StereoOptModeTexture[uint2(dtid.xy)] & 0x0F; + if (mode == 1 || mode == 2) { + OutShadowTexture[dtid] = 1.0; // 1.0 = no shadow (neutral) + return; + } + } +#endif + float depth = SrcDepthTexture[dtid]; // depth == 0: VR HMD mask; depth == 1: sky/far plane diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli index 5a569d732f..cec2ae8e7b 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli @@ -225,17 +225,15 @@ void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int // We sample depth twice per pixel per sample, and interpolate with an edge detect filter // Interpolation should only occur on the minor axis of the ray - major axis coordinates should be at pixel centers half2 read_xy = floor(pixel_xy); - - read_xy *= inParameters.DynamicRes; - -#if defined(VR) - read_xy *= half2(0.5, 1.0); -#endif + // VR fix: do NOT pre-scale read_xy 
here. DynamicRes and VR 0.5x must be + // applied AFTER offset_xy addition so the bilinear neighbor is exactly + // 1 texel away. Pre-scaling causes the offset to sample ~3px away, + // breaking edge detection and causing shadow instability on camera movement. + // See: docs/development/Old code/bend_sss_gpu.hlsli for the correct ordering. half minor_axis = x_axis_major ? pixel_xy.y : pixel_xy.x; - // If a pixel has been detected as an edge, then optionally (inParameters.IgnoreEdgePixels) don't include it in the shadow - const half edge_skip = 1e20; // if edge skipping is enabled, apply an extreme value/blend on edge samples to push the value out of range + const half edge_skip = 1e20; half2 depths; half bilinear = frac(minor_axis) - 0.5; @@ -247,34 +245,47 @@ void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int half bias = bilinear > 0 ? 1 : -1; half2 offset_xy = half2(x_axis_major ? 0 : bias, x_axis_major ? bias : 0); - // HLSL enforces that a pixel offset is a compile-time constant, which isn't strictly required (and can sometimes be a bit faster) - // So this fallback will use a manual uv offset instead - half2 coord = read_xy * inParameters.InvDepthTextureSize; - half2 coord_with_offset = (read_xy + offset_xy) * inParameters.InvDepthTextureSize; + // VR fix: scale by DynamicRes AFTER offset_xy is incorporated, so the + // offset represents exactly 1 texel in the final UV space. 
+ half2 coord = read_xy * inParameters.InvDepthTextureSize * inParameters.DynamicRes; + half2 coord_with_offset = (read_xy + offset_xy) * inParameters.InvDepthTextureSize * inParameters.DynamicRes; #if defined(VR) + // VR side-by-side: halve x to map stereo pixel coords to texture UV + coord *= half2(0.5, 1.0); + coord_with_offset *= half2(0.5, 1.0); + # if defined(RIGHT) - // Right eye: valid UV range is [0.5, 1.0] + // Right eye: valid UV range is [0.5*DynRes.x, DynRes.x] bool coord_out_of_eye = coord.x < 0.5 * inParameters.DynamicRes.x; bool coord_offset_out_of_eye = coord_with_offset.x < 0.5 * inParameters.DynamicRes.x; # else - // Left eye: valid UV range is [0.0, 0.5) + // Left eye: valid UV range is [0.0, 0.5*DynRes.x) bool coord_out_of_eye = coord.x >= 0.5 * inParameters.DynamicRes.x; bool coord_offset_out_of_eye = coord_with_offset.x >= 0.5 * inParameters.DynamicRes.x; # endif + // Clamp cross-eye depth reads to FarDepthValue (1.0) so rays near the SBS + // center seam don't sample the other eye's depth. At distance, stereo parallax + // makes cross-eye depth noticeably different, causing shadow patterns to shift + // with camera movement. Clamping to 1.0 means the ray sees “no occluder” at + // the boundary — shadow weakens by ~1 pixel but stays temporally stable. + // The WRITE guard is intentionally removed (see below GroupMemoryBarrier section) + // so both dispatches write to the seam overlap, preventing a visible gap/line. depths.x = coord_out_of_eye ? 1.0 : inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, coord, 0); depths.y = coord_offset_out_of_eye ? 1.0 : inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, coord_with_offset, 0); - depths.x = lerp(depths.x, 1.0, (float)(depths.x == 0)); // Stencil area - depths.y = lerp(depths.y, 1.0, (float)(depths.y == 0)); // Stencil area + // VR HMD mask: depth==0 is outside the visible lens area. 
Remap to + // FarDepthValue (1.0) so mask pixels don't cast false shadows. + depths.x = lerp(depths.x, 1.0, (float)(depths.x == 0)); + depths.y = lerp(depths.y, 1.0, (float)(depths.y == 0)); #else depths.x = inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, coord, 0); depths.y = inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, coord_with_offset, 0); #endif // Depth thresholds (bilinear/shadow thickness) are based on a fractional ratio of the difference between sampled depth and the far clip depth - depth_thickness_scale[i] = abs(inParameters.FarDepthValue - depths.x); + depth_thickness_scale[i] = max(abs(inParameters.FarDepthValue - depths.x), 1e-4); // If depth variance is more than a specific threshold, then just use point filtering bool use_point_filter = abs(depths.x - depths.y) > depth_thickness_scale[i] * inParameters.BilinearThreshold; @@ -321,18 +332,6 @@ void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int // Sync wavefronts now groupshared DepthData is written GroupMemoryBarrierWithGroupSync(); -#if defined(VR) - // Check if the pixel we're writing to is on the correct eye side - half writeX = write_xy.x * inParameters.InvDepthTextureSize.x; - -# if defined(RIGHT) - if (writeX < 0.0) - return; -# else - if (writeX > 1.0) - return; -# endif -#endif half start_depth = sampling_depth[0]; @@ -381,5 +380,6 @@ void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int // Asking the GPU to write scattered single-byte pixels isn't great, // But thankfully the latency is hidden by all the work we're doing... 
+ inParameters.OutputTexture[(int2)write_xy] = result; } \ No newline at end of file diff --git a/package/Shaders/VR/CASCS.hlsl b/package/Shaders/VR/CASCS.hlsl deleted file mode 100644 index 6ed2768e43..0000000000 --- a/package/Shaders/VR/CASCS.hlsl +++ /dev/null @@ -1,74 +0,0 @@ -// AMD Contrast Adaptive Sharpening (CAS) - Sharpen-only for VR -// Based on AMD FidelityFX CAS (sharpen-only path) -// Reference: https://gpuopen.com/fidelityfx-cas/ -// -// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -// CASParams[0] = sharpness (0.0 = no sharpening, 1.0 = maximum sharpening) -StructuredBuffer CASParams : register(t1); - -Texture2D Source : register(t0); -RWTexture2D Dest : register(u0); - -[numthreads(8, 8, 1)] void main(uint3 DTid : SV_DispatchThreadID) { - uint2 texDim; - Dest.GetDimensions(texDim.x, texDim.y); - - if (DTid.x >= texDim.x || DTid.y >= texDim.y) - return; - - float sharpness = CASParams[0]; - - // Fetch 3x3 neighborhood - int2 sp = int2(DTid.xy); - float3 a = Source.Load(int3(sp + int2(-1, -1), 0)).rgb; - float3 b = Source.Load(int3(sp + int2(0, -1), 0)).rgb; - float3 c = Source.Load(int3(sp + int2(1, -1), 0)).rgb; - float3 d = Source.Load(int3(sp + int2(-1, 0), 0)).rgb; - float3 e = Source.Load(int3(sp, 0)).rgb; - float3 f = Source.Load(int3(sp + int2(1, 0), 0)).rgb; - float3 g = Source.Load(int3(sp + int2(-1, 1), 0)).rgb; - float3 h = Source.Load(int3(sp + int2(0, 1), 0)).rgb; - float3 i = Source.Load(int3(sp + int2(1, 1), 0)).rgb; - - // Soft min/max of cross neighborhood - float3 mnRGB = min(min(min(d, e), min(f, b)), h); - float3 mxRGB = max(max(max(d, e), max(f, b)), h); - - // Expand with diagonal neighbors for soft min/max - float3 mnRGB2 = min(min(a, c), min(g, i)); - float3 mxRGB2 = max(max(a, c), max(g, i)); - mnRGB += mnRGB2; - mxRGB += mxRGB2; - - // Adaptive sharpening amount - float3 ampRGB = saturate(min(mnRGB, 2.0 - mxRGB) * rcp(max(mxRGB, 1e-4))); - ampRGB = rsqrt(ampRGB); - - // Peak controls sharpening strength: - // sharpness 0.0 -> peak 8.0 (no sharpening) - // sharpness 1.0 -> peak 5.0 (maximum sharpening) - float peak = -3.0 * sharpness + 8.0; - float3 wRGB = -rcp(ampRGB * peak); - float3 rcpWeightRGB = rcp(4.0 * wRGB + 1.0); - - // Apply sharpening filter - float3 outColor = saturate(((b + d) + (f + h)) * wRGB + e) * rcpWeightRGB; - - Dest[DTid.xy] = float4(outColor, 1.0); -} diff --git a/src/Features/ScreenSpaceGI.cpp b/src/Features/ScreenSpaceGI.cpp index 458beccb23..fabd62ecfd 100644 --- 
a/src/Features/ScreenSpaceGI.cpp +++ b/src/Features/ScreenSpaceGI.cpp @@ -5,6 +5,7 @@ #include "Deferred.h" #include "State.h" #include "Util.h" +#include "VRStereoOptimizations.h" NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( ScreenSpaceGI::Settings, @@ -593,6 +594,8 @@ void ScreenSpaceGI::CompileComputeShaders() for (auto& info : shaderInfos) { if (REL::Module::IsVR()) info.defines.push_back({ "VR", "" }); + if (REL::Module::IsVR() && globals::features::vrStereoOptimizations.loaded) + info.defines.push_back({ "VR_STEREO_OPT", "" }); if (settings.ResolutionMode == 1) info.defines.push_back({ "HALF_RES", "" }); if (settings.ResolutionMode == 2) @@ -743,6 +746,15 @@ void ScreenSpaceGI::DrawSSGI() context->CSSetConstantBuffers(5, 1, &sharedDataBuf); context->CSSetSamplers(0, (uint)samplers.size(), samplers.data()); + // Bind VRStereoOptimizations mode texture for Eye 1 compute culling + auto& vrStereoOpt = globals::features::vrStereoOptimizations; + bool stereoOptActive = REL::Module::IsVR() && vrStereoOpt.loaded && vrStereoOpt.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; + if (stereoOptActive) { + ID3D11ShaderResourceView* modeSRV = vrStereoOpt.GetModeTextureSRV(); + if (modeSRV) + context->CSSetShaderResources(16, 1, &modeSRV); + } + // prefilter depths { TracyD3D11Zone(globals::state->tracyCtx, "SSGI - Prefilter Depths"); @@ -873,7 +885,11 @@ void ScreenSpaceGI::DrawSSGI() // VR stereo sync: bilateral blend of SSGI buffers between eyes // Shi, Billeter, Eisemann 2022, "Stereo-consistent screen-space ambient occlusion" - if (REL::Module::IsVR() && stereoSyncCompute) { + bool useStereoOpt = REL::Module::IsVR() && + globals::features::vrStereoOptimizations.loaded && + globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; + + if (REL::Module::IsVR() && stereoSyncCompute && !useStereoOpt) { TracyD3D11Zone(globals::state->tracyCtx, "SSGI - Stereo Sync"); if (globals::state->frameAnnotations) @@ 
-930,6 +946,12 @@ void ScreenSpaceGI::DrawSSGI() // cleanup resetViews(); + // Unbind VRStereoOptimizations mode texture SRV + if (stereoOptActive) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->CSSetShaderResources(16, 1, &nullSRV); + } + samplers.fill(nullptr); cb = nullptr; diff --git a/src/Features/ScreenSpaceShadows.cpp b/src/Features/ScreenSpaceShadows.cpp index 6f1a8194d9..55549c105d 100644 --- a/src/Features/ScreenSpaceShadows.cpp +++ b/src/Features/ScreenSpaceShadows.cpp @@ -1,6 +1,7 @@ #include "ScreenSpaceShadows.h" #include "State.h" +#include "VRStereoOptimizations.h" #pragma warning(push) #pragma warning(disable: 4838 4244) @@ -40,13 +41,13 @@ void ScreenSpaceShadows::DrawSettings() if (auto _tt = Util::HoverTooltipWrapper()) ImGui::Text("Contrast boost for the shadow transition. Higher values produce harder shadow edges."); - if (globals::game::isVR && globals::state->IsDeveloperMode()) { + if (globals::game::isVR) { ImGui::Checkbox("VR Stereo Sync", &enableStereoSync); if (auto _tt = Util::HoverTooltipWrapper()) ImGui::Text( "Synchronizes shadow data between left and right eyes via bilateral reprojection " "and applies a depth-weighted blur to reduce per-eye noise. " - "Uses min-blend so if either eye detects an occluder, the shadow is preserved. 
"); + "Uses min-blend so if either eye detects an occluder, the shadow is preserved."); } ImGui::Spacing(); @@ -65,6 +66,10 @@ void ScreenSpaceShadows::InvalidateRaymarchShaders() raymarchRightCS->Release(); raymarchRightCS = nullptr; } + if (raymarchRightReducedCS) { + raymarchRightReducedCS->Release(); + raymarchRightReducedCS = nullptr; + } } void ScreenSpaceShadows::ClearShaderCache() @@ -78,23 +83,13 @@ void ScreenSpaceShadows::ClearShaderCache() uint ScreenSpaceShadows::GetScaledSampleCount() { - float2 renderSize = Util::ConvertToDynamic(globals::state->screenSize); - - // In VR, renderSize covers both eyes side-by-side; raymarch dispatches per-eye - if (globals::game::isVR) - renderSize.x /= 2.0f; - - // Scale sample count based on both dimensions relative to 1920x1080 reference - float2 referenceRes = { 1920.0f, 1080.0f }; - float referenceArea = referenceRes.x * referenceRes.y; - float currentArea = renderSize.x * renderSize.y; - float areaScale = std::sqrt(currentArea / referenceArea); - uint scaledSampleCount = static_cast(std::round(bendSettings.SampleCount * 60 * areaScale)); - - // Quantize to steps of 8 to prevent frequent recompilation from small DRS oscillations - scaledSampleCount = ((scaledSampleCount + 7u) / 8u) * 8u; - scaledSampleCount = std::max(scaledSampleCount, 8u); - + // Shadow reach in pixels is resolution-independent: a tree branch casts + // the same pixel-length shadow at 1080p and 3000p. Sample count controls + // reach, not quality-per-pixel. The old formula (multiplier * 64) was + // correct; the area-based scaling produced 2-8x more samples at VR + // resolution with no quality benefit, only GPU cost. + // Always produces WAVE_SIZE-aligned counts for correct Bend READ_COUNT. 
+ uint scaledSampleCount = bendSettings.SampleCount * 64; return scaledSampleCount; } @@ -117,11 +112,44 @@ ID3D11ComputeShader* ScreenSpaceShadows::GetComputeRaymarchRight() { if (!raymarchRightCS) { uint scaledSampleCount = GetScaledSampleCount(); - raymarchRightCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\RaymarchCS.hlsl", { { "SAMPLE_COUNT", std::format("{}", scaledSampleCount).c_str() }, { "RIGHT", "" } }, "cs_5_0"); + auto sampleCountStr = std::format("{}", scaledSampleCount); + std::vector> defines = { + { "SAMPLE_COUNT", sampleCountStr.c_str() }, + { "RIGHT", "" } + }; + raymarchRightCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\RaymarchCS.hlsl", defines, "cs_5_0"); } return raymarchRightCS; } +ID3D11ComputeShader* ScreenSpaceShadows::GetComputeRaymarchRightReduced() +{ + uint fullCount = GetScaledSampleCount(); + uint divisor = (stereoOptRightEyeReduction == 1) ? 4 : 2; + uint reducedCount = std::max(fullCount / divisor, 64u); + // Quantize to WAVE_SIZE (64) for clean READ_COUNT in Bend's algorithm + reducedCount = ((reducedCount + 63u) / 64u) * 64u; + + if (reducedCount != lastCompiledReducedSampleCount) { + lastCompiledReducedSampleCount = reducedCount; + if (raymarchRightReducedCS) { + raymarchRightReducedCS->Release(); + raymarchRightReducedCS = nullptr; + } + } + + if (!raymarchRightReducedCS) { + auto sampleCountStr = std::format("{}", reducedCount); + std::vector> defines = { + { "SAMPLE_COUNT", sampleCountStr.c_str() }, + { "RIGHT", "" } + }; + raymarchRightReducedCS = (ID3D11ComputeShader*)Util::CompileShader( + L"Data\\Shaders\\ScreenSpaceShadows\\RaymarchCS.hlsl", defines, "cs_5_0"); + } + return raymarchRightReducedCS; +} + void ScreenSpaceShadows::DrawShadows() { ZoneScoped; @@ -148,6 +176,7 @@ void ScreenSpaceShadows::DrawShadows() auto lightProjectionF = CalculateLightProjection(0); float2 renderSize = Util::ConvertToDynamic(state->screenSize); + int viewportSize[2] 
= { (int)renderSize.x, (int)renderSize.y }; if (globals::game::isVR) @@ -156,12 +185,11 @@ void ScreenSpaceShadows::DrawShadows() int minRenderBounds[2] = { 0, 0 }; int maxRenderBounds[2] = { viewportSize[0], viewportSize[1] }; - // Setup common render state auto* depthSRV = Util::GetCurrentSceneDepthSRV(); - context->CSSetShaderResources(0, 1, &depthSRV); + auto* shadowUAV = screenSpaceShadowsTexture->uav.get(); - auto uav = screenSpaceShadowsTexture->uav.get(); - context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + context->CSSetShaderResources(0, 1, &depthSRV); + context->CSSetUnorderedAccessViews(0, 1, &shadowUAV, nullptr); context->CSSetSamplers(0, 1, &pointBorderSampler); @@ -170,7 +198,8 @@ void ScreenSpaceShadows::DrawShadows() auto viewport = globals::game::graphicsState; - float2 dynamicRes = { viewport->GetRuntimeData().dynamicResolutionWidthRatio, viewport->GetRuntimeData().dynamicResolutionHeightRatio }; + float2 dynamicRes = { viewport->GetRuntimeData().dynamicResolutionWidthRatio, + viewport->GetRuntimeData().dynamicResolutionHeightRatio }; // Shared dispatch logic for both VR and non-VR auto DispatchEye = [&](const char* eyeName, ID3D11ComputeShader* shader, const float* lightProj, @@ -228,9 +257,21 @@ void ScreenSpaceShadows::DrawShadows() } else { DispatchEye("Left Eye", GetComputeRaymarch(), lightProjectionF.data(), InvTexSizeX, InvTexSizeY); - // Calculate light projection for right eye auto lightProjectionRightF = CalculateLightProjection(1); - DispatchEye("Right Eye", GetComputeRaymarchRight(), lightProjectionRightF.data(), InvTexSizeX, InvTexSizeY); + + bool useStereoOpt = REL::Module::IsVR() && + globals::features::vrStereoOptimizations.loaded && + globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; + + if (useStereoOpt) { + // Reduced sample count for right eye — StereoBlend overwrites most of it + DispatchEye("Right Eye (Reduced)", GetComputeRaymarchRightReduced(), + 
lightProjectionRightF.data(), InvTexSizeX, InvTexSizeY); + } else { + // Full sample count + DispatchEye("Right Eye", GetComputeRaymarchRight(), + lightProjectionRightF.data(), InvTexSizeX, InvTexSizeY); + } } ID3D11ShaderResourceView* views[1]{ nullptr }; @@ -326,16 +367,26 @@ void ScreenSpaceShadows::Prepass() void ScreenSpaceShadows::LoadSettings(json& o_json) { bendSettings = o_json; + if (o_json.contains("StereoOptRightEyeReduction")) + stereoOptRightEyeReduction = o_json["StereoOptRightEyeReduction"]; + if (o_json.contains("EnableStereoSync")) + enableStereoSync = o_json["EnableStereoSync"].get(); } void ScreenSpaceShadows::SaveSettings(json& o_json) { o_json = bendSettings; + o_json["StereoOptRightEyeReduction"] = stereoOptRightEyeReduction; + o_json["EnableStereoSync"] = enableStereoSync; } void ScreenSpaceShadows::RestoreDefaultSettings() { bendSettings = {}; + stereoOptRightEyeReduction = 0; + enableStereoSync = false; + if (globals::game::isVR) + bendSettings.SampleCount = 2; } bool ScreenSpaceShadows::HasShaderDefine(RE::BSShader::Type) @@ -346,7 +397,6 @@ bool ScreenSpaceShadows::HasShaderDefine(RE::BSShader::Type) void ScreenSpaceShadows::SetupResources() { raymarchCB = new ConstantBuffer(ConstantBufferDesc()); - if (globals::game::isVR) { stereoSyncCB = new ConstantBuffer(ConstantBufferDesc()); } @@ -398,4 +448,5 @@ void ScreenSpaceShadows::SetupResources() stereoSyncCopyTex->CreateSRV(srvDesc); } } + } diff --git a/src/Features/ScreenSpaceShadows.h b/src/Features/ScreenSpaceShadows.h index de9b8e1bd4..830d727342 100644 --- a/src/Features/ScreenSpaceShadows.h +++ b/src/Features/ScreenSpaceShadows.h @@ -35,7 +35,7 @@ struct ScreenSpaceShadows : Feature float BilinearThreshold = 0.02f; float ShadowContrast = !globals::game::isVR ? 1.0f : 4.0f; uint Enable = 1; - uint SampleCount = 1; + uint SampleCount = !globals::game::isVR ? 
1u : 2u; uint pad0[3]; }; @@ -62,7 +62,7 @@ struct ScreenSpaceShadows : Feature }; STATIC_ASSERT_ALIGNAS_16(RaymarchCB); - bool enableStereoSync = true; + bool enableStereoSync = false; struct alignas(16) StereoSyncCB { @@ -71,11 +71,15 @@ struct ScreenSpaceShadows : Feature }; STATIC_ASSERT_ALIGNAS_16(StereoSyncCB); + int stereoOptRightEyeReduction = 0; // 0 = Half, 1 = Quarter sample count + ID3D11SamplerState* pointBorderSampler = nullptr; ConstantBuffer* raymarchCB = nullptr; ID3D11ComputeShader* raymarchCS = nullptr; ID3D11ComputeShader* raymarchRightCS = nullptr; + ID3D11ComputeShader* raymarchRightReducedCS = nullptr; + uint lastCompiledReducedSampleCount = 0; Texture2D* screenSpaceShadowsTexture = nullptr; @@ -94,6 +98,7 @@ struct ScreenSpaceShadows : Feature uint lastCompiledSampleCount = 0; ID3D11ComputeShader* GetComputeRaymarch(); ID3D11ComputeShader* GetComputeRaymarchRight(); + ID3D11ComputeShader* GetComputeRaymarchRightReduced(); virtual void Prepass() override; From 680ce74a4b139485f045d808efb0b8bdee2ae1c5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 22 Mar 2026 17:52:28 +0000 Subject: [PATCH 16/16] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20pre-commi?= =?UTF-8?q?t.ci=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated formatting by clang-format, prettier, and other hooks. See https://pre-commit.ci for details. 
--- .../Shaders/ScreenSpaceGI/stereoSync.cs.hlsl | 8 ++++---- .../Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl | 8 ++++---- .../Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli | 1 - src/Features/ScreenSpaceGI.cpp | 4 ++-- src/Features/ScreenSpaceShadows.cpp | 5 ++--- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl index e3c8966df8..3c5cc748cf 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl @@ -17,9 +17,9 @@ Texture2D srcAo : register(t1); Texture2D srcIlY : register(t2); Texture2D srcIlCoCg : register(t3); -#if defined(VR_STEREO_OPT) +# if defined(VR_STEREO_OPT) Texture2D StereoOptModeTexture : register(t16); -#endif +# endif RWTexture2D outAo : register(u0); RWTexture2D outIlY : register(u1); @@ -63,7 +63,7 @@ float4 SampleCrossDepths(float2 centerUV, float2 step, float2 texScale, uint eye uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); -#if defined(VR_STEREO_OPT) +# if defined(VR_STEREO_OPT) if (eyeIndex == 1) { uint2 fullResPx = uint2(uv * FrameDim); uint mode = StereoOptModeTexture[fullResPx]; @@ -72,7 +72,7 @@ float4 SampleCrossDepths(float2 centerUV, float2 step, float2 texScale, uint eye return; } } -#endif +# endif // SSGI working depth is linear view-space Z. // 0.0 = mask (outside lens area). FP_Z = first-person hands threshold (~18.0). 
diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl index 3079a390eb..67dee6957e 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl @@ -15,9 +15,9 @@ Texture2D SrcDepthTexture : register(t0); Texture2D SrcShadowTexture : register(t1); -#if defined(VR_STEREO_OPT) +# if defined(VR_STEREO_OPT) Texture2D StereoOptModeTexture : register(t16); -#endif +# endif RWTexture2D OutShadowTexture : register(u0); @@ -94,7 +94,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); -#if defined(VR_STEREO_OPT) +# if defined(VR_STEREO_OPT) // Eye 1 pixels with mode 1 (edge) or 2 (main) will be overwritten by StereoBlend // reprojection, so skip the expensive stereo sync work and write neutral (unshadowed). if (eyeIndex == 1) { @@ -104,7 +104,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) return; } } -#endif +# endif float depth = SrcDepthTexture[dtid]; diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli index cec2ae8e7b..6ec8ed5316 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli @@ -332,7 +332,6 @@ void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int // Sync wavefronts now groupshared DepthData is written GroupMemoryBarrierWithGroupSync(); - half start_depth = sampling_depth[0]; if (start_depth == 0.0 || start_depth == 1.0) diff --git a/src/Features/ScreenSpaceGI.cpp b/src/Features/ScreenSpaceGI.cpp index fabd62ecfd..a20ef95254 100644 --- a/src/Features/ScreenSpaceGI.cpp +++ b/src/Features/ScreenSpaceGI.cpp 
@@ -886,8 +886,8 @@ void ScreenSpaceGI::DrawSSGI() // VR stereo sync: bilateral blend of SSGI buffers between eyes // Shi, Billeter, Eisemann 2022, "Stereo-consistent screen-space ambient occlusion" bool useStereoOpt = REL::Module::IsVR() && - globals::features::vrStereoOptimizations.loaded && - globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; + globals::features::vrStereoOptimizations.loaded && + globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; if (REL::Module::IsVR() && stereoSyncCompute && !useStereoOpt) { TracyD3D11Zone(globals::state->tracyCtx, "SSGI - Stereo Sync"); diff --git a/src/Features/ScreenSpaceShadows.cpp b/src/Features/ScreenSpaceShadows.cpp index 55549c105d..f5f480b669 100644 --- a/src/Features/ScreenSpaceShadows.cpp +++ b/src/Features/ScreenSpaceShadows.cpp @@ -260,8 +260,8 @@ void ScreenSpaceShadows::DrawShadows() auto lightProjectionRightF = CalculateLightProjection(1); bool useStereoOpt = REL::Module::IsVR() && - globals::features::vrStereoOptimizations.loaded && - globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; + globals::features::vrStereoOptimizations.loaded && + globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; if (useStereoOpt) { // Reduced sample count for right eye — StereoBlend overwrites most of it @@ -448,5 +448,4 @@ void ScreenSpaceShadows::SetupResources() stereoSyncCopyTex->CreateSRV(srvDesc); } } - }