From 3d3e003cb38962e00c44e28d526677e45c4435af Mon Sep 17 00:00:00 2001 From: vrnord Date: Sun, 22 Mar 2026 12:26:02 -0600 Subject: [PATCH 1/3] feat(vr): VR stereo reprojection optimizations Stencil-based Eye 1 culling with compute shader reprojection, folded into VR feature per maintainer request (not standalone). Core features: - Hardware stencil classification + NOT_EQUAL enforcement - Bilinear color sampling for stereo reprojection - POM depth-aware reprojection via Reflectance.w - StereoBlend overwrite mode for culled pixels - DeactivateStencil() on all early-exit paths Cleanup (PR #1982 review feedback addressed): - SharedData cbuffer: VR mip bias fields removed, pad adjusted - Eye 1 sub-pixel jitter removed - Foliage fringe suppression removed - CAS feature fully removed - pixelOffset TRUE_PBR export guard added - pixelOffset parallax fade-out discontinuity fixed - JSON settings loading hardened - Stencil dispatch guards for missing DSV resources - Dead code removed (BILINEAR_UPSCALE, vrTAAdPerEye, vrPreTAACopy) Replaces PR #1982. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../ExtendedMaterials/ExtendedMaterials.hlsli | 5 +- package/Shaders/Common/SharedData.hlsli | 10 +- package/Shaders/Common/VR.hlsli | 1 + package/Shaders/DeferredCompositeCS.hlsl | 14 + package/Shaders/Lighting.hlsl | 10 +- package/Shaders/RunGrass.hlsl | 1 - package/Shaders/VR/StereoBlendCS.hlsl | 241 ++++++- package/Shaders/VR/VRPostProcessCS.hlsl | 109 +++ .../VRStereoOptimizations/ReprojectionCS.hlsl | 55 ++ .../VRStereoOptimizations/StencilCS.hlsl | 153 ++++ .../VRStereoOptimizations/StencilWritePS.hlsl | 54 ++ .../VRStereoOptimizations/StencilWriteVS.hlsl | 24 + .../VRStereoOptimizations/cbuffers.hlsli | 31 + .../Shaders/VRStereoOptimizations/modes.hlsli | 10 + src/Deferred.cpp | 46 +- src/Features/ExtendedMaterials.h | 2 +- src/Features/VR.cpp | 29 +- src/Features/VR.h | 16 +- src/Features/VR/SettingsUI.cpp | 34 +- src/Features/VR/StereoBlend.cpp | 132 +++- src/Features/VRStereoOptimizations.cpp | 651 ++++++++++++++++++ src/Features/VRStereoOptimizations.h | 198 ++++++ src/Globals.cpp | 74 +- src/State.cpp | 1 + 24 files changed, 1829 insertions(+), 72 deletions(-) create mode 100644 package/Shaders/VR/VRPostProcessCS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/StencilCS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl create mode 100644 package/Shaders/VRStereoOptimizations/cbuffers.hlsli create mode 100644 package/Shaders/VRStereoOptimizations/modes.hlsli create mode 100644 src/Features/VRStereoOptimizations.cpp create mode 100644 src/Features/VRStereoOptimizations.h diff --git a/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli index 4152bdb2d5..b8a4e3ccea 100644 --- a/features/Extended 
Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli +++ b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli @@ -325,6 +325,7 @@ namespace ExtendedMaterials float2 GetParallaxCoords(float distance, float2 coords, float mipLevel, float3 viewDir, float3x3 tbn, float noise, Texture2D tex, SamplerState texSampler, uint channel, DisplacementParams params, out float pixelOffset) #endif { + pixelOffset = 0.5; float3 viewDirTS = normalize(mul(tbn, viewDir)); #if defined(LANDSCAPE) viewDirTS.xy /= viewDirTS.z * 0.7 + 0.3 + params[0].FlattenAmount; // Fix for objects at extreme viewing angles @@ -496,7 +497,7 @@ namespace ExtendedMaterials #endif nearBlendToFar *= nearBlendToFar; float offset = (1.0 - parallaxAmount) * -maxHeight + minHeight; - pixelOffset = lerp(parallaxAmount * scale, 0, nearBlendToFar); + pixelOffset = saturate(lerp(parallaxAmount, 0.5, nearBlendToFar)); return lerp(viewDirTS.xy * offset + coords.xy, coords, nearBlendToFar); } @@ -509,7 +510,7 @@ namespace ExtendedMaterials weights[5] = input.LandBlendWeights2.y; #endif - pixelOffset = 0; + pixelOffset = 0.5; return coords; } diff --git a/package/Shaders/Common/SharedData.hlsli b/package/Shaders/Common/SharedData.hlsli index 4ea0d4d07c..d4e54c77a3 100644 --- a/package/Shaders/Common/SharedData.hlsli +++ b/package/Shaders/Common/SharedData.hlsli @@ -20,10 +20,10 @@ namespace SharedData float Timer; uint FrameCount; uint FrameCountAlwaysActive; - bool InInterior; // If the area lacks a directional shadow light e.g. the sun or moon - bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) - bool HideSky; // HideSky flag in WorldSpace, e.g. Blackreach - float MipBias; // Offset to mip level for TAA sharpness# + bool InInterior; // If the area lacks a directional shadow light e.g. 
the sun or moon + bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) + bool HideSky; // HideSky flag in WorldSpace, e.g. Blackreach + float MipBias; // Offset to mip level for TAA sharpness float pad0; float4 AmbientSHR; float4 AmbientSHG; @@ -52,7 +52,7 @@ namespace SharedData bool EnableShadows; bool ExtendShadows; bool EnableParallaxWarpingFix; - float1 pad0; + bool pad0; }; struct CubemapCreatorSettings diff --git a/package/Shaders/Common/VR.hlsli b/package/Shaders/Common/VR.hlsli index d744022781..a3b3783c71 100644 --- a/package/Shaders/Common/VR.hlsli +++ b/package/Shaders/Common/VR.hlsli @@ -21,6 +21,7 @@ cbuffer VRValues : register(b13) float2 EyeOffsetScale : packoffset(c0.z); float4 EyeClipEdge[2] : packoffset(c1); } + #endif namespace Stereo diff --git a/package/Shaders/DeferredCompositeCS.hlsl b/package/Shaders/DeferredCompositeCS.hlsl index f149255718..cfaefc714b 100644 --- a/package/Shaders/DeferredCompositeCS.hlsl +++ b/package/Shaders/DeferredCompositeCS.hlsl @@ -19,6 +19,10 @@ RWTexture2D NormalTAAMaskSpecularMaskRW : register(u1); RWTexture2D MotionVectorsRW : register(u2); Texture2D DepthTexture : register(t4); +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + #if defined(DYNAMIC_CUBEMAPS) Texture2D ReflectanceTexture : register(t5); TextureCube EnvTexture : register(t6); @@ -92,6 +96,16 @@ void SampleSSGISpecular(uint2 pixCoord, sh2 lobe, inout float ao, out float3 il, uv *= FrameBuffer::DynamicResolutionParams2.xy; // adjust for dynamic res uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint mode = StereoOptModeTexture[uint2(dispatchID.xy)] & 0x0F; + if (mode == 2 || mode == 1) { // MODE_MAIN or MODE_EDGE — stencil-culled, reprojected by StereoBlend + return; + } + } +#endif + uv = Stereo::ConvertFromStereoUV(uv, eyeIndex); float3 normalGlossiness = NormalRoughnessTexture[dispatchID.xy]; diff --git 
a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index aad23d96c7..ba0f19f3b9 100644 --- a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -3166,7 +3166,15 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) } # endif - psout.Reflectance = float4(indirectLobeWeights.specular, psout.Diffuse.w); +# if defined(VR) && (defined(EMAT) || defined(TRUE_PBR)) && (defined(PARALLAX) || defined(LANDSCAPE) || defined(TRUE_PBR)) + // VR: store POM parallax amount for stereo reprojection depth correction. + // Read by StereoBlendCS to adjust Eye 1 (right eye) reprojection depth + // at POM-displaced surfaces. Not consumed on flat (SE/AE). + psout.Reflectance = float4(indirectLobeWeights.specular, + (pixelOffset > 0.0) ? saturate(pixelOffset) : 0.0); +# else + psout.Reflectance = float4(indirectLobeWeights.specular, 0.0); +# endif psout.NormalGlossiness = float4(GBuffer::EncodeNormal(screenSpaceNormal), saturate(1.0 - material.Roughness), psout.Diffuse.w); # if defined(SNOW) diff --git a/package/Shaders/RunGrass.hlsl b/package/Shaders/RunGrass.hlsl index f05c3d0edd..820503ab93 100644 --- a/package/Shaders/RunGrass.hlsl +++ b/package/Shaders/RunGrass.hlsl @@ -850,7 +850,6 @@ PS_OUTPUT main(PS_INPUT input) # if defined(RENDER_DEPTH) float diffuseAlpha = input.VertexColor.w * baseColor.w; - if ((diffuseAlpha - AlphaTestRefRS) < 0) { discard; } diff --git a/package/Shaders/VR/StereoBlendCS.hlsl b/package/Shaders/VR/StereoBlendCS.hlsl index 7322e9e513..bf5a082685 100644 --- a/package/Shaders/VR/StereoBlendCS.hlsl +++ b/package/Shaders/VR/StereoBlendCS.hlsl @@ -11,6 +11,7 @@ #include "Common/Color.hlsli" #include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" #include "Common/VR.hlsli" Texture2D ColorTexture : register(t0); @@ -18,6 +19,30 @@ Texture2D DepthTexture : register(t1); RWTexture2D OutputRW : register(u0); +#ifdef STEREO_OVERWRITE +RWTexture2D MotionRW : register(u1); +Texture2D ModeTexture : register(t2); 
+Texture2D ReflectanceTexture : register(t3); // .w = POM pixelOffset from Lighting pass +SamplerState LinearSampler : register(s0); + +# include "VRStereoOptimizations/modes.hlsli" + +// Hardware bilinear color sample at a reprojected stereo UV. +// stereoUV is passed to SampleLevel unscaled; it is only clamped to the texel-centre range [0.5, frameDim - 0.5] (expressed in full-texture UV), +// i.e. the active DRS viewport, to prevent sampling stale data. NOTE(review): assumes stereoUV is already full-texture UV — confirm for frameDim < texture size. +// Motion vectors stay as integer Load() — filtering them breaks DLSS. +float4 SampleReprojectedColor(float2 stereoUV, float2 frameDim) +{ + uint texW, texH; + ColorTexture.GetDimensions(texW, texH); + float2 texSize = float2(texW, texH); + float2 minUV = 0.5 / texSize; + float2 maxUV = (frameDim - 0.5) / texSize; + stereoUV = clamp(stereoUV, minUV, maxUV); + return ColorTexture.SampleLevel(LinearSampler, stereoUV, 0); +} +#endif + cbuffer StereoBlendCB : register(b1) { float2 FrameDim; @@ -25,11 +50,16 @@ cbuffer StereoBlendCB : register(b1) float DepthSigma; float MaxBlendFactor; float ColorDiffThreshold; - float pad; + float DebugEdgeTint; + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer, 3 = POM depth heatmap + float FullBlendDistance; + float POMDepthScale; + float _pad; }; -static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend -static const int kEdgeMargin = 2; // Neighbor offset (pixels) for destination edge + mask boundary check +static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend +static const int kEdgeMargin = 2; // Neighbor offset (pixels) for destination edge + mask boundary check +static const float kDepthAgreementThreshold = 0.015; // Relative depth difference threshold for overwrite mode disocclusion rejection. NOTE(review): no line added by this patch references it (the MODE_FULL_BLEND blend hard-codes 0.02) — confirm or remove // Samples four depth neighbors in a cross pattern (±offset pixels) around center, // clamped to
eyeIndex's half of the packed stereo buffer to avoid seam contamination. @@ -46,6 +76,192 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (any(dtid >= uint2(FrameDim))) return; +#ifdef STEREO_OVERWRITE + // ========================================================================= + // Mode-driven stereo merge: reads per-pixel classification from StencilCS + // and applies appropriate action per mode and eye. + // Mode texture is full SBS resolution — ModeTexture[dtid] maps directly. + // ========================================================================= + + float2 uv = (dtid + 0.5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + float centerDepth = DepthTexture[dtid]; + + // HMD mask pixels (depth >= 1.0 in reversed-Z) — always skip + if (centerDepth >= 1.0) + return; + + uint pixelMode = ModeTexture[dtid]; + + // Debug mode 1: depth map diagnostic — show mode texture as solid colors (all pixels) + if (DebugMode == 1) { + float4 c = ColorTexture[dtid]; + if (pixelMode == MODE_EDGE) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), 0.5), c.a); + else if (pixelMode == MODE_EDGE_NEIGHBOUR) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0, 1), 0.5), c.a); + else if (pixelMode == MODE_DISOCCLUDED) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 0.5, 1), 0.3), c.a); + else if (pixelMode == MODE_FULL_BLEND) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0.5, 0), 0.5), c.a); + return; + } + + // Debug mode 2: full blend depth visualizer — cyan tint based on proximity to FullBlendDistance + if (DebugMode == 2) { + if (centerDepth < 1e-5 || centerDepth >= 1.0) + return; + float linDepth = SharedData::GetScreenDepth(centerDepth); + if (linDepth < FullBlendDistance) { + float4 c = ColorTexture[dtid]; + float proximity = saturate(1.0 - linDepth / max(FullBlendDistance, 1.0)); + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 1), proximity * 0.4), c.a); + } + return; + } + + // Debug mode 3: POM depth data 
visualizer — show Reflectance.w as color + if (DebugMode == 3) { + float pomVal = ReflectanceTexture[dtid].w; + float4 c = ColorTexture[dtid]; + if (pomVal > 1e-2) { + // POM pixel: red-to-green gradient based on parallaxAmount + // Red = peak (high pomVal, closer to camera), Green = valley (low pomVal, farther), Yellow = geometry plane + float3 pomColor = float3(pomVal, 1.0 - pomVal, 0); + OutputRW[dtid] = float4(lerp(c.rgb, pomColor, 0.7), c.a); + } + // Non-POM pixels (pomVal ~ 0) left untouched + return; + } + + // MODE_DISOCCLUDED: fully shaded, leave untouched + if (pixelMode == MODE_DISOCCLUDED) + return; + + // MODE_FULL_BLEND: bilateral blend for 2x supersampling + if (pixelMode == MODE_FULL_BLEND) { + float4 center = ColorTexture[dtid]; + + // Check for POM depth offset at this pixel + // pixelOffset = parallaxAmount (0-1) from ExtendedMaterials, 0.5 = geometry plane. + // Values > 0.5 are peaks (closer to camera), < 0.5 are valleys (farther from camera). + // Correction: high pomVal should push depth closer (smaller linear depth), + // so we use (0.5 - pomOffset) to get a negative correction for peaks. + // Non-POM pixels store 0.0, so threshold > 1e-2 distinguishes them. 
+ float reprojDepthFB = centerDepth; + float pomOffsetFB = ReflectanceTexture[dtid].w; + if (pomOffsetFB > 1e-2 && POMDepthScale > 0) { + float linDepthFB = SharedData::GetScreenDepth(centerDepth); + float depthCorrectionFB = (0.5 - pomOffsetFB) * POMDepthScale; + float newLinDepthFB = max(linDepthFB + depthCorrectionFB, 1e-4); + reprojDepthFB = (SharedData::CameraData.x - SharedData::CameraData.w / newLinDepthFB) / SharedData::CameraData.z; + } + + // Reproject to the other eye + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, reprojDepthFB, eyeIndex, FrameDim); + if (!r.valid) { + // Debug tint for failed reprojection + if (DebugEdgeTint > 0) + OutputRW[dtid] = float4(lerp(center.rgb, float3(1, 0.5, 0), DebugEdgeTint), center.a); + return; + } + + // Only blend with pixels that have valid composited data in both eyes + uint otherMode = ModeTexture[r.otherPx]; + if (otherMode != MODE_FULL_BLEND && otherMode != MODE_DISOCCLUDED) + return; + + float4 otherColor = SampleReprojectedColor(r.otherStereoUV, FrameDim); + float otherDepth = DepthTexture[r.otherPx]; + + // Depth-weighted bilateral blend + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float depthAgreement = 1.0 - saturate(abs(centerDepth - otherDepth) / maxDepth / 0.02); + float blendWeight = 0.5 * depthAgreement; + + float4 result = lerp(center, otherColor, blendWeight); + + if (DebugEdgeTint > 0) + result.rgb = lerp(result.rgb, float3(0, 1, 1), DebugEdgeTint); + + OutputRW[dtid] = result; + return; + } + + if (eyeIndex == 0) { + // Eye 0 (left eye): fully shaded for all modes — only apply debug tint to edge pixels + if (DebugEdgeTint > 0 && pixelMode == MODE_EDGE) { + float4 c = ColorTexture[dtid]; + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), DebugEdgeTint), c.a); + } + return; + } + + // Eye 1 (right eye): reproject all non-disoccluded, non-full-blend pixels + // (MAIN, EDGE) from Eye 0 (left eye). 
In VR stereo rendering, Eye 0 is + // fully shaded; Eye 1 pixels marked as reprojectable by StencilCS are + // filled with reprojected color from Eye 0 to save GPU work. + // StencilCS already performed the authoritative disocclusion check with the correct + // depth buffer state — no redundant depth agreement check here. + float reprojDepth = centerDepth; + + // First-pass reprojection to find Eye 0 source pixel + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, reprojDepth, eyeIndex, FrameDim); + if (!r.valid) + return; + + // Save first-pass result as fallback before POM adjustment + Stereo::StereoBilateralResult firstPassR = r; + + // Read POM offset from Eye 0 source's reflectance.w + // pixelOffset = parallaxAmount (0-1) from ExtendedMaterials, 0.5 = geometry plane. + // Values > 0.5 are peaks (closer to camera), < 0.5 are valleys (farther from camera). + // Correction: high pomVal should push depth closer (smaller linear depth), + // so we use (0.5 - pomOffset) to get a negative correction for peaks. + // Non-POM pixels store 0.0, so threshold > 1e-2 distinguishes them. POMDepthScale <= 0 disables the correction, same guard as the MODE_FULL_BLEND path. + float pomOffset = ReflectanceTexture[r.otherPx].w; + if (pomOffset > 1e-2 && POMDepthScale > 0) { + // Re-reproject with POM-adjusted depth centered at geometry plane + float linearDepth = SharedData::GetScreenDepth(centerDepth); + float depthCorrection = (0.5 - pomOffset) * POMDepthScale; + float newLinearDepth = max(linearDepth + depthCorrection, 1e-4); + reprojDepth = (SharedData::CameraData.x - SharedData::CameraData.w / newLinearDepth) / SharedData::CameraData.z; + r = Stereo::ReprojectToOtherEye(uv, reprojDepth, eyeIndex, FrameDim); + if (!r.valid) + r = firstPassR; // Fall back to non-POM reprojection + } + + // Skip if the Eye 0 source pixel is sky/unrendered (depth at clear value). + // At DeferredPasses time, sky hasn't rendered yet — source would have clear color. + // Let the sky/water pass fill these pixels later instead.
+ float sourceDepth = DepthTexture[r.otherPx]; + if (sourceDepth >= 1.0 || sourceDepth < 1e-5) { + // POM adjustment landed on sky — try the original first-pass source + if (r.otherPx.x != firstPassR.otherPx.x || r.otherPx.y != firstPassR.otherPx.y) { + float fallbackDepth = DepthTexture[firstPassR.otherPx]; + if (fallbackDepth < 1.0 && fallbackDepth >= 1e-5) { + r = firstPassR; + } else { + return; + } + } else { + return; + } + } + + OutputRW[dtid] = SampleReprojectedColor(r.otherStereoUV, FrameDim); + MotionRW[dtid] = MotionRW[r.otherPx]; + +#else // Normal bilateral blend path + +# ifdef EYE0_ONLY + // Only process Eye 0 (left half) - Eye 1 left untouched + float2 uvCheck = (dtid + 0.5) * RcpFrameDim; + if (Stereo::GetEyeIndexFromTexCoord(uvCheck) == 1) + return; +# endif + float2 uv = (dtid + 0.5) * RcpFrameDim; uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); @@ -78,10 +294,6 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (r.valid) { float otherDepth = DepthTexture[r.otherPx]; - // Destination edge detection: skip if the reprojected pixel is near the HMD - // mask boundary or at a depth discontinuity in the other eye. Due to VR - // parallax the arm silhouette appears at a different screen position per eye, - // so the reprojection can cross a boundary invisible from this eye. float4 dstEdgeDepths = SampleCrossDepths(r.otherPx, kEdgeMargin, 1 - eyeIndex); if (any(dstEdgeDepths < 1e-5) || Stereo::MaxDepthDiff(otherDepth, dstEdgeDepths) > kEdgeDepthThreshold) { debugState = 2; @@ -89,9 +301,6 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) float4 otherColor = ColorTexture[r.otherPx]; Stereo::FinalizeStereoBlend(r, uv, centerDepth, otherDepth, eyeIndex, FrameDim, DepthSigma, MaxBlendFactor); - // Only blend where the two eyes actually disagree (screen-space effect - // inconsistency). Luminance difference below the threshold means both - // eyes computed the same result and blending would only destroy parallax. 
float colorDiff = abs(dot(centerColor.rgb, float3(0.2126, 0.7152, 0.0722)) - dot(otherColor.rgb, float3(0.2126, 0.7152, 0.0722))); float colorGate = smoothstep(ColorDiffThreshold * 0.5, ColorDiffThreshold * 2.0, colorDiff); @@ -106,7 +315,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } } -#ifdef DEBUG_BACKCHECK +# ifdef DEBUG_BACKCHECK // Debug visualization (6 states): // Blue = mask/sky: skipped // Yellow = source edge: depth discontinuity at this pixel @@ -123,7 +332,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) float3(0.5, 0.0, 0.0) // 5: back-check failed - red }; OutputRW[dtid] = float4(lerp(centerColor.rgb, debugColors[debugState], 0.7), centerColor.a); -#elif defined(DEBUG_BLEND_WEIGHT) +# elif defined(DEBUG_BLEND_WEIGHT) // Blend weight heatmap: only pixels with actual blend activity are colorized. // Untouched pixels pass through unmodified. float w = saturate(r.blendWeight / max(MaxBlendFactor, 1e-5)); @@ -133,7 +342,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } else { OutputRW[dtid] = centerColor; } -#elif defined(DEBUG_EDGE_DETECTION) +# elif defined(DEBUG_EDGE_DETECTION) // Edge detection visualizer: highlights pixels excluded by depth discontinuity checks. // Non-edge pixels show the normal blended output for scene context. // Bright yellow = source edge: discontinuity at this pixel @@ -145,7 +354,9 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } else { OutputRW[dtid] = blendedColor; } -#else +# else OutputRW[dtid] = blendedColor; -#endif +# endif + +#endif // STEREO_OVERWRITE } diff --git a/package/Shaders/VR/VRPostProcessCS.hlsl b/package/Shaders/VR/VRPostProcessCS.hlsl new file mode 100644 index 0000000000..770e244553 --- /dev/null +++ b/package/Shaders/VR/VRPostProcessCS.hlsl @@ -0,0 +1,109 @@ +// VR Post-Process - Bilateral blend for near-camera 2x supersampling +// +// Runs after all compositing and stereo blending is complete. 
+// Reads per-pixel classification from StencilCS and applies: +// - MODE_FULL_BLEND: bilateral depth-weighted blend for 2x supersampling +// +// Only MODE_FULL_BLEND pixels are processed. All others pass through untouched. + +#include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" +#include "Common/VR.hlsli" + +Texture2D ColorTexture : register(t0); // Copy of final composited image +Texture2D ModeTexture : register(t1); +Texture2D DepthTexture : register(t2); + +RWTexture2D OutputRW : register(u0); + +cbuffer VRPostProcessCB : register(b1) +{ + float2 FrameDim; + float2 RcpFrameDim; + float DebugEdgeTint; // 0 = off, >0 = debug visualization strength + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer + float FullBlendDistance; // Linearized depth threshold for full blend zone visualization + float _pad; // Pad to 16-byte alignment +}; + +#include "VRStereoOptimizations/modes.hlsli" + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + if (any(dtid >= uint2(FrameDim))) + return; + + uint pixelMode = ModeTexture[dtid]; + + // Depth map diagnostic: show mode texture contents as solid colors + if (DebugMode == 1) { + float4 c = ColorTexture[dtid]; + if (pixelMode == MODE_EDGE) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), 0.5), c.a); + else if (pixelMode == MODE_EDGE_NEIGHBOUR) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0, 1), 0.5), c.a); + else if (pixelMode == MODE_DISOCCLUDED) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 0.5, 1), 0.3), c.a); + else if (pixelMode == MODE_FULL_BLEND) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0.5, 0), 0.5), c.a); // Orange = full blend zone + return; + } + + // Full blend depth visualizer: shows the depth boundary as a cyan tint + if (DebugMode == 2) { + float2 uvDb = (dtid + 0.5) * RcpFrameDim; + float depthDb = DepthTexture[dtid]; + if (depthDb < 1e-5 || depthDb >= 1.0) + return; + float linDepth = SharedData::GetScreenDepth(depthDb); 
+ if (linDepth < FullBlendDistance) { + float4 c = ColorTexture[dtid]; + float proximity = saturate(1.0 - linDepth / max(FullBlendDistance, 1.0)); + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 1), proximity * 0.4), c.a); + } + return; + } + + // Only process full blend pixels + if (pixelMode != MODE_FULL_BLEND) + return; + + float2 uv = (dtid + 0.5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + float4 result = ColorTexture[dtid]; + + // === MODE_FULL_BLEND: bilateral blend for 2x supersampling === + { + float4 center = result; + float centerDepth = DepthTexture[dtid]; + + // Reproject to the other eye + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, centerDepth, eyeIndex, FrameDim); + if (!r.valid) { + // Debug tint for failed reprojection + if (DebugEdgeTint > 0) + OutputRW[dtid] = float4(lerp(center.rgb, float3(1, 0.5, 0), DebugEdgeTint), center.a); + return; + } + + // Only blend with pixels that have valid composited data in both eyes. 
+ uint otherMode = ModeTexture[r.otherPx]; + if (otherMode != MODE_FULL_BLEND && otherMode != MODE_DISOCCLUDED) + return; + + float4 otherColor = ColorTexture[r.otherPx]; + float otherDepth = DepthTexture[r.otherPx]; + + // Depth-weighted bilateral blend + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float depthAgreement = 1.0 - saturate(abs(centerDepth - otherDepth) / maxDepth / 0.02); + float blendWeight = 0.5 * depthAgreement; + + result = lerp(center, otherColor, blendWeight); + + if (DebugEdgeTint > 0) + result.rgb = lerp(result.rgb, float3(0, 1, 1), DebugEdgeTint); + } + + OutputRW[dtid] = result; +} diff --git a/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl b/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl new file mode 100644 index 0000000000..bd34d26d58 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl @@ -0,0 +1,55 @@ +// VR Stereo Optimizations - Reprojection Compute Shader +// +// Fills Eye 1 pixels that were stencil-culled during rendering by reprojecting +// color data from Eye 0. Only operates on pixels classified as MODE_MAIN. +// +// Reads Eye 0 color directly from the OutputRW UAV (left half) and writes to +// Eye 1 (right half). No read-write conflict because reads and writes target +// strictly different halves of the texture. 
+// +// Input: +// t0 = Depth buffer +// t1 = Per-pixel mode classification texture +// Output: +// u0 = Main render target UAV (reads Eye 0, writes Eye 1) + +#include "Common/VR.hlsli" +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D DepthTexture : register(t0); +Texture2D ModeTexture : register(t1); + +RWTexture2D OutputRW : register(u0); + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + uint eyeWidth = (uint)FrameDim.x / 2; + uint eyeHeight = (uint)FrameDim.y; + + if (any(dtid >= uint2(eyeWidth, eyeHeight))) + return; + + // dtid is in Eye 1 local coords; convert to stereo buffer coords + uint2 stereoCoord = uint2(dtid.x + eyeWidth, dtid.y); + + // Only fill pixels that were marked for reprojection + // Mode texture is full SBS resolution, so use stereoCoord for Eye 1 + uint mode = ModeTexture[stereoCoord]; + if (mode != MODE_MAIN) + return; + + float depth = DepthTexture[stereoCoord]; + + // Compute mono UV for this Eye 1 pixel + float2 stereoUV = (float2(stereoCoord) + 0.5) * RcpFrameDim; + float2 monoUV = Stereo::ConvertFromStereoUV(stereoUV, 1); + + // Reproject to Eye 0 and sample color; the source pixel is clamped to Eye 0's half (not the full SBS frame) so the read can never land on an Eye 1 texel that another thread is writing + float3 otherEyeUV = Stereo::ConvertMonoUVToOtherEye(float3(monoUV, depth), 1); + float2 eye0StereoUV = Stereo::ConvertToStereoUV(otherEyeUV.xy, 0); + int2 eye0Px = clamp(int2(eye0StereoUV * FrameDim), int2(0, 0), int2(eyeWidth, eyeHeight) - 1); + + float4 reprojectedColor = OutputRW[eye0Px]; + + // Write to Eye 1 in the main render target + OutputRW[stereoCoord] = reprojectedColor; +} diff --git a/package/Shaders/VRStereoOptimizations/StencilCS.hlsl b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl new file mode 100644 index 0000000000..1709796234 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl @@ -0,0 +1,153 @@ +// VR Stereo Optimizations - Stencil Classification Compute Shader +// +// Classifies BOTH eyes over the full SBS buffer.
Each pixel is tagged as: +// MODE_DISOCCLUDED - Must be fully shaded (sky, HMD mask, parallax-occluded) +// MODE_EDGE - Depth edge boundary (dist 1) or inner/foreground band; fully shaded in Eye 0, stencil-culled in Eye 1 and reprojected from Eye 0 (see StencilWritePS / StereoBlendCS) +// MODE_MAIN - Standard pixel; in Eye 1 it is stencil-culled and reprojected from Eye 0 +// MODE_FULL_BLEND - Near-camera geometry: both eyes fully shaded for 2x supersampling +// +// Dispatched over full SBS resolution (FrameDim.x x FrameDim.y). + +#include "Common/SharedData.hlsli" +#include "Common/VR.hlsli" +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D DepthTexture : register(t0); + +RWTexture2D ModeTextureRW : register(u0); + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + if (any(dtid >= uint2(FrameDim))) + return; + + // Determine which eye this pixel belongs to + float2 uv = (float2(dtid) + 0.5) / FrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + // Read depth directly in SBS coords + float centerDepth = DepthTexture[dtid]; + +#ifdef DEBUG_DEPTH_MAP + // DIAGNOSTIC: Visualize what depth values StencilCS sees. Every path below returns, so the normal classification is skipped in this build. + // Green (MODE_EDGE) = depth >= 1.0 (HMD mask threshold) + // Magenta (MODE_EDGE_NEIGHBOUR) = depth < 1e-5 (sky threshold) + // No tint (MODE_MAIN) = normal geometry with valid depth + if (centerDepth >= 1.0) { + ModeTextureRW[dtid] = MODE_EDGE; + return; + } + if (centerDepth < 1e-5) { + ModeTextureRW[dtid] = MODE_EDGE_NEIGHBOUR; + return; + } + ModeTextureRW[dtid] = MODE_MAIN; + return; +#endif + + // Sky/unrendered pixels (depth >= 1.0 at z-prepass time = depth buffer clear value) + // and HMD mask pixels both have depth >= 1.0 here; depth < 1e-5 is also classed as sky by the test below. Treat them all the same as sky: + // let edge detection run so geometry-vs-sky boundaries get classified. + // HMD mask pixels are in lens corners with no nearby geometry, so they'll + // fall through to MODE_DISOCCLUDED at the end. + bool isSky = (centerDepth < 1e-5) || (centerDepth >= 1.0); + float linCenter = isSky ?
999999.0 : SharedData::GetScreenDepth(centerDepth); + + // Near-camera supersampling: geometry closer than FullBlendDistance gets full + // shading in both eyes for bilateral blend (2x supersampling in VRPostProcess). + if (!isSky && linCenter < FullBlendDistance) { + ModeTextureRW[dtid] = MODE_FULL_BLEND; + return; + } + + // --- Disocclusion detection via reprojection (runs for all non-sky pixels) --- + // Early return: disoccluded pixels are always MODE_DISOCCLUDED regardless of edge proximity. + // This ensures MinEdgeDistance never affects disocclusion classification. + if (!isSky) { + Stereo::StereoBilateralResult reproj = Stereo::ReprojectToOtherEye( + uv, + centerDepth, + eyeIndex, + FrameDim); + + bool isDisoccluded = false; + if (!reproj.valid) { + isDisoccluded = true; + } else { + float otherDepth = DepthTexture[reproj.otherPx]; + // Raw reversed-Z depth comparison for disocclusion detection. + // Using raw depth avoids concentric semicircle artifacts that occur + // with linearized depth due to precision band boundaries in the + // hyperbolic depth-to-linear conversion. + float maxRaw = max(max(centerDepth, otherDepth), 1e-7); + float rawRelDiff = abs(centerDepth - otherDepth) / maxRaw; + isDisoccluded = (rawRelDiff > DisocclusionThreshold); + } + + if (isDisoccluded) { + ModeTextureRW[dtid] = MODE_DISOCCLUDED; + return; + } + } + + // Depth gate: skip edge detection for nearby geometry (saves perf, distant AA matters more) + // Sky pixels always run edge detection — they need to expand the edge band outward. + // Disocclusion detection (above) is independent of this gate and always runs. + bool skipEdgeDetection = !isSky && (linCenter < MinEdgeDistance); + + // --- Edge detection with two-tier classification --- + // MODE_EDGE: immediate neighbor (distance 1) has depth discontinuity, OR + // inner/foreground band (distance <= kInnerWidth). 
+ static const uint kInnerWidth = 2; + int2 offsets[4] = { int2(-1, 0), int2(1, 0), int2(0, -1), int2(0, 1) }; + + uint nearestEdgeDist = 0xFFFFFFFF; // nearest distance at which a discontinuity was found + bool nearestWeAreOuter = false; // whether we are on the background side at that nearest hit + + // Use the larger of inner/outer widths for the search + uint maxWidth = kInnerWidth; + + if (!skipEdgeDetection) { + [loop] for (uint d = 1; d <= maxWidth; d++) + { + [unroll] for (int i = 0; i < 4; i++) + { + int2 rawNeighbor = int2(dtid) + offsets[i] * (int)d; + uint2 neighborCoord = Stereo::ClampToEyeBounds(rawNeighbor, eyeIndex, FrameDim); + + float neighborDepth = DepthTexture[neighborCoord]; + bool neighborIsSky = (neighborDepth < 1e-5) || (neighborDepth >= 1.0); + float linNeighbor = neighborIsSky ? 999999.0 : SharedData::GetScreenDepth(neighborDepth); + float maxLin = max(max(linCenter, linNeighbor), 1e-5); + float relDepthDiff = abs(linCenter - linNeighbor) / maxLin; + + if (relDepthDiff > EdgeDepthThreshold && d < nearestEdgeDist) { + nearestEdgeDist = d; + nearestWeAreOuter = (linNeighbor < linCenter); // neighbor closer to camera = we are background + } + } + } + + } // !skipEdgeDetection + + if (nearestEdgeDist != 0xFFFFFFFF) { + // Classify based on distance and side + if (nearestEdgeDist == 1) { + // Immediate neighbor discontinuity: always MODE_EDGE regardless of side + ModeTextureRW[dtid] = MODE_EDGE; + return; + } else if (!nearestWeAreOuter && nearestEdgeDist <= kInnerWidth) { + // Inner/foreground band beyond distance 1 + ModeTextureRW[dtid] = MODE_EDGE; + return; + } + } + + // Sky pixels that aren't near edges -> disoccluded (reprojection is meaningless for sky) + if (isSky) { + ModeTextureRW[dtid] = MODE_DISOCCLUDED; + return; + } + + // Standard pixel + ModeTextureRW[dtid] = MODE_MAIN; +} diff --git a/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl new file mode 100644 
index 0000000000..c45c2a2409 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl @@ -0,0 +1,54 @@ +// VR Stereo Optimizations - Stencil Write Pixel Shader +// +// Reads from the per-pixel mode classification texture and depth texture. +// Discards pixels that should NOT be stencil-culled: +// - MODE_DISOCCLUDED (0) = fully shaded in Eye 1, no reprojection needed +// - MODE_FULL_BLEND (4) = near-camera pixels fully shaded in both eyes for supersampling +// - Sky/HMD-mask pixels (depth >= 1.0 or depth < 1e-5) = need normal rendering +// in the sky pass; they keep their MODE_EDGE tag in +// the mode texture for VRPostProcess but must not be stencil-culled. +// +// Only geometry MODE_MAIN/MODE_EDGE pixels survive and get stencil ref=1 written. +// +// Mode texture is full SBS resolution (same as render target). +// The DSS is configured with StencilFunc=ALWAYS, StencilPassOp=REPLACE, ref=1. +// Pixels that survive (not discarded) get stencil=1 written. + +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D ModeTexture : register(t0); +Texture2D DepthTexture : register(t1); + +struct PS_INPUT +{ + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; +}; + +void main(PS_INPUT input) +{ + // Mode texture is full SBS resolution — SV_Position maps directly + // (viewport is Eye 1 half, so SV_Position.x starts at eyeWidth) + int2 modeCoord = int2(input.Position.xy); + + uint mode = ModeTexture[modeCoord]; + + // MODE_MAIN and MODE_EDGE in Eye 1 write stencil ref=1 (reprojectable). + // These are reprojected from Eye 0; MODE_DISOCCLUDED and MODE_FULL_BLEND are fully shaded in Eye 1. + if (mode == MODE_DISOCCLUDED) + discard; + + // Sky/HMD-mask pixels must not be stencil-culled regardless of edge classification. + // They keep their MODE_EDGE tag in the mode texture for VRPostProcess, + // but must render normally in the sky pass (which runs after stencil culling). 
+ float depth = DepthTexture[modeCoord]; + if (depth >= 1.0 || depth < 1e-5) + discard; + + // MODE_FULL_BLEND: near-camera pixels fully shaded in both eyes for supersampling + if (mode == MODE_FULL_BLEND) + discard; + + // Pixel survives: DSS writes stencil ref=1 + // No color output (no RTV bound) +} diff --git a/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl new file mode 100644 index 0000000000..353aa53379 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl @@ -0,0 +1,24 @@ +// VR Stereo Optimizations - Stencil Write Vertex Shader +// +// Procedural fullscreen triangle covering Eye 1 (right half of SBS buffer). +// No vertex buffer needed — vertex positions are generated from SV_VertexID. +// The viewport is set to Eye 1 by the C++ code, so we just emit a standard +// fullscreen triangle in clip space. + +struct VS_OUTPUT +{ + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; +}; + +VS_OUTPUT main(uint vertexID : SV_VertexID) +{ + VS_OUTPUT output; + + // Fullscreen triangle: 3 vertices covering [-1,1] clip space + float2 uv = float2((vertexID << 1) & 2, vertexID & 2); + output.Position = float4(uv * float2(2, -2) + float2(-1, 1), 0, 1); + output.TexCoord = uv; + + return output; +} diff --git a/package/Shaders/VRStereoOptimizations/cbuffers.hlsli b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli new file mode 100644 index 0000000000..60a900387c --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli @@ -0,0 +1,31 @@ +// VR Stereo Optimizations - Shared constant buffer layout +// Must match VRStereoOptParams in VRStereoOptimizations.h exactly + +#ifndef __VR_STEREO_OPT_CBUFFERS_HLSLI__ +#define __VR_STEREO_OPT_CBUFFERS_HLSLI__ + +cbuffer VRStereoOptParams : register(b1) +{ + float2 FrameDim; // Full stereo buffer dimensions (both eyes) + float2 RcpFrameDim; // 1.0 / FrameDim + + uint StereoModeValue; // 0=Off, 1=Enable + float 
DisocclusionThreshold; // Relative raw-depth difference above which StencilCS marks a pixel MODE_DISOCCLUDED + float EdgeDepthThreshold; // Relative depth difference threshold for edge detection + uint EdgeWidth; // Half-width of edge detection band in pixels (NOTE(review): StencilCS hard-codes kInnerWidth and does not read this; confirm it is still needed) + + float2 QualityJitter; // Sub-pixel jitter offset (Quality mode) + float FoveatedRadius; // Radius of foveal region in UV space + float pad2; + + float2 FoveatedCenter; // Center of foveal region in UV space + float MinEdgeDistance; // Linearized depth below which StencilCS skips edge detection for non-sky pixels + float FullBlendDistance; // Linearized depth below which pixels get MODE_FULL_BLEND (game units) +}; + +#define STEREO_MODE_OFF 0 +#define STEREO_MODE_ENABLE 1 + +#include "VRStereoOptimizations/modes.hlsli" + +#endif diff --git a/package/Shaders/VRStereoOptimizations/modes.hlsli b/package/Shaders/VRStereoOptimizations/modes.hlsli new file mode 100644 index 0000000000..95fb721833 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/modes.hlsli @@ -0,0 +1,10 @@ +#ifndef __VR_STEREO_OPT_MODES_HLSLI__ +#define __VR_STEREO_OPT_MODES_HLSLI__ + +#define MODE_DISOCCLUDED 0 +#define MODE_EDGE 1 +#define MODE_MAIN 2 +#define MODE_EDGE_NEIGHBOUR 3 +#define MODE_FULL_BLEND 4 + +#endif diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 0106b7449d..ee6762fcdb 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -279,6 +279,11 @@ void Deferred::StartDeferred() PrepassPasses(); OverrideBlendStates(); + + // VR: Classify Eye 1 pixels and write hardware stencil marks before geometry rendering + if (globals::game::isVR) { + globals::features::vr.stereoOpt.DispatchStencil(); + } } void Deferred::DeferredPasses() @@ -367,6 +372,13 @@ void Deferred::DeferredPasses() context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); + // Bind VRStereoOptimizations mode texture for Eye 1 skip + auto& vrStereoOpt = globals::features::vr.stereoOpt; + if (vrStereoOpt.loaded) { + ID3D11ShaderResourceView* modeSRV = vrStereoOpt.GetModeTextureSRV(); + context->CSSetShaderResources(16, 1, &modeSRV); + } + ID3D11UnorderedAccessView* 
uavs[3]{ main.UAV, normals.UAV, motionVectors.UAV }; context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); @@ -374,13 +386,28 @@ void Deferred::DeferredPasses() context->CSSetShader(shader, nullptr, 0); context->Dispatch(dispatchCount.x, dispatchCount.y, 1); + + // Unbind mode texture SRV + if (vrStereoOpt.loaded) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->CSSetShaderResources(16, 1, &nullSRV); + } + } + + // VR: Deactivate stencil culling now that geometry rendering is complete. + // Must happen before StereoBlend so the blend pass itself isn't stencil-blocked. + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vr.stereoOpt; + if (stereoOpt.IsStencilActive()) { + stereoOpt.DeactivateStencil(); + } } - // VR stereo consistency blend - depth-aware bilateral blend at the eye seam - // Runs after composite as a general safety net for all screen-space effects. - // Must run before clearing b12/b13 -- needs FrameBuffer matrices for reprojection. - if (globals::game::isVR) + // VR: Stereo reprojection fills Eye 1 holes here (after DeferredComposite, before SSR/water/sky) + // so that ISReflectionsRayTracing sees valid pixels in both eyes. 
+ if (globals::game::isVR) { globals::features::vr.DrawStereoBlend(); + } // Clear { @@ -479,6 +506,10 @@ void Deferred::OverrideBlendStates() blendDesc.RenderTarget[i].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; } + // RT[5] = REFLECTANCE: enable alpha writes for POM depth data + // stored in Reflectance.w, used by StereoBlendCS for depth-aware reprojection + blendDesc.RenderTarget[5].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[a][b][c][d])); } else { deferredBlendStates[a][b][c][d] = nullptr; @@ -555,6 +586,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainComposite() if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); + if (REL::Module::IsVR() && globals::features::vr.stereoOpt.loaded) + defines.push_back({ "VR_STEREO_OPT", nullptr }); + mainCompositeCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); } return mainCompositeCS; @@ -580,6 +614,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainCompositeInterior() if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); + if (REL::Module::IsVR() && globals::features::vr.stereoOpt.loaded) + defines.push_back({ "VR_STEREO_OPT", nullptr }); + mainCompositeInteriorCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); } return mainCompositeInteriorCS; @@ -597,6 +634,7 @@ void Deferred::Hooks::Main_RenderWorld::thunk(bool a1) state->permutationData.ExtraShaderDescriptor |= static_cast(State::ExtraShaderDescriptors::InWorld); state->inWorld = true; func(a1); + state->inWorld = false; state->permutationData.ExtraShaderDescriptor &= ~static_cast(State::ExtraShaderDescriptors::InWorld); }; diff --git a/src/Features/ExtendedMaterials.h b/src/Features/ExtendedMaterials.h index 10519a9a4f..e4fb5c7440 100644 --- a/src/Features/ExtendedMaterials.h +++ b/src/Features/ExtendedMaterials.h @@ -36,7 +36,7 @@ 
struct ExtendedMaterials : Feature uint ExtendShadows = 1; uint EnableParallaxWarpingFix = 1; - float pad[1]; + uint pad0 = 0; }; STATIC_ASSERT_ALIGNAS_16(Settings); diff --git a/src/Features/VR.cpp b/src/Features/VR.cpp index e6ed6af7bb..ecc6bcc1d0 100644 --- a/src/Features/VR.cpp +++ b/src/Features/VR.cpp @@ -44,7 +44,8 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( EnableStereoBlend, StereoBlendDepthSigma, StereoBlendMaxFactor, - StereoBlendColorThreshold) + StereoBlendColorThreshold, + StereoBlendDebugMode) //============================================================================= // FEATURE BASE CLASS OVERRIDES @@ -54,16 +55,26 @@ void VR::LoadSettings(json& o_json) { settings = o_json.get(); settings.ClampToValidRanges(); + if (o_json.contains("StereoOptimizations")) { + json stereoOptJson = o_json["StereoOptimizations"]; + stereoOpt.LoadSettings(stereoOptJson); + } } void VR::SaveSettings(json& o_json) { o_json = settings; + { + json stereoOptJson; + stereoOpt.SaveSettings(stereoOptJson); + o_json["StereoOptimizations"] = stereoOptJson; + } } void VR::RestoreDefaultSettings() { settings = {}; + stereoOpt.RestoreDefaultSettings(); } void VR::SetupResources() @@ -88,6 +99,12 @@ void VR::SetupResources() if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\VR\\StereoBlendCS.hlsl", edgeDetectionDefines, "cs_5_0"))) stereoBlendDebugEdgeDetectionCS.attach(rawPtr); + // Overwrite mode: direct replacement instead of blend (for stencil culling) + auto overwriteDefines = defines; + overwriteDefines.push_back({ "STEREO_OVERWRITE", "" }); + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\VR\\StereoBlendCS.hlsl", overwriteDefines, "cs_5_0"))) + stereoBlendOverwriteCS.attach(rawPtr); + auto renderer = globals::game::renderer; auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; D3D11_TEXTURE2D_DESC mainDesc; @@ -103,6 +120,11 @@ void VR::SetupResources() 
stereoBlendCopyTex->CreateSRV(srvDesc); stereoBlendCB = eastl::make_unique(ConstantBufferDesc()); + if (REL::Module::IsVR()) { + stereoOpt.SetupResources(); + stereoOpt.loaded = stereoOpt.GetModeTextureSRV() != nullptr; + } + DetectOpenVRInfo(); if (openVRInfo.isAvailable) { @@ -274,3 +296,8 @@ bool VR::IsOpenVRCompatible() const { return globals::game::isVR && openVRInfo.isCompatible; } + +void VR::Reset() +{ + stereoOpt.Reset(); +} diff --git a/src/Features/VR.h b/src/Features/VR.h index 06789eaac3..f3c2a1807f 100644 --- a/src/Features/VR.h +++ b/src/Features/VR.h @@ -1,6 +1,7 @@ #pragma once #include "Menu.h" #include "OverlayFeature.h" +#include "VRStereoOptimizations.h" #include "Utils/Input.h" #include "VR/OpenVRDetection.h" // In Features/VR/ #include @@ -109,6 +110,9 @@ struct VR : OverlayFeature }; } + virtual inline std::string_view GetShaderDefineName() override { return "VR_STEREO_OPT"; } + virtual inline bool HasShaderDefine(RE::BSShader::Type t) override { return stereoOpt.loaded && t == RE::BSShader::Type::Utility; } + virtual void Reset() override; virtual void SetupResources() override; virtual void ClearShaderCache() override; virtual bool SupportsVR() override { return true; } @@ -260,7 +264,7 @@ struct VR : OverlayFeature StereoBlendDepthSigma = std::clamp(StereoBlendDepthSigma, 0.001f, 0.1f); StereoBlendMaxFactor = std::clamp(StereoBlendMaxFactor, 0.0f, 0.5f); StereoBlendColorThreshold = std::clamp(StereoBlendColorThreshold, 0.0f, 0.2f); - StereoBlendDebugMode = std::clamp(StereoBlendDebugMode, 0, 3); + StereoBlendDebugMode = std::clamp(StereoBlendDebugMode, 0, 5); } }; @@ -358,8 +362,12 @@ struct VR : OverlayFeature winrt::com_ptr stereoBlendDebugBackCheckCS; winrt::com_ptr stereoBlendDebugBlendWeightCS; winrt::com_ptr stereoBlendDebugEdgeDetectionCS; + winrt::com_ptr stereoBlendOverwriteCS; eastl::unique_ptr stereoBlendCopyTex; eastl::unique_ptr stereoBlendCB; + winrt::com_ptr stereoBlendLinearSampler; + + VRStereoOptimizations stereoOpt; 
struct alignas(16) StereoBlendCB { @@ -368,7 +376,11 @@ struct VR : OverlayFeature float DepthSigma; float MaxBlendFactor; float ColorDiffThreshold; - float pad; + float DebugEdgeTint; + uint32_t DebugMode; + float FullBlendDistance; + float POMDepthScale; + float _pad; }; // Engine hook integration points diff --git a/src/Features/VR/SettingsUI.cpp b/src/Features/VR/SettingsUI.cpp index 5be4fc156c..55ebe0e3a5 100644 --- a/src/Features/VR/SettingsUI.cpp +++ b/src/Features/VR/SettingsUI.cpp @@ -73,7 +73,7 @@ void VR::DrawOverlay() static LARGE_INTEGER overlayShowStart = { 0 }; static LARGE_INTEGER freq = { 0 }; - bool shouldShow = settings.kAutoHideSeconds > 0 && globals::state->isMainMenuOpen && globals::menu && !globals::menu->IsEnabled; + bool shouldShow = settings.kAutoHideSeconds > 0 && globals::game::ui && globals::game::ui->IsMenuOpen(RE::MainMenu::MENU_NAME) && globals::menu && !globals::menu->IsEnabled; if (!shouldShow) { overlayShowStart.QuadPart = 0; @@ -108,7 +108,7 @@ void VR::DrawOverlay() ImGui::Begin("HowToUseOverlay", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav); - ImGui::PushTextWrapPos(ImGui::GetCursorPos().x + 500.0f * scale); + ImGui::PushTextWrapPos(ImGui::GetCursorPos().x + 500.0f); ImGui::TextWrapped("How to Use VR Community Shaders Menu:"); ImGui::Separator(); ImGui::TextWrapped("You must open the Main Menu or Tween Menu before VR controls work."); @@ -124,7 +124,7 @@ void VR::DrawOverlay() Util::DrawButtonCombo(settings.VRMenuCloseKeys, true); ImGui::Spacing(); - ImGui::PushTextWrapPos(ImGui::GetCursorPos().x + 500.0f * scale); + ImGui::PushTextWrapPos(ImGui::GetCursorPos().x + 500.0f); ImGui::TextWrapped("Grip + Thumbstick: Adjust overlay depth (closer/farther)"); ImGui::Spacing(); ImGui::TextWrapped("Tip: Disable this VR overlay by setting Attach Mode to 'None' in VR settings."); @@ -324,25 +324,16 @@ namespace 
ImGui::Separator(); - const char* debugModes[] = { "Off", "Back-Check", "Blend Weight", "Edge Detection" }; + const char* debugModes[] = { "Off", "Back-Check", "Blend Weight", "Edge Detection", "Overwrite", "Overwrite Eye1" }; ImGui::Combo("Debug View", &settings.StereoBlendDebugMode, debugModes, IM_ARRAYSIZE(debugModes)); if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text( - "Off: Normal rendering.\n\n" - "Back-Check: Visualize reprojection outcomes.\n" - " Blue = sky or HMD mask (skipped).\n" - " Yellow = source edge rejected (depth discontinuity at this pixel).\n" - " Orange = destination edge rejected (discontinuity at reprojected pixel).\n" - " Grey = other eye can't see this point (out of bounds).\n" - " Green = back-check passed (surfaces match in both eyes).\n" - " Red = back-check failed (occlusion edge, blend penalized).\n\n" - "Blend Weight: Heatmap of stereo blend strength.\n" - " Cool/black = no blending. Hot/white = maximum blending.\n" - " Shows where the two eyes disagree and correction is applied.\n\n" - "Edge Detection: Highlights pixels excluded by depth discontinuity checks.\n" - " Yellow = source edge (discontinuity at this pixel).\n" - " Orange = destination edge (discontinuity at reprojected pixel).\n" - " Scene = all other pixels shown with normal blending."); + ImGui::Text("Stereo blend debug visualization modes:"); + ImGui::Text(" Off: Normal rendering"); + ImGui::Text(" Back-Check: Shows round-trip reprojection validation"); + ImGui::Text(" Blend Weight: Heatmap of bilateral blend intensity"); + ImGui::Text(" Edge Detection: Highlights depth discontinuities"); + ImGui::Text(" Overwrite: Shows stereo reprojection mode classification"); + ImGui::Text(" (Eye 0 = left eye, fully shaded; Eye 1 = right eye, reprojected)"); } ImGui::EndDisabled(); @@ -970,6 +961,9 @@ void VR::DrawSettings() if (BeginTabItemWithFont("Stereo", Menu::FontRole::Subheading)) { if (ImGui::BeginChild("##VRStereoFrame", { 0, 0 }, true)) { 
DrawStereoBlendSettings(); + if (ImGui::CollapsingHeader("Stereo Optimizations", ImGuiTreeNodeFlags_DefaultOpen)) { + stereoOpt.DrawSettings(); + } } ImGui::EndChild(); ImGui::EndTabItem(); diff --git a/src/Features/VR/StereoBlend.cpp b/src/Features/VR/StereoBlend.cpp index 1fa5d22240..e71e835cc2 100644 --- a/src/Features/VR/StereoBlend.cpp +++ b/src/Features/VR/StereoBlend.cpp @@ -1,9 +1,11 @@ #include "Features/VR.h" +#include "Deferred.h" #include "Features/DynamicCubemaps.h" #include "Features/ScreenSpaceGI.h" #include "Features/ScreenSpaceShadows.h" #include "State.h" +#include "Utils/D3D.h" void VR::ClearShaderCache() { @@ -11,6 +13,8 @@ void VR::ClearShaderCache() stereoBlendDebugBackCheckCS = nullptr; stereoBlendDebugBlendWeightCS = nullptr; stereoBlendDebugEdgeDetectionCS = nullptr; + stereoBlendOverwriteCS = nullptr; + stereoOpt.ClearShaderCache(); } bool VR::AnyScreenSpaceEffectLoaded() @@ -22,10 +26,20 @@ bool VR::AnyScreenSpaceEffectLoaded() void VR::DrawStereoBlend() { - if (!REL::Module::IsVR() || !settings.EnableStereoBlend || !stereoBlendCS || !stereoBlendCopyTex || !stereoBlendCB) + bool vrStereoOptActive = globals::features::vr.stereoOpt.loaded && + globals::features::vr.stereoOpt.settings.stereoMode != VRStereoOptimizations::StereoMode::Off && + stereoBlendOverwriteCS; + + if (!REL::Module::IsVR() || !stereoBlendCopyTex || !stereoBlendCB) + return; + + if (vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugSkipMerge) + return; + + if (!vrStereoOptActive && (!settings.EnableStereoBlend || !stereoBlendCS)) return; - if (!AnyScreenSpaceEffectLoaded() && !globals::state->IsDeveloperMode()) + if (!vrStereoOptActive && !AnyScreenSpaceEffectLoaded() && !globals::state->IsDeveloperMode()) return; ZoneScoped; @@ -55,37 +69,117 @@ void VR::DrawStereoBlend() cbData.MaxBlendFactor = settings.StereoBlendMaxFactor; cbData.ColorDiffThreshold = settings.StereoBlendColorThreshold; + // Pass debug edge tint from VRStereoOptimizations settings + if 
(vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugVisualization) + cbData.DebugEdgeTint = 0.3f; + else + cbData.DebugEdgeTint = 0.0f; + + // Debug mode: 0=normal, 1=depth map diagnostic, 2=full blend depth visualizer, 3=POM depth debug (first enabled flag wins, in that order) + if (vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugDepthMap) + cbData.DebugMode = 1u; + else if (vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugFullBlendDepth) + cbData.DebugMode = 2u; + else if (vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugPOMDepth) + cbData.DebugMode = 3u; + else + cbData.DebugMode = 0u; + + cbData.FullBlendDistance = vrStereoOptActive ? globals::features::vr.stereoOpt.settings.fullBlendDistance : 0.0f; + cbData.POMDepthScale = vrStereoOptActive ? globals::features::vr.stereoOpt.settings.pomDepthScale : 1.0f; + stereoBlendCB->Update(cbData); auto cbPtr = stereoBlendCB->CB(); - ID3D11ShaderResourceView* srvs[2]{ stereoBlendCopyTex->srv.get(), depthSRV }; - ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + auto& motionVectors = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMOTION_VECTOR]; + + bool isOverwriteMode = vrStereoOptActive; ID3D11ComputeShader* activeCS = stereoBlendCS.get(); - if (settings.StereoBlendDebugMode == 1 && stereoBlendDebugBackCheckCS) - activeCS = stereoBlendDebugBackCheckCS.get(); - else if (settings.StereoBlendDebugMode == 2 && stereoBlendDebugBlendWeightCS) - activeCS = stereoBlendDebugBlendWeightCS.get(); - else if (settings.StereoBlendDebugMode == 3 && stereoBlendDebugEdgeDetectionCS) - activeCS = stereoBlendDebugEdgeDetectionCS.get(); + if (vrStereoOptActive) { + activeCS = stereoBlendOverwriteCS.get(); + } else { + int effectiveMode = settings.StereoBlendDebugMode; + if (effectiveMode == 1 && stereoBlendDebugBackCheckCS) + activeCS = stereoBlendDebugBackCheckCS.get(); + else if (effectiveMode == 2 && stereoBlendDebugBlendWeightCS) + activeCS = stereoBlendDebugBlendWeightCS.get(); + else if (effectiveMode == 3 
&& stereoBlendDebugEdgeDetectionCS) + activeCS = stereoBlendDebugEdgeDetectionCS.get(); + } + + // Save and unbind DSV to avoid SRV/DSV conflict on depth buffer in overwrite mode + ID3D11RenderTargetView* savedRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {}; + ID3D11DepthStencilView* savedDSV = nullptr; + if (isOverwriteMode) { + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, &savedDSV); + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, nullptr); + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + } + ID3D11ShaderResourceView* srvs[2]{ stereoBlendCopyTex->srv.get(), depthSRV }; context->CSSetConstantBuffers(1, 1, &cbPtr); context->CSSetShaderResources(0, 2, srvs); - context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - context->CSSetShader(activeCS, nullptr, 0); + if (isOverwriteMode) { + ID3D11ShaderResourceView* modeSRV = globals::features::vr.stereoOpt.GetModeTextureSRV(); + context->CSSetShaderResources(2, 1, &modeSRV); + + // Bind REFLECTANCE SRV for POM depth offset (stored in .w by Lighting pass) + auto& reflectanceRT = renderer->GetRuntimeData().renderTargets[REFLECTANCE]; + context->CSSetShaderResources(3, 1, &reflectanceRT.SRV); + + ID3D11UnorderedAccessView* uavs[2]{ main.UAV, motionVectors.UAV }; + context->CSSetUnorderedAccessViews(0, 2, uavs, nullptr); + } else { + ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + } + + // Bind linear sampler for hardware bilinear color sampling in overwrite mode + if (isOverwriteMode) { + if (!stereoBlendLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + globals::d3d::device->CreateSamplerState(&sampDesc, stereoBlendLinearSampler.put()); + } + ID3D11SamplerState* 
samplers[] = { stereoBlendLinearSampler.get() }; + context->CSSetSamplers(0, 1, samplers); + } + + context->CSSetShader(activeCS, nullptr, 0); context->Dispatch(dispatchCount.x, dispatchCount.y, 1); // Cleanup - srvs[0] = nullptr; - srvs[1] = nullptr; - uavs[0] = nullptr; - cbPtr = nullptr; - context->CSSetShaderResources(0, 2, srvs); - context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - context->CSSetConstantBuffers(1, 1, &cbPtr); + ID3D11ShaderResourceView* nullSRVs[4] = {}; + context->CSSetShaderResources(0, isOverwriteMode ? 4 : 2, nullSRVs); + ID3D11UnorderedAccessView* nullUAVs[2] = {}; + context->CSSetUnorderedAccessViews(0, isOverwriteMode ? 2 : 1, nullUAVs, nullptr); + ID3D11Buffer* nullCB = nullptr; + context->CSSetConstantBuffers(1, 1, &nullCB); + if (isOverwriteMode) { + ID3D11SamplerState* nullSampler[] = { nullptr }; + context->CSSetSamplers(0, 1, nullSampler); + } context->CSSetShader(nullptr, nullptr, 0); + // Restore DSV after CS dispatch in overwrite mode + if (isOverwriteMode && savedDSV) { + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, nullptr); + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, savedDSV); + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + savedDSV->Release(); + } + if (globals::state->frameAnnotations) globals::state->EndPerfEvent(); } diff --git a/src/Features/VRStereoOptimizations.cpp b/src/Features/VRStereoOptimizations.cpp new file mode 100644 index 0000000000..6e540fe6f5 --- /dev/null +++ b/src/Features/VRStereoOptimizations.cpp @@ -0,0 +1,651 @@ +#include "VRStereoOptimizations.h" + +#include "ExtendedMaterials.h" +#include "Globals.h" +#include "State.h" +#include "Utils/D3D.h" +#include "Utils/Game.h" + +#include + +// JSON enum serialization for StereoMode +NLOHMANN_JSON_SERIALIZE_ENUM(VRStereoOptimizations::StereoMode, { + { VRStereoOptimizations::StereoMode::Off, "Off" }, + { VRStereoOptimizations::StereoMode::Enable, "Enable" }, + 
}) + +//============================================================================= +// SETTINGS MANAGEMENT +//============================================================================= + +void VRStereoOptimizations::SaveSettings(json& o_json) +{ + o_json["StereoMode"] = settings.stereoMode; + o_json["DisocclusionDepthThreshold"] = settings.disocclusionDepthThreshold; + o_json["FullBlendDistance"] = settings.fullBlendDistance; + o_json["QualityJitterOffset"] = settings.qualityJitterOffset; + o_json["FoveatedRegionRadius"] = settings.foveatedRegionRadius; + o_json["FoveatedRegionCenterX"] = settings.foveatedRegionCenterX; + o_json["FoveatedRegionCenterY"] = settings.foveatedRegionCenterY; + o_json["UseEyeTracking"] = settings.useEyeTracking; + o_json["DebugVisualization"] = settings.debugVisualization; + o_json["DebugSkipMerge"] = settings.debugSkipMerge; + o_json["DebugForceAllStencil"] = settings.debugForceAllStencil; + o_json["DebugForceAllReprojectCS"] = settings.debugForceAllReprojectCS; + o_json["DebugDepthMap"] = settings.debugDepthMap; + o_json["POMDepthScale"] = settings.pomDepthScale; +} + +void VRStereoOptimizations::LoadSettings(json& o_json) +{ + if (o_json.contains("StereoMode")) + settings.stereoMode = o_json["StereoMode"].get(); + if (auto it = o_json.find("DisocclusionDepthThreshold"); it != o_json.end() && it->is_number()) + settings.disocclusionDepthThreshold = std::clamp(it->get(), 0.001f, 0.1f); + if (auto it = o_json.find("QualityJitterOffset"); it != o_json.end() && it->is_number()) + settings.qualityJitterOffset = std::clamp(it->get(), 0.0f, 1.0f); + if (auto it = o_json.find("FoveatedRegionRadius"); it != o_json.end() && it->is_number()) + settings.foveatedRegionRadius = std::clamp(it->get(), 0.0f, 1.0f); + if (auto it = o_json.find("FoveatedRegionCenterX"); it != o_json.end() && it->is_number()) + settings.foveatedRegionCenterX = std::clamp(it->get(), 0.0f, 1.0f); + if (auto it = o_json.find("FoveatedRegionCenterY"); it != o_json.end() 
&& it->is_number()) + settings.foveatedRegionCenterY = std::clamp(it->get(), 0.0f, 1.0f); + if (auto it = o_json.find("UseEyeTracking"); it != o_json.end() && it->is_boolean()) + settings.useEyeTracking = it->get(); + if (auto it = o_json.find("DebugVisualization"); it != o_json.end() && it->is_boolean()) + settings.debugVisualization = it->get(); + if (auto it = o_json.find("DebugSkipMerge"); it != o_json.end() && it->is_boolean()) + settings.debugSkipMerge = it->get(); + if (auto it = o_json.find("DebugForceAllStencil"); it != o_json.end() && it->is_boolean()) + settings.debugForceAllStencil = it->get(); + if (auto it = o_json.find("DebugForceAllReprojectCS"); it != o_json.end() && it->is_boolean()) + settings.debugForceAllReprojectCS = it->get(); + if (auto it = o_json.find("DebugDepthMap"); it != o_json.end() && it->is_boolean()) + settings.debugDepthMap = it->get(); + if (auto it = o_json.find("FullBlendDistance"); it != o_json.end() && it->is_number()) + settings.fullBlendDistance = std::clamp(it->get(), 0.0f, 50000.0f); + if (auto it = o_json.find("POMDepthScale"); it != o_json.end() && it->is_number()) + settings.pomDepthScale = std::clamp(it->get(), 0.0f, 500.0f); +} + +void VRStereoOptimizations::RestoreDefaultSettings() +{ + settings = {}; +} + +//============================================================================= +// RESOURCE SETUP +//============================================================================= + +void VRStereoOptimizations::SetupResources() +{ + if (!REL::Module::IsVR()) + return; + + auto device = globals::d3d::device; + auto renderer = globals::game::renderer; + + // Constant buffers + paramsCB = eastl::make_unique(ConstantBufferDesc()); + + // Get main RT dimensions for per-eye calculations + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + D3D11_TEXTURE2D_DESC mainDesc; + main.texture->GetDesc(&mainDesc); + + // Per-pixel mode texture (R8_UINT, full SBS resolution = both eyes) + { + 
D3D11_TEXTURE2D_DESC modeDesc{}; + modeDesc.Width = mainDesc.Width; + modeDesc.Height = mainDesc.Height; + modeDesc.MipLevels = 1; + modeDesc.ArraySize = 1; + modeDesc.Format = DXGI_FORMAT_R8_UINT; + modeDesc.SampleDesc.Count = 1; + modeDesc.SampleDesc.Quality = 0; + modeDesc.Usage = D3D11_USAGE_DEFAULT; + modeDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + modeDesc.CPUAccessFlags = 0; + modeDesc.MiscFlags = 0; + + texPerPixelMode = eastl::make_unique(modeDesc); + texPerPixelMode->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = DXGI_FORMAT_R8_UINT, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + texPerPixelMode->CreateUAV(D3D11_UNORDERED_ACCESS_VIEW_DESC{ + .Format = DXGI_FORMAT_R8_UINT, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } }); + } + + // Depth-stencil state for stencil write pass: + // Depth test OFF (not rendering geometry), stencil ALWAYS + REPLACE with ref=1 + { + D3D11_DEPTH_STENCIL_DESC dssDesc{}; + dssDesc.DepthEnable = FALSE; + dssDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; + dssDesc.StencilEnable = TRUE; + dssDesc.StencilReadMask = 0xFF; + dssDesc.StencilWriteMask = 0xFF; + dssDesc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dssDesc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + dssDesc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE; + dssDesc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + dssDesc.BackFace = dssDesc.FrontFace; + + DX::ThrowIfFailed(device->CreateDepthStencilState(&dssDesc, stencilWriteDSS.put())); + } + + // Rasterizer state for stencil write: no culling, no depth clip + { + D3D11_RASTERIZER_DESC rsDesc{}; + rsDesc.FillMode = D3D11_FILL_SOLID; + rsDesc.CullMode = D3D11_CULL_NONE; + rsDesc.DepthClipEnable = FALSE; + + DX::ThrowIfFailed(device->CreateRasterizerState(&rsDesc, stencilWriteRS.put())); + } + + // Read-only depth DSV for stencil write pass: allows 
simultaneous depth SRV binding. + // We write stencil but never write depth, so D3D11_DSV_READ_ONLY_DEPTH is safe. + { + auto& depthData = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (depthData.views[0] && depthData.texture) { + D3D11_DEPTH_STENCIL_VIEW_DESC dsvDesc{}; + depthData.views[0]->GetDesc(&dsvDesc); + dsvDesc.Flags = D3D11_DSV_READ_ONLY_DEPTH; + + DX::ThrowIfFailed(device->CreateDepthStencilView(depthData.texture, &dsvDesc, stencilWriteReadOnlyDSV.put())); + } else { + logger::warn("[VRStereoOptimizations] Could not create read-only DSV: depth stencil data not available"); + } + } + + CompileShaders(); + + logger::info("[VRStereoOptimizations] Resources created: mode tex {}x{} (full SBS)", mainDesc.Width, mainDesc.Height); +} + +void VRStereoOptimizations::CompileShaders() +{ + std::vector> csDefines = { + { "VR", nullptr }, + { "FRAMEBUFFER", nullptr } + }; + + std::vector> vspsDefines = { + { "VR", nullptr } + }; + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilCS.hlsl", csDefines, "cs_5_0")) + stencilCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilCS"); + + { + auto debugDefines = csDefines; + debugDefines.push_back({ "DEBUG_DEPTH_MAP", nullptr }); + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilCS.hlsl", debugDefines, "cs_5_0")) + stencilDebugDepthMapCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilCS (DEBUG_DEPTH_MAP)"); + } + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilWriteVS.hlsl", vspsDefines, "vs_5_0")) + stencilWriteVS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilWriteVS"); + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilWritePS.hlsl", vspsDefines, "ps_5_0")) + 
stencilWritePS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilWritePS"); + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\ReprojectionCS.hlsl", csDefines, "cs_5_0")) + reprojectionCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile ReprojectionCS"); + +} + +void VRStereoOptimizations::ClearShaderCache() +{ + stencilCS = nullptr; + stencilDebugDepthMapCS = nullptr; + stencilWriteVS = nullptr; + stencilWritePS = nullptr; + reprojectionCS = nullptr; + dssCache.clear(); +} + +void VRStereoOptimizations::Reset() +{ + stencilActive = false; + stencilSwapCount = 0; +} + +//============================================================================= +// IMGUI SETTINGS +//============================================================================= + +void VRStereoOptimizations::DrawSettings() +{ + const char* modeNames[] = { "Off", "Enable" }; + int currentMode = static_cast(settings.stereoMode); + if (ImGui::Combo("Feature Enable", ¤tMode, modeNames, IM_ARRAYSIZE(modeNames))) + settings.stereoMode = static_cast(currentMode); + + if (settings.stereoMode == StereoMode::Off) + return; + + ImGui::SliderFloat("Disocclusion Depth Threshold", &settings.disocclusionDepthThreshold, 0.001f, 0.1f, "%.4f"); + + if (globals::state->IsDeveloperMode()) { + if (ImGui::TreeNode("Debug")) { + ImGui::SliderFloat("Full Blend Distance", &settings.fullBlendDistance, 0.0f, 10000.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Geometry closer than this distance (game units) is fully shaded in both eyes and bilaterally blended for 2x supersampling. 0 = disabled."); + + ImGui::SliderFloat("POM Depth Scale", &settings.pomDepthScale, 0.0f, 500.0f, "%.1f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Scale factor for POM depth correction in stereo reprojection.\n1.0 = physical scale. 
Increase for more visible POM stereo depth."); + ImGui::Checkbox("Skip Pixel Reprojection", &settings.debugSkipMerge); + ImGui::Checkbox("Full Blend Depth View", &settings.debugFullBlendDepth); + ImGui::Checkbox("Debug POM Depth", &settings.debugPOMDepth); + if (settings.debugFullBlendDepth) + ImGui::TextColored(ImVec4(0, 1, 1, 1), " Cyan = full blend zone (closer = stronger tint)"); + ImGui::Text("Stencil swaps this frame: %u", stencilSwapCount); + ImGui::TreePop(); + } + } +} + +//============================================================================= +// CONSTANT BUFFER UPDATE +//============================================================================= + +void VRStereoOptimizations::UpdateConstantBuffer() +{ + float2 resolution = Util::ConvertToDynamic(globals::state->screenSize); + + VRStereoOptParams params{}; + params.FrameDim[0] = resolution.x; + params.FrameDim[1] = resolution.y; + params.RcpFrameDim[0] = 1.0f / resolution.x; + params.RcpFrameDim[1] = 1.0f / resolution.y; + params.StereoModeValue = static_cast(settings.stereoMode); + params.DisocclusionThreshold = settings.disocclusionDepthThreshold; + params.EdgeDepthThreshold = settings.edgeDepthThreshold; + params.EdgeWidth = 2; + params.QualityJitter[0] = settings.qualityJitterOffset; + params.QualityJitter[1] = settings.qualityJitterOffset; + params.FoveatedRadius = settings.foveatedRegionRadius; + params.FoveatedCenter[0] = settings.foveatedRegionCenterX; + params.FoveatedCenter[1] = settings.foveatedRegionCenterY; + params.MinEdgeDistance = settings.minEdgeDistance; + params.FullBlendDistance = settings.fullBlendDistance; + + paramsCB->Update(params); +} + +//============================================================================= +// PHASE 1: STENCIL CLASSIFICATION + WRITE +//============================================================================= + +void VRStereoOptimizations::DispatchStencil() +{ + if (!REL::Module::IsVR()) + return; + if (settings.stereoMode == 
StereoMode::Off) + return; + if (!stencilCS || !stencilWriteVS || !stencilWritePS || !texPerPixelMode || !paramsCB || + !stencilWriteReadOnlyDSV || !stencilWriteDSS || !stencilWriteRS) + return; + + ZoneScoped; + TracyD3D11Zone(globals::state->tracyCtx, "VR Stereo Opt - Stencil"); + + if (globals::state->frameAnnotations) + globals::state->BeginPerfEvent("VR Stereo Opt - Stencil"); + + auto context = globals::d3d::context; + + UpdateConstantBuffer(); + auto cbPtr = paramsCB->CB(); + // Use live depth buffer (kMAIN) instead of kPOST_ZPREPASS_COPY — at StartDeferred time, + // kPOST_ZPREPASS_COPY is stale (previous frame). kMAIN has fresh z-prepass depth so + // StencilCS can correctly detect sky-vs-geometry edges in the current frame. + auto renderer = globals::game::renderer; + auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + if (!depthSRV) { + logger::warn("[VRStereoOptimizations] DispatchStencil: depthSRV is null, skipping"); + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); + return; + } + + // Dispatch classification CS over Eye 1 region + // Input: t0 = depth, b1 = params CB + // Output: u0 = per-pixel mode texture + { + ID3D11ShaderResourceView* srvs[1]{ depthSRV }; + ID3D11UnorderedAccessView* uavs[1]{ texPerPixelMode->uav.get() }; + + context->CSSetConstantBuffers(1, 1, &cbPtr); + context->CSSetShaderResources(0, 1, srvs); + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + auto* activeStencilCS = (settings.debugDepthMap && stencilDebugDepthMapCS) ? 
stencilDebugDepthMapCS.get() : stencilCS.get(); + context->CSSetShader(activeStencilCS, nullptr, 0); + + uint32_t fullWidth = texPerPixelMode->desc.Width; + uint32_t fullHeight = texPerPixelMode->desc.Height; + context->Dispatch((fullWidth + 7) / 8, (fullHeight + 7) / 8, 1); + + // Cleanup CS bindings + ID3D11ShaderResourceView* nullSRV = nullptr; + ID3D11UnorderedAccessView* nullUAV = nullptr; + ID3D11Buffer* nullCB = nullptr; + context->CSSetShaderResources(0, 1, &nullSRV); + context->CSSetUnorderedAccessViews(0, 1, &nullUAV, nullptr); + context->CSSetConstantBuffers(1, 1, &nullCB); + context->CSSetShader(nullptr, nullptr, 0); + } + + // Transfer classification to hardware stencil buffer + ExecuteStencilWritePass(); + + stencilActive = true; + stencilSwapCount = 0; + + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); +} + +void VRStereoOptimizations::ExecuteStencilWritePass() +{ + auto context = globals::d3d::context; + auto renderer = globals::game::renderer; + + // ===== SAVE FULL D3D11 PIPELINE STATE ===== + + ID3D11RenderTargetView* savedRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {}; + ID3D11DepthStencilView* savedDSV = nullptr; + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, &savedDSV); + + ID3D11DepthStencilState* savedDSS = nullptr; + UINT savedStencilRef = 0; + context->OMGetDepthStencilState(&savedDSS, &savedStencilRef); + + ID3D11BlendState* savedBlendState = nullptr; + FLOAT savedBlendFactor[4] = {}; + UINT savedSampleMask = 0; + context->OMGetBlendState(&savedBlendState, savedBlendFactor, &savedSampleMask); + + ID3D11RasterizerState* savedRS = nullptr; + context->RSGetState(&savedRS); + + D3D11_VIEWPORT savedViewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {}; + UINT numViewports = D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + context->RSGetViewports(&numViewports, savedViewports); + + ID3D11VertexShader* savedVS = nullptr; + context->VSGetShader(&savedVS, 
nullptr, nullptr); + + ID3D11PixelShader* savedPS = nullptr; + context->PSGetShader(&savedPS, nullptr, nullptr); + + ID3D11GeometryShader* savedGS = nullptr; + context->GSGetShader(&savedGS, nullptr, nullptr); + + ID3D11InputLayout* savedInputLayout = nullptr; + context->IAGetInputLayout(&savedInputLayout); + + D3D11_PRIMITIVE_TOPOLOGY savedTopology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + context->IAGetPrimitiveTopology(&savedTopology); + + ID3D11ShaderResourceView* savedPSSRVs[2] = {}; + context->PSGetShaderResources(0, 2, savedPSSRVs); + + ID3D11Buffer* savedPSCB = nullptr; + context->PSGetConstantBuffers(1, 1, &savedPSCB); + + // ===== SET UP STENCIL WRITE PASS ===== + + // Use our custom read-only-depth DSV to allow simultaneous depth SRV binding (t1). + // D3D11_DSV_READ_ONLY_DEPTH permits depth SRV + stencil write simultaneously. + // Using views[0] would cause D3D11 to silently NULL the depth SRV. + // depthData.readOnlyViews[0] has BOTH read-only flags and doesn't allow stencil writes. + // Clear stencil buffer to 0 before writing classification. + // The engine's z-prepass may have written stencil values (e.g., stencil=1) for rendered geometry. + // Without this clear, StencilWritePS discards for MODE_DISOCCLUDED pixels leave the engine's + // stencil value intact, which can match our NOT_EQUAL ref=1 culling test and incorrectly + // skip those pixels during the Lighting pass. 
+ { + auto& depthData = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + context->ClearDepthStencilView(depthData.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0); + } + + context->OMSetRenderTargets(0, nullptr, stencilWriteReadOnlyDSV.get()); + context->OMSetDepthStencilState(stencilWriteDSS.get(), 1); + context->RSSetState(stencilWriteRS.get()); + + // Eye 1 viewport (right half of SBS buffer) + { + D3D11_TEXTURE2D_DESC mainDesc; + renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN].texture->GetDesc(&mainDesc); + + D3D11_VIEWPORT vp{}; + vp.TopLeftX = static_cast(mainDesc.Width / 2); + vp.TopLeftY = 0.0f; + vp.Width = static_cast(mainDesc.Width / 2); + vp.Height = static_cast(mainDesc.Height); + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + } + + // Bind shaders and mode texture + context->VSSetShader(stencilWriteVS.get(), nullptr, 0); + context->PSSetShader(stencilWritePS.get(), nullptr, 0); + context->GSSetShader(nullptr, nullptr, 0); + + ID3D11ShaderResourceView* modeSRV = texPerPixelMode->srv.get(); + context->PSSetShaderResources(0, 1, &modeSRV); + + auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + context->PSSetShaderResources(1, 1, &depthSRV); + + // Bind params CB to pixel shader (CS and PS have separate CB bindings) + auto cbPtr = paramsCB->CB(); + context->PSSetConstantBuffers(1, 1, &cbPtr); + + // Fullscreen triangle: no VB/IB, procedurally generated in VS + context->IASetInputLayout(nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + context->Draw(3, 0); + + // ===== RESTORE FULL D3D11 PIPELINE STATE ===== + + ID3D11ShaderResourceView* nullSRVs[2] = {}; + context->PSSetShaderResources(0, 2, nullSRVs); + + context->PSSetConstantBuffers(1, 1, &savedPSCB); + + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, savedDSV); + 
context->OMSetDepthStencilState(savedDSS, savedStencilRef); + context->OMSetBlendState(savedBlendState, savedBlendFactor, savedSampleMask); + context->RSSetState(savedRS); + context->RSSetViewports(numViewports, savedViewports); + context->VSSetShader(savedVS, nullptr, 0); + context->PSSetShader(savedPS, nullptr, 0); + context->GSSetShader(savedGS, nullptr, 0); + context->IASetInputLayout(savedInputLayout); + context->IASetPrimitiveTopology(savedTopology); + context->PSSetShaderResources(0, 2, savedPSSRVs); + + // Release COM references acquired by Get* calls + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + if (savedDSV) + savedDSV->Release(); + if (savedDSS) + savedDSS->Release(); + if (savedBlendState) + savedBlendState->Release(); + if (savedRS) + savedRS->Release(); + if (savedVS) + savedVS->Release(); + if (savedPS) + savedPS->Release(); + if (savedGS) + savedGS->Release(); + if (savedInputLayout) + savedInputLayout->Release(); + if (savedPSSRVs[0]) + savedPSSRVs[0]->Release(); + if (savedPSSRVs[1]) + savedPSSRVs[1]->Release(); + if (savedPSCB) + savedPSCB->Release(); +} + +void VRStereoOptimizations::PerformLateStencilWrite() +{ + // Placeholder for future multi-pass stencil strategies +} + +//============================================================================= +// DSS CACHE: CLONE + STENCIL NOT_EQUAL ENFORCEMENT +//============================================================================= + +ID3D11DepthStencilState* VRStereoOptimizations::GetOrCreateModifiedDSS(ID3D11DepthStencilState* originalDSS) +{ + if (!stencilActive) + return originalDSS; + + // Check cache (nullptr is a valid key — represents D3D11 default state) + if (auto it = dssCache.find(originalDSS); it != dssCache.end()) + return it->second.get(); + + D3D11_DEPTH_STENCIL_DESC desc; + if (originalDSS) { + originalDSS->GetDesc(&desc); + } else { + // D3D11 default state: depth enabled, stencil disabled + desc = {}; + desc.DepthEnable = TRUE; + desc.DepthWriteMask = 
D3D11_DEPTH_WRITE_MASK_ALL; + desc.DepthFunc = D3D11_COMPARISON_LESS; + desc.StencilEnable = FALSE; + desc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK; + desc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; + desc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + desc.BackFace = desc.FrontFace; + } + + desc.StencilEnable = TRUE; + desc.StencilReadMask = 0xFF; + desc.StencilWriteMask = 0x00;  // read-only stencil: draws using this state cannot change the marks + + desc.FrontFace.StencilFunc = D3D11_COMPARISON_NOT_EQUAL;  // pass only where stencil != StencilRef + desc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + desc.BackFace = desc.FrontFace; + + winrt::com_ptr<ID3D11DepthStencilState> modifiedDSS; + HRESULT hr = globals::d3d::device->CreateDepthStencilState(&desc, modifiedDSS.put()); + if (FAILED(hr)) { + logger::warn("[VRStereoOptimizations] Failed to create modified DSS (HRESULT: {:#x})", static_cast<uint32_t>(hr)); + return originalDSS;  // not cached, so creation is retried on the next call + } + + auto* result = modifiedDSS.get(); + dssCache[originalDSS] = std::move(modifiedDSS);  // the cache owns the clone; the key is the caller's raw pointer + + return result; +} + +//============================================================================= +// PHASE 3: REPROJECTION COMPUTE SHADER +//============================================================================= + +void VRStereoOptimizations::DispatchReprojection() +{ + if (!REL::Module::IsVR()) + return; + // Every skip path below must drop the stencil override (DeactivateStencil() is a no-op when it is + // not active); otherwise GetOrCreateModifiedDSS() keeps handing out stencil-enforcing states. + if (settings.stereoMode == StereoMode::Off) { + DeactivateStencil(); + return; + } + if (settings.debugSkipMerge || !reprojectionCS || !texPerPixelMode || !paramsCB) { + DeactivateStencil(); + return; + } + + ZoneScoped; + TracyD3D11Zone(globals::state->tracyCtx, "VR Stereo Opt - Reprojection"); + + if (globals::state->frameAnnotations) + globals::state->BeginPerfEvent("VR Stereo Opt - Reprojection"); + + auto context = globals::d3d::context; + auto 
renderer = globals::game::renderer; + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + + UpdateConstantBuffer(); + auto cbPtr = paramsCB->CB(); + auto* depthSRV = Util::GetCurrentSceneDepthSRV(); + + // Bind: t0 = depth, t1 = mode texture, u0 = main UAV, b1 = params + ID3D11ShaderResourceView* srvs[2]{ + depthSRV, + texPerPixelMode->srv.get() + }; + ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + + context->CSSetConstantBuffers(1, 1, &cbPtr); + context->CSSetShaderResources(0, 2, srvs); + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + context->CSSetShader(reprojectionCS.get(), nullptr, 0); + + // Dispatch over Eye 1 only (shader treats dtid as Eye 1 local coords) + uint32_t eyeWidth = texPerPixelMode->desc.Width / 2; + uint32_t eyeHeight = texPerPixelMode->desc.Height; + context->Dispatch((eyeWidth + 7) / 8, (eyeHeight + 7) / 8, 1); + + // Cleanup + ID3D11ShaderResourceView* nullSRVs[2] = {}; + ID3D11UnorderedAccessView* nullUAV = nullptr; + ID3D11Buffer* nullCB = nullptr; + context->CSSetShaderResources(0, 2, nullSRVs); + context->CSSetUnorderedAccessViews(0, 1, &nullUAV, nullptr); + context->CSSetConstantBuffers(1, 1, &nullCB); + context->CSSetShader(nullptr, nullptr, 0); + + // Stencil culling is done for this frame + logger::trace("[VRStereoOptimizations] Frame: stencilSwapCount={}", stencilSwapCount); + stencilActive = false; + + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); +} + +void VRStereoOptimizations::DeactivateStencil() +{ + if (!stencilActive) + return; + logger::trace("[VRStereoOptimizations] Frame: stencilSwapCount={}", stencilSwapCount); + stencilActive = false; +} + diff --git a/src/Features/VRStereoOptimizations.h b/src/Features/VRStereoOptimizations.h new file mode 100644 index 0000000000..ff264d8747 --- /dev/null +++ b/src/Features/VRStereoOptimizations.h @@ -0,0 +1,198 @@ +#pragma once + +#include +using json = nlohmann::json; + +#include +#include +#include + +/** + * 
@brief VR Stereo Rendering Optimizations feature. + * + * Uses hardware stencil culling to skip Eye 1 pixel shading for pixels that can be + * reprojected from Eye 0 via lateral stereo reprojection, then runs a compute shader + * to fill those pixels. This avoids redundant pixel shading in overlapping stereo regions. + * + * Pipeline: + * 1. DispatchStencil() - CS classifies per-pixel reprojection viability into a mode texture, + * then a fullscreen VS/PS pass writes that classification into the stencil buffer. + * 2. (Game renders Eye 1) - Hardware stencil test skips shading for marked pixels. + * 3. DispatchReprojection() - CS reprojects Eye 0 color into the skipped Eye 1 pixels. + */ +struct VRStereoOptimizations +{ + bool loaded = false; + + //============================================================================= + // ENUMS + //============================================================================= + + /// Operating mode for stereo reprojection + enum class StereoMode : uint32_t + { + Off = 0, ///< Feature disabled + Enable = 1 ///< Stereo reprojection enabled + }; + + /// Per-pixel classification written by StencilCS + enum PixelMode : uint8_t + { + MODE_DISOCCLUDED = 0, ///< Fully shaded, no reprojection, no blend + MODE_EDGE = 1, ///< Fully shaded + bilateral blend with other eye + MODE_MAIN = 2, ///< Eye 0: no reproject (Perf) / bilateral (Quality). 
Eye 1: overwrite (Perf) / bilateral (Quality) + MODE_EDGE_NEIGHBOUR = 3, ///< Outer band: background pixels near edge, blended in post-process + MODE_FULL_BLEND = 4, ///< Near-camera pixels: fully shaded in both eyes + bilateral blended + }; + + //============================================================================= + // PUBLIC METHODS + //============================================================================= + + void SetupResources(); + void Reset(); + void DrawSettings(); + void SaveSettings(json& o_json); + void LoadSettings(json& o_json); + void RestoreDefaultSettings(); + void ClearShaderCache(); + + //============================================================================= + // SETTINGS + //============================================================================= + + struct Settings + { + StereoMode stereoMode = StereoMode::Enable; + float disocclusionDepthThreshold = 0.01f; + float edgeDepthThreshold = 0.05f; + float minEdgeDistance = 5000.0f; ///< Minimum linearized depth for edge AA (game units) + float fullBlendDistance = 0.0f; ///< Linearized depth below which both eyes are fully shaded + blended (game units) + float pomDepthScale = 22.5f; ///< Scale factor for POM depth correction in stereo reprojection + bool debugFullBlendDepth = false; ///< Show full blend depth zone as cyan overlay + float qualityJitterOffset = 0.125f; + float foveatedRegionRadius = 0.3f; + float foveatedRegionCenterX = 0.5f; + float foveatedRegionCenterY = 0.5f; + bool useEyeTracking = false; + + int reprojectionMode = 5; // 0=Blend, 4=Overwrite, 5=Overwrite Eye1 Only + + // Debug controls + bool debugVisualization = false; + bool debugSkipMerge = false; + bool debugForceAllStencil = false; + bool debugForceAllReprojectCS = false; + bool debugDepthMap = false; + bool debugPOMDepth = false; ///< Show POM depth data (Reflectance.w) as heatmap overlay + + } settings; + + //============================================================================= + // GPU 
CONSTANT BUFFER (must match HLSL cbuffer layout exactly) + //============================================================================= + + struct alignas(16) VRStereoOptParams + { + float FrameDim[2]; // Full stereo buffer dimensions + float RcpFrameDim[2]; // 1.0 / FrameDim + + uint32_t StereoModeValue; // Cast of StereoMode enum (0 = Off, 1 = Enable) + float DisocclusionThreshold; + float EdgeDepthThreshold; + uint32_t EdgeWidth; + + float QualityJitter[2]; // Sub-pixel jitter offset (Quality mode) + float FoveatedRadius; + float pad2; // Unused; completes this 16-byte row + + float FoveatedCenter[2]; // Foveal region center UV + float MinEdgeDistance; + float FullBlendDistance; // Linearized depth for full blend zone + }; + static_assert(sizeof(VRStereoOptParams) % 16 == 0, "VRStereoOptParams must be 16-byte aligned for HLSL cbuffer."); + + //============================================================================= + // PUBLIC API + //============================================================================= + + /** + * @brief Classify Eye 1 pixels and write stencil marks. + * + * Dispatches the stencil classification CS, then performs a fullscreen triangle pass + * to write the classification into the hardware stencil buffer. + * Called from Deferred::StartDeferred() after OverrideBlendStates(). + */ + void DispatchStencil(); + + /** + * @brief Reproject Eye 0 color into stencil-culled Eye 1 pixels. + * + * Dispatches a CS over the Eye 1 half that reads scene depth and the per-pixel mode texture and + * writes the main render target in place through its UAV, then ends stencil culling for the frame. + * Called from Deferred::DeferredPasses() after DeferredCompositeCS. + */ + void DispatchReprojection(); + + /** + * @brief Creates or retrieves a modified DSS with stencil NOT_EQUAL test. + * + * Clones the given DSS with read-only stencil (WriteMask=0x00, Func=NOT_EQUAL, ref=1) + * so that pixels marked by our stencil write pass are skipped during normal rendering. + * Cached per unique input DSS pointer (nullptr is a valid key and stands for the D3D11 default state). + * + * @param originalDSS The original depth-stencil state to modify. 
+ * @return Modified DSS with stencil test, or original if creation fails. + */ + ID3D11DepthStencilState* GetOrCreateModifiedDSS(ID3D11DepthStencilState* originalDSS); + + /// Whether the stencil pass is currently active this frame + bool IsStencilActive() const { return stencilActive; } + + /// Deactivate stencil culling (called from Deferred after geometry rendering completes) + void DeactivateStencil(); + + /// Get mode texture SRV for external consumers (e.g., DeferredCompositeCS Eye 1 skip) + ID3D11ShaderResourceView* GetModeTextureSRV() const { return texPerPixelMode ? texPerPixelMode->srv.get() : nullptr; } + +private: + //============================================================================= + // INTERNAL METHODS + //============================================================================= + + /// Fullscreen triangle pass: reads mode texture, writes stencil ref=1 for MODE_MAIN pixels + void ExecuteStencilWritePass(); + + /// Late stencil write callback (placeholder for future multi-pass strategies) + void PerformLateStencilWrite(); + + /// Compiles all shaders used by this feature + void CompileShaders(); + + /// Updates the constant buffer with current settings and frame dimensions + void UpdateConstantBuffer(); + + //============================================================================= + // GPU RESOURCES + //============================================================================= + + eastl::unique_ptr paramsCB; + eastl::unique_ptr texPerPixelMode; ///< R8_UINT classification texture (full SBS resolution) + eastl::unique_ptr reprojectionCopyTex; ///< Copy of main RT for reprojection read + + winrt::com_ptr stencilWriteDSS; + winrt::com_ptr stencilWriteRS; + winrt::com_ptr stencilWriteReadOnlyDSV; ///< Read-only-depth DSV for stencil write pass (allows simultaneous depth SRV) + + winrt::com_ptr stencilCS; + winrt::com_ptr stencilDebugDepthMapCS; + winrt::com_ptr stencilWriteVS; + winrt::com_ptr stencilWritePS; + winrt::com_ptr 
reprojectionCS; + + /// Cache of original DSS -> modified DSS with stencil NOT_EQUAL enforcement + std::unordered_map> dssCache; + + bool stencilActive = false; + uint32_t stencilSwapCount = 0; +}; diff --git a/src/Globals.cpp b/src/Globals.cpp index e90c3bf4ce..52de7e7bd4 100644 --- a/src/Globals.cpp +++ b/src/Globals.cpp @@ -266,13 +266,79 @@ namespace globals { static void thunk(ID3D11DeviceContext* This, ID3D11Resource* pResource, UINT Subresource) { - if (*globals::game::perFrame.get() == pResource && globals::game::mappedFrameBuffer) + if (*globals::game::perFrame.get() == pResource && globals::game::mappedFrameBuffer) { CacheFramebuffer(); + } func(This, pResource, Subresource); } static inline REL::Relocation func; }; + /** + * @brief Hooked OMSetDepthStencilState — replaces DSS with stencil-enforcing version when VR stereo opt is active. + * + * vtable index 36 for ID3D11DeviceContext::OMSetDepthStencilState. + * When VRStereoOptimizations has written stencil marks, this hook transparently swaps + * the game's DSS for a modified version that adds a stencil NOT_EQUAL test, causing + * marked Eye 1 pixels to be skipped during normal rendering. + */ + struct ID3D11DeviceContext_OMSetDepthStencilState + { + static void thunk(ID3D11DeviceContext* This, ID3D11DepthStencilState* pDepthStencilState, UINT StencilRef) + { + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vr.stereoOpt; + if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { + pDepthStencilState = stereoOpt.GetOrCreateModifiedDSS(pDepthStencilState); + StencilRef = 1; // Must match the ref written by our stencil pass + } + } + func(This, pDepthStencilState, StencilRef); + } + static inline REL::Relocation func; + }; + + /** + * @brief Hooked ClearDepthStencilView — blocks stencil clears when VR stereo opt stencil is active. + * + * vtable index 53 for ID3D11DeviceContext::ClearDepthStencilView. 
+ * Prevents the game from clearing our stencil marks between the stencil write and + * the reprojection pass by stripping the D3D11_CLEAR_STENCIL flag. + */ + struct ID3D11DeviceContext_ClearDepthStencilView + { + static void thunk(ID3D11DeviceContext* This, ID3D11DepthStencilView* pDepthStencilView, UINT ClearFlags, FLOAT Depth, UINT8 Stencil) + { + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vr.stereoOpt; + if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { + // Only protect the main scene DSV — allow other DSVs (and depth-only or null-view calls) through untouched + auto renderer = globals::game::renderer; + auto& mainDepth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (pDepthStencilView && (ClearFlags & D3D11_CLEAR_STENCIL) && mainDepth.views[0]) { + // Compare underlying resources rather than view pointers: several DSVs can alias the main depth texture + ID3D11Resource* clearRes = nullptr; + ID3D11Resource* mainRes = nullptr; + pDepthStencilView->GetResource(&clearRes); + mainDepth.views[0]->GetResource(&mainRes); + bool isMainDSV = (clearRes == mainRes); + if (clearRes) + clearRes->Release(); + if (mainRes) + mainRes->Release(); + if (isMainDSV) { + ClearFlags &= ~D3D11_CLEAR_STENCIL; + if (ClearFlags == 0) + return; + } + } + } + } + func(This, pDepthStencilView, ClearFlags, Depth, Stencil); + } + static inline REL::Relocation<decltype(thunk)> func; + }; + + /** + * @brief Installs hooks on the Map and Unmap methods of the provided D3D11 device context. 
* @@ -282,5 +348,11 @@ namespace globals { stl::detour_vfunc<14, ID3D11DeviceContext_Map>(a_context); stl::detour_vfunc<15, ID3D11DeviceContext_Unmap>(a_context); + + // VR stereo optimization hooks: intercept DSS and stencil clear + if (globals::game::isVR) { + stl::detour_vfunc<36, ID3D11DeviceContext_OMSetDepthStencilState>(a_context); + stl::detour_vfunc<53, ID3D11DeviceContext_ClearDepthStencilView>(a_context); + } } } diff --git a/src/State.cpp b/src/State.cpp index 13bf1681e7..89ce7f819f 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -11,6 +11,7 @@ #include "Features/TerrainBlending.h" #include "Features/TerrainHelper.h" #include "Features/Upscaling.h" +#include "Features/VRStereoOptimizations.h" #include "Features/VolumetricShadows.h" #include "Features/WeatherEditor.h" #include "Menu.h" From a12530b318f6aee09660258298de298fe08b1ee4 Mon Sep 17 00:00:00 2001 From: vrnord Date: Mon, 23 Mar 2026 09:42:40 -0600 Subject: [PATCH 2/3] feat(vr-dlss): VR DLSS viewport scaling with periphery TAA Process a configurable central crop through DLSS, reducing GPU cost. Periphery filled with bilinear upscale or TAA'd content via conductor. Feathered composite at crop boundary hides quality transition. Nasal crop offset shifts DLSS region toward nose for higher acuity. Key components: - Per-eye DLSS evaluation with viewport-scaled projection matrices - ClearHMDMask zeroes hidden area mesh pixels before DLSS - VRPeripheryFillCS bilinear upscales render-res to display-res - FeatheredCompositePS alpha-blends DLSS crop onto periphery - TAAReorder conductor architecture for periphery TAA - REL::Module::IsVR() in device-creation hook (timing fix) - Bounds guards in ClearHMDMaskCS and ForceAlphaCS - Per-frame flag resets in TAAReorder - Complete pipeline state save/restore in BSImagespaceShaderHook - Crop parameter clamping before Streamline constants Based on PureDark's Skyrim-Upscaler VR conductor architecture (MIT). 
Depends on: feat(vr): VR stereo reprojection optimizations Replaces PR #1983. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../Shaders/Upscaling/ClearHMDMaskCS.hlsl | 47 +- .../Shaders/Upscaling/DLSSCompositePS.hlsl | 48 + .../Shaders/Upscaling/DepthUpscalePS.hlsl | 70 ++ .../Upscaling/FeatheredCompositeCS.hlsl | 41 + .../Upscaling/FeatheredCompositePS.hlsl | 56 ++ .../Shaders/Upscaling/ForceAlphaCS.hlsl | 17 + .../Shaders/Upscaling/VRPeripheryFillCS.hlsl | 24 + src/Features/TAAReorder.cpp | 601 +++++++++++++ src/Features/TAAReorder.h | 182 ++++ src/Features/Upscaling.cpp | 826 ++++++++++++++++-- src/Features/Upscaling.h | 56 +- src/Features/Upscaling/Streamline.cpp | 204 ++++- src/Features/Upscaling/Streamline.h | 6 +- 13 files changed, 2080 insertions(+), 98 deletions(-) create mode 100644 features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl create mode 100644 features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl create mode 100644 features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl create mode 100644 features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl create mode 100644 features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl create mode 100644 features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl create mode 100644 src/Features/TAAReorder.cpp create mode 100644 src/Features/TAAReorder.h diff --git a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl index df107d9175..301b51651e 100644 --- a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl @@ -4,20 +4,53 @@ // depth == 0.0 is the unrendered/hidden area value (Skyrim reversed-Z: far plane = 0). // DepthIn is the combined stereo depth buffer; DepthOffsetX selects the eye's half. // ColorInOut is the isolated per-eye buffer; ColorOffsetX is always 0. 
+// +// When DepthWidth > 0, coordinate scaling is enabled: depth is at render-res while +// color is at display-res. The shader maps display-res color coordinates to render-res +// depth coordinates for the mask lookup. +// +// FallbackIn (t1): when bound, masked pixels read from this texture instead of writing +// black. When unbound, D3D11 returns (0,0,0,0) — same as clearing to black. +// FallbackOffsetX selects the eye's half in the stereo fallback texture. cbuffer ClearHMDMaskCB : register(b0) { - uint DepthOffsetX; // X offset into combined stereo depth (0 = left, eyeWidth = right) - uint ColorOffsetX; // X offset into color target (always 0 for per-eye buffers) - uint pad0; - uint pad1; + uint DepthOffsetX; // X offset into combined stereo depth (0 = left, eyeWidth = right) + uint ColorOffsetX; // X offset into color target (always 0 for per-eye buffers) + uint DepthOffsetY; // Y offset into combined stereo depth (non-zero when viewport scaling crops vertically) + uint FallbackOffsetX; // X offset into FallbackIn for stereo (0 when unused or left eye) + // Optional coordinate scaling (zero = disabled, for backwards compat) + uint DepthWidth; // render-res eye width; if 0, no scaling (1:1 depth/color coords) + uint DepthHeight; // render-res eye height + uint ColorWidth; // display-res eye width + uint ColorHeight; // display-res eye height }; Texture2D DepthIn : register(t0); +Texture2D FallbackIn : register(t1); RWTexture2D ColorInOut : register(u0); [numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { - // Read from stereo depth, write to potentially stereo color - if (DepthIn[dispatchID.xy + uint2(DepthOffsetX, 0)] == 0.0) - ColorInOut[dispatchID.xy + uint2(ColorOffsetX, 0)] = float4(0.0, 0.0, 0.0, 0.0); + uint w, h; + ColorInOut.GetDimensions(w, h); + if (dispatchID.x >= w || dispatchID.y >= h) + return; + + uint2 colorPos = dispatchID.xy + uint2(ColorOffsetX, 0); + uint2 depthPos; + + if (DepthWidth > 0) { + // Scale from 
display-res color coordinates to render-res depth coordinates + depthPos = uint2( + (dispatchID.x * DepthWidth) / ColorWidth, + (dispatchID.y * DepthHeight) / ColorHeight) + + uint2(DepthOffsetX, DepthOffsetY); + } else { + depthPos = dispatchID.xy + uint2(DepthOffsetX, DepthOffsetY); + } + + if (DepthIn[depthPos] == 0.0) + ColorInOut[colorPos] = FallbackIn[dispatchID.xy + uint2(FallbackOffsetX, 0)]; + // When FallbackIn is unbound (existing callers): returns (0,0,0,0) → clears to black + // When FallbackIn is bound (TAA mask restore): returns display RT content } diff --git a/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl new file mode 100644 index 0000000000..8cf6b900d3 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl @@ -0,0 +1,48 @@ +// Format-converting fullscreen pixel shader with optional bilinear upscale. +// Used by TAAReorder to composite between textures of different DXGI formats +// (e.g. R8G8B8A8_UNORM conductor RTs <-> R11G11B10_FLOAT kMAIN). +// The GPU's output merger handles format conversion automatically. +// +// BILINEAR_UPSCALE variant: upscales render-res content to display-res by +// mapping output pixel positions through the dynamic resolution scale, +// like PureDark's dynamicResScale in his blend shader. + +#include "Upscaling/UpscaleVS.hlsl" + +#ifdef PSHADER + +Texture2D Source : register(t0); + +# ifdef BILINEAR_UPSCALE + +cbuffer CompositeCB : register(b0) +{ + float2 DynResScale; // renderRes / displayRes (per-eye) + float2 EyeOffset; // (i * eyeWidth, 0) in texels + float2 SrcTexSize; // full texture dimensions in texels + float2 pad; +}; + +SamplerState LinearSampler : register(s0); + +float4 main(VS_OUTPUT input) : SV_Target +{ + // Map display-res pixel position to render-res source position. + // Subtract eye offset, scale to render-res, add eye offset back. 
+ float2 localPos = input.Position.xy - EyeOffset; + float2 srcLocal = localPos * DynResScale; + float2 srcPos = srcLocal + EyeOffset; + float2 srcUV = srcPos / SrcTexSize; + return Source.SampleLevel(LinearSampler, srcUV, 0); +} + +# else + +float4 main(VS_OUTPUT input) : SV_Target +{ + return Source.Load(int3(input.Position.xy, 0)); +} + +# endif // BILINEAR_UPSCALE + +#endif // PSHADER diff --git a/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl new file mode 100644 index 0000000000..e5650af665 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl @@ -0,0 +1,70 @@ +/** + * @file DepthUpscalePS.hlsl + * @brief Point-sampled depth buffer upscaling for VR depth-based culling + * + * When upscaling (FSR/DLSS) is active, the depth buffer is rendered at a lower + * resolution than the display. Skyrim VR's depth-based culling (OBBOcclusionTesting) + * reads from the depth buffer to determine object visibility, but with a mismatched + * resolution, objects may be incorrectly culled (appearing to flicker in/out of view). + * + * This shader upscales the low-resolution depth buffer to full resolution using + * pure point sampling. Previous conservative blending (GatherRed + lerp toward + * min depth) caused HAM mask bleed: depth == 0 values from the hidden area mesh + * leaked into valid depth through the 2x2 neighborhood blend, creating artifacts + * at the mask boundary after DRS upscaling. 
+ * + * Based on depth upscaling approach by vrnord + * https://github.com/vrnord/skyrim-community-shaders-VR-DLSS + */ + +#include "Upscaling/UpscaleVS.hlsl" + +#if defined(PSHADER) +# include "Common/FrameBuffer.hlsli" +# include "Common/SharedData.hlsli" + +typedef VS_OUTPUT PS_INPUT; + +struct PS_OUTPUT +{ + float Depth: SV_Depth; +}; + +Texture2D DepthLowRes : register(t0); + +cbuffer DepthUpscaleCB : register(b0) +{ + float2 SourceDim; // Full texture dimensions (texels) + float2 InvSourceDim; // 1.0 / SourceDim + float2 Scale; // resolutionScale (render/display ratio) + float2 Pad; +}; + +/** + * @brief Main pixel shader entry point + * + * Pure point-sampled depth upscaling. Maps display-res pixel position to + * render-res texel and loads directly — no blending, no mask bleed. + */ +PS_OUTPUT main(PS_INPUT input) +{ + PS_OUTPUT psout; + + // Map full-res UV to render-res UV (same transform as the engine's + // GetDynamicResolutionAdjustedScreenPosition). + float2 uv = Scale * input.TexCoord; + + // Per-eye clamping for SBS stereo: prevent sampling across the center seam. + bool isRight = input.TexCoord.x >= 0.5; + float halfScale = 0.5 * Scale.x; + uv.x = clamp(uv.x, isRight ? halfScale : 0.0, isRight ? 
Scale.x : halfScale); + uv.y = clamp(uv.y, 0.0, Scale.y); + + // Nearest texel coordinate — pure point sampling, no blending + int2 texel = int2(floor(uv * SourceDim)); + psout.Depth = DepthLowRes.Load(int3(texel, 0)); + + return psout; +} + +#endif diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl new file mode 100644 index 0000000000..16116fb4e0 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl @@ -0,0 +1,41 @@ +cbuffer FeatherCB : register(b0) +{ + uint CropX; // paste position X in output space + uint CropY; // paste position Y in output space + uint CropW; // crop width + uint CropH; // crop height + float FeatherWidth; // feather distance in pixels (inward from crop edge) + float3 pad; +}; + +Texture2D CropTexture : register(t0); // DLSS output (crop-sized, at {0,0}) +RWTexture2D OutputTexture : register(u0); // vrFinalOutput (already filled with periphery) + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { + // dispatchID is in crop-local space (0..CropW-1, 0..CropH-1) + int2 cropLocal = int2(dispatchID.xy); + if (cropLocal.x >= (int)CropW || cropLocal.y >= (int)CropH) + return; + + // Output pixel = crop-local + paste offset + int2 pixel = cropLocal + int2(CropX, CropY); + + // Distance from nearest crop edge (positive = inside) + float distLeft = (float)cropLocal.x; + float distRight = (float)(CropW - 1 - cropLocal.x); + float distTop = (float)cropLocal.y; + float distBottom = (float)(CropH - 1 - cropLocal.y); + float distFromEdge = min(min(distLeft, distRight), min(distTop, distBottom)); + + float4 dlss = CropTexture.Load(int3(cropLocal, 0)); + + if (FeatherWidth <= 0.0 || distFromEdge >= FeatherWidth) { + // Inside crop interior or no feathering: 100% DLSS + OutputTexture[pixel] = dlss; + } else { + // Feather zone: smooth blend from periphery (TAA-stabilized) to DLSS + float blend = smoothstep(0.0, 
FeatherWidth, distFromEdge); + float4 periphery = OutputTexture[pixel]; + OutputTexture[pixel] = lerp(periphery, dlss, blend); + } +} diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl new file mode 100644 index 0000000000..fb6ae3f277 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl @@ -0,0 +1,56 @@ +// Feathered DLSS crop composite using hardware alpha blending. +// Based on PureDark's approach from Skyrim-Upscaler VR (MIT license). +// +// The render target already contains TAA'd periphery content. +// We output float4(DLSSColor, featherAlpha) and let the output merger's +// SrcAlpha/InvSrcAlpha blend preserve the periphery in the feather zone +// and outside the crop rect entirely. + +#include "Upscaling/UpscaleVS.hlsl" + +#ifdef PSHADER + +Texture2D CropTexture : register(t0); +SamplerState LinearSampler : register(s0); + +cbuffer FeatheredCompositeCB : register(b0) +{ + float2 CropOrigin; // paste position (x, y) in output-eye pixel coords + float2 CropSize; // crop width, height in pixels + float FeatherWidth; // feather distance in pixels (inward from crop edge) + float _pad0; + float2 SrcUVOrigin; // UV origin in source texture for this crop region + float2 SrcUVScale; // UV scale: maps [0,1] crop-local UV to source texture UV range +}; + +float4 main(VS_OUTPUT input) : SV_Target +{ + float2 pixelPos = input.Position.xy; + + // Distance from each edge of the crop rect (positive = inside) + float distLeft = pixelPos.x - CropOrigin.x; + float distRight = (CropOrigin.x + CropSize.x) - pixelPos.x; + float distTop = pixelPos.y - CropOrigin.y; + float distBottom = (CropOrigin.y + CropSize.y) - pixelPos.y; + + float minDist = min(min(distLeft, distRight), min(distTop, distBottom)); + + // Outside crop rect: fully transparent (hardware blend preserves TAA'd periphery) + if (minDist <= 0.0) + return float4(0, 0, 0, 0); + + // Feather alpha: 
smoothstep ramp from 0 at edge to 1 at FeatherWidth inside + // (matches the smoothstep from the original CS for visual consistency) + float alpha = (FeatherWidth > 0.0) ? smoothstep(0.0, FeatherWidth, minDist) : 1.0; + + // Map pixel position to crop-local UV [0,1], then remap to source texture UV. + // For per-eye textures: SrcUVOrigin=(0,0), SrcUVScale=(1,1) (identity). + // For SBS textures: SrcUVOrigin/Scale select the correct eye's crop region. + float2 cropUV = (pixelPos - CropOrigin) / CropSize; + float2 srcUV = cropUV * SrcUVScale + SrcUVOrigin; + float3 dlssColor = CropTexture.SampleLevel(LinearSampler, srcUV, 0).rgb; + + return float4(dlssColor, alpha); +} + +#endif // PSHADER diff --git a/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl new file mode 100644 index 0000000000..0856c6bd26 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl @@ -0,0 +1,17 @@ +// Forces alpha to 1.0 across the entire texture. +// Used after DLSS center paste onto submit texture to ensure Scaleform UI renders. +// DLSS output may have alpha=0 (from R11G11B10→R8G8B8A8 conversion with no alpha source), +// which can prevent UI compositing in the DLSS center area. + +RWTexture2D ColorInOut : register(u0); + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { + uint w, h; + ColorInOut.GetDimensions(w, h); + if (dispatchID.x >= w || dispatchID.y >= h) + return; + + float4 c = ColorInOut[dispatchID.xy]; + c.a = 1.0; + ColorInOut[dispatchID.xy] = c; +} diff --git a/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl new file mode 100644 index 0000000000..315541e76d --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl @@ -0,0 +1,24 @@ +// Bilinear upscale from render-resolution per-eye buffer to display-resolution per-eye buffer. 
+// Used for VR viewport scaling: fills the full eye output with a cheap upscale so the +// periphery (outside the DLSS-processed center) is not black/empty. + +cbuffer PeripheryFillCB : register(b0) +{ + uint SrcWidth; + uint SrcHeight; + uint DstWidth; + uint DstHeight; +}; + +Texture2D SrcTexture : register(t0); +SamplerState LinearSampler : register(s0); +RWTexture2D DstTexture : register(u0); + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { + if (dispatchID.x >= DstWidth || dispatchID.y >= DstHeight) + return; + + // Normalized UV with half-pixel offset for correct bilinear sampling + float2 uv = (float2(dispatchID.xy) + 0.5) / float2(DstWidth, DstHeight); + DstTexture[dispatchID.xy] = SrcTexture.SampleLevel(LinearSampler, uv, 0); +} diff --git a/src/Features/TAAReorder.cpp b/src/Features/TAAReorder.cpp new file mode 100644 index 0000000000..015a95eee6 --- /dev/null +++ b/src/Features/TAAReorder.cpp @@ -0,0 +1,601 @@ +/** + * @brief TAA Periphery Reordering for VR DLSS Viewport Scaling + * + * This implementation follows the approach pioneered by PureDark's Skyrim Upscaler + * (https://github.com/PureDark/Skyrim-Upscaler/tree/VR), which demonstrated how to + * reorder Skyrim's post-processing pipeline to run vanilla TAA on the periphery while + * DLSS processes a cropped center region. No code was copied; the approach was used as + * a reference for the conductor/hook architecture. 
+ * + * PureDark's Skyrim Upscaler is licensed under the MIT License: + * Copyright (c) 2022 PureDark + * https://github.com/PureDark/Skyrim-Upscaler/blob/VR/LICENSE + */ +#include "TAAReorder.h" + +#include "Globals.h" +#include "Upscaling.h" +#include +#include + +namespace TAAReorder +{ + bool ShouldReorderTAA() + { + if (!g_initialized) + return false; + auto& upscaling = globals::features::upscaling; + return globals::game::isVR && + upscaling.settings.vrPeripheryTAA && + upscaling.settings.vrDlssViewportScale < 1.0f && + upscaling.GetUpscaleMethod() == Upscaling::UpscaleMethod::kDLSS; + } + + // ─── Setter A: Force TAA (pass-through) ─── + void ForceTAASetter::thunk() + { + func(); + } + + // ─── Setter B: TAA State Machine (pass-through) ─── + void TAAStateMachine::thunk() + { + func(); + } + + // ─── EnsurePostPPCopy: create/resize staging texture matching source ─── + void EnsurePostPPCopy(ID3D11Texture2D* sourceTex) + { + D3D11_TEXTURE2D_DESC srcDesc; + sourceTex->GetDesc(&srcDesc); + + if (g_postPPCopy) { + D3D11_TEXTURE2D_DESC existingDesc; + g_postPPCopy->GetDesc(&existingDesc); + if (existingDesc.Width == srcDesc.Width && existingDesc.Height == srcDesc.Height && + existingDesc.Format == srcDesc.Format) + return; + } + + D3D11_TEXTURE2D_DESC desc = srcDesc; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.MiscFlags = 0; + g_postPPCopy = nullptr; + g_postPPCopySRV = nullptr; + globals::d3d::device->CreateTexture2D(&desc, nullptr, g_postPPCopy.put()); + + if (g_postPPCopy) { + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = desc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + globals::d3d::device->CreateShaderResourceView(g_postPPCopy.get(), &srvDesc, g_postPPCopySRV.put()); + Util::SetResourceName(g_postPPCopy.get(), "TAAReorder_PostPPCopy"); + } + } + + // ─── Helper: set up common fullscreen rendering state ─── + static void SetupFullscreenState(ID3D11DeviceContext* context, 
float vpX, float vpY, float vpW, float vpH) + { + D3D11_VIEWPORT viewport = {}; + viewport.TopLeftX = vpX; + viewport.TopLeftY = vpY; + viewport.Width = vpW; + viewport.Height = vpH; + viewport.MaxDepth = 1.0f; + + auto& upscaling = globals::features::upscaling; + context->RSSetViewports(1, &viewport); + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context->VSSetShader(upscaling.GetUpscaleVS(), nullptr, 0); + context->RSSetState(upscaling.upscaleRasterizerState.get()); + context->OMSetBlendState(upscaling.upscaleBlendState.get(), nullptr, 0xffffffff); + } + + // ─── Helper: draw fullscreen triangle (point-sample format-converting copy) ─── + void DrawFullscreenCopy(ID3D11ShaderResourceView* srcSRV, ID3D11RenderTargetView* dstRTV, + float vpX, float vpY, float vpW, float vpH) + { + auto& upscaling = globals::features::upscaling; + auto context = globals::d3d::context; + + SetupFullscreenState(context, vpX, vpY, vpW, vpH); + context->PSSetShader(upscaling.GetDlssCompositePS(), nullptr, 0); + + ID3D11ShaderResourceView* srvs[] = { srcSRV }; + context->PSSetShaderResources(0, 1, srvs); + + ID3D11RenderTargetView* rtvs[] = { dstRTV }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + context->Draw(3, 0); + } + + // ─── ExecutePass hook: capture Phase 2A output, detect Phase 5 ─── + void ExecutePassHook::thunk(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag) + { + bool isPeripheryTAA = ShouldReorderTAA(); + bool shouldLog = (g_diagCounter == 0); + + // Compute pass index for Phase 2A / Phase 5 detection + int passIndex = -1; + if (isPeripheryTAA || shouldLog) { + uintptr_t managerAddr = (uintptr_t)manager; + uintptr_t passArrayBase = *(uintptr_t*)(managerAddr + 0x28); + if (passArrayBase) { + for (int i = 0; i < 40; i++) { + if (*(uintptr_t*)(passArrayBase + i * 8) == (uintptr_t)passObj) { + passIndex 
= i; + break; + } + } + } + } + + if (shouldLog) + logger::info("[TAAReorder] ExecutePass: src=0x{:X} dst=0x{:X} flag={} passIdx={}", + srcTech, dstTech, flag, passIndex); + + // Execute the original pass + func(manager, passObj, srcTech, dstTech, extraData, flag); + + // After Phase 2A: copy output RT to g_postPPCopy for DLSS to process + if (isPeripheryTAA && passIndex == 30 && dstTech == 0x29) { + ID3D11RenderTargetView* postRTV = nullptr; + globals::d3d::context->OMGetRenderTargets(1, &postRTV, nullptr); + if (postRTV) { + ID3D11Resource* res = nullptr; + postRTV->GetResource(&res); + if (res) { + ID3D11Texture2D* postTex = nullptr; + res->QueryInterface(__uuidof(ID3D11Texture2D), (void**)&postTex); + if (postTex) { + EnsurePostPPCopy(postTex); + globals::d3d::context->CopyResource(g_postPPCopy.get(), postTex); + g_postPPReady = true; + if (shouldLog) { + D3D11_TEXTURE2D_DESC desc; + postTex->GetDesc(&desc); + logger::info("[TAAReorder] Phase 2A output: {}x{} fmt={} → copied to g_postPPCopy", + desc.Width, desc.Height, (uint32_t)desc.Format); + } + postTex->Release(); + } + res->Release(); + } + postRTV->Release(); + } + } + + // Detect Phase 5 completion + if (isPeripheryTAA && passIndex == 35) { + g_phase5Complete = true; + if (shouldLog) + logger::info("[TAAReorder] Phase 5 complete (passIdx=35)"); + } + + // ─── Deferred DLSS paste: after Phase 5 (TAA) completes ─── + // TAA has now processed the entire submit texture (periphery is anti-aliased). + // Paste the sharp DLSS center on top, overwriting the TAA'd center region. 
+ if (isPeripheryTAA && passIndex == 35 && g_dlssReady && g_submitTexForPaste && g_postPPCopy) { + auto context = globals::d3d::context; + auto& upscaling = globals::features::upscaling; + + auto screenSize = globals::state->screenSize; + uint32_t eyeW = (uint32_t)(screenSize.x / 2); + uint32_t eyeH = (uint32_t)screenSize.y; + float vpScale = upscaling.settings.vrDlssViewportScale; + uint32_t centerW = (uint32_t)(eyeW * vpScale); + uint32_t centerH = (uint32_t)(eyeH * vpScale); + uint32_t baseCenterX = (eyeW - centerW) / 2; + uint32_t centerY = (eyeH - centerH) / 2; + + int32_t nasalShift = (int32_t)(upscaling.settings.vrDlssCropOffsetX * eyeW); + + float featherWidth = upscaling.settings.vrDlssFeatherWidth; + float featherPixels = featherWidth * eyeW; + + // Feathered blend path: create RTV from submit texture for PS composite + bool useFeathered = featherPixels > 0.0f && upscaling.vrFeatheredCompositePS && upscaling.vrFeatheredCompositeBlendState; + if (useFeathered) { + D3D11_TEXTURE2D_DESC texDesc; + g_submitTexForPaste->GetDesc(&texDesc); + + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = texDesc.Format; + rtvDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + rtvDesc.Texture2D.MipSlice = 0; + + ID3D11RenderTargetView* pasteRTV = nullptr; + HRESULT hr = globals::d3d::device->CreateRenderTargetView(g_submitTexForPaste.get(), &rtvDesc, &pasteRTV); + + if (SUCCEEDED(hr) && pasteRTV) { + // Save pipeline state + ID3D11BlendState* oldBlendState = nullptr; + float oldBlendFactor[4]; + UINT oldSampleMask; + context->OMGetBlendState(&oldBlendState, oldBlendFactor, &oldSampleMask); + + ID3D11RenderTargetView* oldRTV = nullptr; + ID3D11DepthStencilView* oldDSV = nullptr; + context->OMGetRenderTargets(1, &oldRTV, &oldDSV); + + ID3D11VertexShader* oldVS = nullptr; + context->VSGetShader(&oldVS, nullptr, nullptr); + ID3D11PixelShader* oldPS = nullptr; + context->PSGetShader(&oldPS, nullptr, nullptr); + ID3D11InputLayout* oldIL = nullptr; + 
context->IAGetInputLayout(&oldIL); + D3D11_PRIMITIVE_TOPOLOGY oldTopo; + context->IAGetPrimitiveTopology(&oldTopo); + ID3D11RasterizerState* oldRS = nullptr; + context->RSGetState(&oldRS); + + UINT oldNumVPs = 1; + D3D11_VIEWPORT oldVP; + context->RSGetViewports(&oldNumVPs, &oldVP); + + ID3D11ShaderResourceView* oldPSSRV = nullptr; + context->PSGetShaderResources(0, 1, &oldPSSRV); + ID3D11SamplerState* oldPSSampler = nullptr; + context->PSGetSamplers(0, 1, &oldPSSampler); + ID3D11Buffer* oldPSCB = nullptr; + context->PSGetConstantBuffers(0, 1, &oldPSCB); + + // Ensure CB exists + if (!upscaling.vrFeatheredCompositeCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + globals::d3d::device->CreateBuffer(&cbDesc, nullptr, upscaling.vrFeatheredCompositeCB.put()); + } + + // Set pipeline state for feathered composite + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context->VSSetShader(upscaling.GetUpscaleVS(), nullptr, 0); + context->PSSetShader(upscaling.vrFeatheredCompositePS.get(), nullptr, 0); + context->RSSetState(upscaling.upscaleRasterizerState.get()); + + float blendFactor[4] = { 0, 0, 0, 0 }; + context->OMSetBlendState(upscaling.vrFeatheredCompositeBlendState.get(), blendFactor, 0xFFFFFFFF); + + ID3D11ShaderResourceView* srvs[1] = { g_postPPCopySRV.get() }; + context->PSSetShaderResources(0, 1, srvs); + + ID3D11RenderTargetView* rtvs[1] = { pasteRTV }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + if (!upscaling.vrLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + 
globals::d3d::device->CreateSamplerState(&sampDesc, upscaling.vrLinearSampler.put()); + } + ID3D11SamplerState* samplers[1] = { upscaling.vrLinearSampler.get() }; + context->PSSetSamplers(0, 1, samplers); + + for (uint32_t i = 0; i < 2; i++) { + uint32_t eyeOffset = i * eyeW; + int32_t eyeNasalShift = (i == 0) ? nasalShift : -nasalShift; + uint32_t offsetCenterX = (uint32_t)std::clamp((int32_t)baseCenterX + eyeNasalShift, 0, (int32_t)(eyeW - centerW)); + + D3D11_VIEWPORT vp = {}; + vp.TopLeftX = (float)eyeOffset; + vp.TopLeftY = 0.0f; + vp.Width = (float)eyeW; + vp.Height = (float)eyeH; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + + uint32_t fullW = eyeW * 2; + uint32_t fullH = eyeH; + float srcUVOriginX = (float)(eyeOffset + offsetCenterX) / (float)fullW; + float srcUVOriginY = (float)centerY / (float)fullH; + float srcUVScaleX = (float)centerW / (float)fullW; + float srcUVScaleY = (float)centerH / (float)fullH; + + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(upscaling.vrFeatheredCompositeCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct { + float originX, originY; + float sizeX, sizeY; + float featherWidth; + float pad0; + float srcUVOriginX, srcUVOriginY; + float srcUVScaleX, srcUVScaleY; + float pad1[2]; + } cbData = { + (float)(eyeOffset + offsetCenterX), (float)centerY, + (float)centerW, (float)centerH, + featherPixels, 0.0f, + srcUVOriginX, srcUVOriginY, + srcUVScaleX, srcUVScaleY, + {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(upscaling.vrFeatheredCompositeCB.get(), 0); + + ID3D11Buffer* cbs[1] = { upscaling.vrFeatheredCompositeCB.get() }; + context->PSSetConstantBuffers(0, 1, cbs); + + context->Draw(3, 0); + } + + if (shouldLog) + logger::info("[TAAReorder] Deferred paste: feathered composite {}x{} at ({},{}) feather={:.1f}px nasalShift={} per-eye", + centerW, centerH, baseCenterX, centerY, featherPixels, nasalShift); + + // Restore pipeline state + 
context->OMSetBlendState(oldBlendState, oldBlendFactor, oldSampleMask); + context->RSSetViewports(1, &oldVP); + context->VSSetShader(oldVS, nullptr, 0); + context->PSSetShader(oldPS, nullptr, 0); + context->IASetInputLayout(oldIL); + context->IASetPrimitiveTopology(oldTopo); + context->RSSetState(oldRS); + context->PSSetShaderResources(0, 1, &oldPSSRV); + context->PSSetSamplers(0, 1, &oldPSSampler); + context->PSSetConstantBuffers(0, 1, &oldPSCB); + context->OMSetRenderTargets(1, &oldRTV, oldDSV); + + if (oldBlendState) oldBlendState->Release(); + if (oldRTV) oldRTV->Release(); + if (oldDSV) oldDSV->Release(); + if (oldVS) oldVS->Release(); + if (oldPS) oldPS->Release(); + if (oldIL) oldIL->Release(); + if (oldRS) oldRS->Release(); + if (oldPSSRV) oldPSSRV->Release(); + if (oldPSSampler) oldPSSampler->Release(); + if (oldPSCB) oldPSCB->Release(); + + pasteRTV->Release(); + } else { + useFeathered = false; + if (shouldLog) + logger::warn("[TAAReorder] Deferred paste: failed to create RTV for feathered path"); + } + } + if (!useFeathered) { + // Hard edge: CopySubresourceRegion + for (uint32_t i = 0; i < 2; i++) { + uint32_t eyeOffset = i * eyeW; + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t offsetCenterX = (uint32_t)std::clamp((int32_t)baseCenterX + eyeNasalShift, 0, (int32_t)(eyeW - centerW)); + + D3D11_BOX srcBox = { + eyeOffset + offsetCenterX, centerY, 0, + eyeOffset + offsetCenterX + centerW, centerY + centerH, 1 + }; + context->CopySubresourceRegion(g_submitTexForPaste.get(), 0, + eyeOffset + offsetCenterX, centerY, 0, + g_postPPCopy.get(), 0, &srcBox); + } + + if (shouldLog) + logger::info("[TAAReorder] Deferred paste: hard-copy {}x{} at ({},{}) nasalShift={} per-eye", + centerW, centerH, baseCenterX, centerY, nasalShift); + } + + g_dlssPasteComplete = true; + g_submitTexForPaste = nullptr; // Release reference + } + } + + // ─── BSImagespaceShader hook: DLSS eval + paste after pipeline completes ─── + // Wraps call at 0x132C827 (write_thunk_call). func() encompasses the + // conductor (Phase 2A) but NOT Phase 5 (TAA+DRS) — Phase 5 runs after us. + // We evaluate DLSS on the captured Phase 2A output and save the submit + // texture reference. The actual paste happens in ExecutePassHook after + // Phase 5 (passIdx=35) completes, so TAA only processes the periphery + // and the sharp DLSS center overwrites the TAA'd center region. 
+ void BSImagespaceShaderHook::thunk(void* a_this, uint64_t a_param) + { + func(a_this, a_param); + + if (!ShouldReorderTAA()) + return; + + bool shouldLog = (g_diagCounter == 0); + auto context = globals::d3d::context; + auto& upscaling = globals::features::upscaling; + + // Get submit texture from bound RT after pipeline stage completes + ID3D11RenderTargetView* submitRTV = nullptr; + context->OMGetRenderTargets(1, &submitRTV, nullptr); + ID3D11Texture2D* submitTex = nullptr; + if (submitRTV) { + ID3D11Resource* res = nullptr; + submitRTV->GetResource(&res); + if (res) { + res->QueryInterface(__uuidof(ID3D11Texture2D), (void**)&submitTex); + res->Release(); + } + submitRTV->Release(); + } + + if (shouldLog) { + if (submitTex) { + D3D11_TEXTURE2D_DESC desc; + submitTex->GetDesc(&desc); + logger::info("[TAAReorder] BSImagespaceShaderHook: submitTex=0x{:X} {}x{} fmt={} bind=0x{:X} postPPReady={} phase5={}", + (uintptr_t)submitTex, desc.Width, desc.Height, (uint32_t)desc.Format, + desc.BindFlags, g_postPPReady, g_phase5Complete); + } else { + logger::info("[TAAReorder] BSImagespaceShaderHook: no submitTex bound"); + } + } + + // Step 1: Evaluate DLSS on the captured post-PP intermediate + if (g_postPPReady && g_postPPCopy) { + if (shouldLog) + logger::info("[TAAReorder] BSImagespaceShaderHook: evaluating DLSS on g_postPPCopy..."); + + upscaling.Upscale(g_postPPCopy.get()); + g_dlssReady = true; + + if (shouldLog) + logger::info("[TAAReorder] BSImagespaceShaderHook: DLSS evaluation complete"); + } else if (shouldLog) { + logger::info("[TAAReorder] BSImagespaceShaderHook: skip DLSS (postPPReady={} postPPCopy={})", + g_postPPReady, (void*)g_postPPCopy.get()); + } + + // Step 2: Save submit texture for deferred paste after Phase 5 (TAA) + // DO NOT paste here — Phase 5 hasn't run yet and would blur the DLSS center. + // The paste happens in ExecutePassHook after passIdx=35 completes. 
+ if (g_dlssReady && submitTex) { + g_submitTexForPaste.copy_from(submitTex); + if (shouldLog) + logger::info("[TAAReorder] BSImagespaceShaderHook: saved submitTex for deferred paste (waiting for Phase 5)"); + } + + if (submitTex) + submitTex->Release(); + } + + // ─── Depth/stencil registration hook: diagnostic logging ─── + // Tracks dimensions per slot and logs whenever they change. + // data[0]=width, data[1]=height based on initial analysis. + void DepthStencilRegHook::thunk(void* manager, uint32_t slot, void* desc) + { + if (desc && slot < 32) { + auto* data = reinterpret_cast(desc); + static uint32_t lastWidth[32] = {}; + static uint32_t lastHeight[32] = {}; + static uint32_t callCount[32] = {}; + + callCount[slot]++; + bool dimsChanged = (data[0] != lastWidth[slot] || data[1] != lastHeight[slot]); + if (dimsChanged) { + logger::info("[TAAReorder] DepthStencilReg: slot={} {}x{} → {}x{} (call #{}) data[2..7]= {} {} {} {} {} {}", + slot, lastWidth[slot], lastHeight[slot], data[0], data[1], callCount[slot], + data[2], data[3], data[4], data[5], data[6], data[7]); + lastWidth[slot] = data[0]; + lastHeight[slot] = data[1]; + } + } + + func(manager, slot, desc); + } + + // ─── Hidden area mesh render hook: removed (no hook is installed) ─── + // HAM renders normally. Previous "frozen frame" artifacts at the HAM boundary + // were caused by the depth upscaler's conservative blending (GatherRed + lerp) + // leaking depth=0 mask values into valid depth. Fixed in DepthUpscalePS.hlsl + // by switching to pure point sampling. + // HiddenAreaMeshHook removed — the passthrough hook was breaking HAM + // by corrupting the original function via Detours on an unverified RVA. 
+ + // ─── BSOpenVR::Submit hook: diagnostic logging ─── + void SubmitHook::thunk(void* thisPtr, void* textureHandle) + { + if (g_diagCounter == 0 && textureHandle) { + auto tex2d = static_cast(textureHandle); + D3D11_TEXTURE2D_DESC desc = {}; + tex2d->GetDesc(&desc); + auto base = REL::Module::get().base(); + auto retAddr = reinterpret_cast(_ReturnAddress()); + logger::info("[TAAReorder] Submit: tex=0x{:X} {}x{} fmt={} dlssPasted={} callerRVA=0x{:X}", + (uintptr_t)textureHandle, desc.Width, desc.Height, (uint32_t)desc.Format, + g_dlssPasteComplete, retAddr - base); + } + + func(thisPtr, textureHandle); + } + + // ─── Post-processing conductor call hook: pass-through (tracking only) ─── + // Inner conductor call at 0x1325086 inside BSImagespaceShader::Render. + // Only tracks g_insideConductor state. DLSS logic is in BSImagespaceShaderHook. + void ConductorCallHook::thunk(void* a1, void* a2, void* a3, void* a4) + { + g_insideConductor = true; + func(a1, a2, a3, a4); + g_insideConductor = false; + } + + void InitEarly() + { + if (!REL::Module::IsVR()) + return; + + auto base = REL::Module::get().base(); + + // ─── Hook: DepthStencilRegistration (RVA 0x00DC79D0) ─── + // Must be installed before renderer initialization (which registers depth/stencil targets). + // Called from Upscaling::Load(), before D3D device creation. 
+ DepthStencilRegHook::func = reinterpret_cast(base + 0x00DC79D0); + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&DepthStencilRegHook::func), reinterpret_cast(DepthStencilRegHook::thunk)); + DetourTransactionCommit(); + + logger::info("[TAAReorder] InitEarly: DepthStencil registration hooked at RVA 0x00DC79D0"); + } + + void Init() + { + if (!REL::Module::IsVR()) + return; + + auto base = REL::Module::get().base(); + + // ─── Core pointers ─── + g_pRendererSingleton = reinterpret_cast(base + 0x034234C0); + + // ─── Hook: ForceTAASetter (RVA 0x005C8EE0) ─── + ForceTAASetter::func = base + 0x005C8EE0; + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&ForceTAASetter::func), reinterpret_cast(ForceTAASetter::thunk)); + DetourTransactionCommit(); + + // ─── Hook: TAAStateMachine (RVA 0x005C8F10) ─── + TAAStateMachine::func = base + 0x005C8F10; + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&TAAStateMachine::func), reinterpret_cast(TAAStateMachine::thunk)); + DetourTransactionCommit(); + + // ─── Hook: ExecutePass (RVA 0x012D2540) ─── + ExecutePassHook::func = reinterpret_cast(base + 0x012D2540); + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&ExecutePassHook::func), reinterpret_cast(ExecutePassHook::thunk)); + DetourTransactionCommit(); + + // ─── Hook: BSOpenVR::Submit (RVA 0x00C53920) ─── + SubmitHook::func = reinterpret_cast(base + 0x00C53920); + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&SubmitHook::func), reinterpret_cast(SubmitHook::thunk)); + DetourTransactionCommit(); + + // ─── Hook: BSImagespaceShader via write_thunk_call at RVA 0x132C827 ─── + // Wraps BSImagespaceShader::Render from the Orchestrator level. + // func() encompasses conductor (Phase 2A) + Phase 5 (TAA+DRS) + Submit. 
+ // After func(): DLSS eval + paste. Matches PureDark's BSImagespaceShader_Hook_VR. + stl::write_thunk_call(base + 0x132C827); + + // ─── Hook: Inner conductor call via write_thunk_call at RVA 0x1325086 ─── + // Pass-through, only tracks g_insideConductor state. + stl::write_thunk_call(base + 0x1325086); + + g_initialized = true; + + logger::info("[TAAReorder] Initialized — base=0x{:X}", base); + logger::info("[TAAReorder] Post-pipeline DLSS mode (periphery TAA)"); + logger::info("[TAAReorder] BSImagespaceShader hooked via write_thunk_call at RVA 0x132C827 (DLSS eval + paste)"); + logger::info("[TAAReorder] Inner conductor hooked via write_thunk_call at RVA 0x1325086 (tracking only)"); + logger::info("[TAAReorder] BSOpenVR::Submit hooked at RVA 0x00C53920"); + } +} diff --git a/src/Features/TAAReorder.h b/src/Features/TAAReorder.h new file mode 100644 index 0000000000..e6aa0b8ee2 --- /dev/null +++ b/src/Features/TAAReorder.h @@ -0,0 +1,182 @@ +#pragma once + +// TAA Reordering for VR DLSS Viewport Scaling (Post-Conductor DLSS) +// +// PureDark's approach: DLSS is evaluated AFTER BSImagespaceShader::Render +// completes (which includes the conductor + Phase 5 TAA + DRS). +// +// Flow: +// 1. BSImagespaceShaderHook wraps the call at 0x132C827: +// func() runs → conductor executes Phase 2A +// a. ExecutePassHook captures Phase 2A output to g_postPPCopy +// 2. After func() returns in BSImagespaceShaderHook: +// a. Gets submit texture from bound RT +// b. Evaluates DLSS on g_postPPCopy (post-PP intermediate) +// c. Saves submit texture reference for deferred paste +// 3. Back in ExecutePassHook, Phase 5 (TAA + DRS) runs (passIdx=35): +// a. TAA anti-aliases the entire submit texture (periphery benefits) +// b. After Phase 5 returns: paste DLSS center onto submit texture +// (overwrites TAA'd center with sharp DLSS output) +// 4. Engine continues: Orchestrator → Scaleform Display (UI) → Submit +// 5. 
Lock DRS + UpdateCameraData (in Main_PostProcessing::thunk after func()) +// +// Both DLSS and TAA get Phase 2A's PP applied: +// - TAA: naturally (Phase 2A runs before Phase 5 in conductor) +// - DLSS: processes the Phase 2A output copy (g_postPPCopy) +// +// All RVAs are VR-specific (SkyrimVR.exe). + +#include +#include +#include +#include + +struct Upscaling; + +namespace TAAReorder +{ + // ─── Function pointer types ─── + using ExecutePass_t = void (*)(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag); + using BSOpenVRSubmit_t = void (*)(void* thisPtr, void* textureHandle); + + // ─── Resolved global data pointers ─── + inline uintptr_t* g_pRendererSingleton = nullptr; + inline bool g_initialized = false; + + // ─── Diagnostics (rate-limited logging) ─── + inline int g_diagCounter = 0; + inline constexpr int DIAG_INTERVAL = 300; + + // ─── Per-frame sequence counter (for verifying call ordering) ─── + inline int g_frameSeqCounter = 0; + + // ─── ExecutePass hook (conductor interposition) ─── + // RVA: 0x012D2540 — called by the conductor for each render pass. + // Copies Phase 2A output RT to g_postPPCopy for DLSS to process. + struct ExecutePassHook + { + static void thunk(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag); + static inline ExecutePass_t func = nullptr; + }; + + // ─── BSImagespaceShader hook: DLSS eval after conductor, paste deferred ─── + // Wraps call at 0x132C827 (write_thunk_call). func() encompasses the + // conductor (Phase 2A) but NOT Phase 5 (TAA+DRS) — Phase 5 runs after us. + // We evaluate DLSS on the captured Phase 2A output and save the submit + // texture reference. The actual paste happens in ExecutePassHook after + // Phase 5 (passIdx=35) completes, so TAA only processes the periphery + // and the sharp DLSS center overwrites the TAA'd center region. 
+ struct BSImagespaceShaderHook + { + static void thunk(void* a_this, uint64_t a_param); + static inline REL::Relocation func; + }; + + // ─── Post-processing conductor call hook (pass-through, tracking only) ─── + // RVA: 0x1325086 — inner conductor call inside BSImagespaceShader::Render. + // Only used for g_insideConductor tracking. + struct ConductorCallHook + { + static void thunk(void* a1, void* a2, void* a3, void* a4); + static inline REL::Relocation func; + }; + + // ─── Post-PP copy (Phase 2A output, DLSS color source) ─── + // After Phase 2A completes, ExecutePassHook copies the bound RT here. + // BSImagespaceShaderHook passes this to Upscale() as colorSourceOverride. + // After DLSS, FinalizePerEyeOutputs writes DLSS center back into this texture. + inline winrt::com_ptr g_postPPCopy; + inline winrt::com_ptr g_postPPCopySRV; + inline bool g_postPPReady = false; + + // ─── DLSS evaluation complete flag ─── + // Set after BSImagespaceShaderHook evaluates DLSS on g_postPPCopy. + // Used to gate the DLSS center paste step. + inline bool g_dlssReady = false; + + // ─── DLSS paste complete flag ─── + // Set once the deferred paste (done in ExecutePassHook after Phase 5) has written DLSS center onto the submit texture. + inline bool g_dlssPasteComplete = false; + + // ─── Phase 5 tracking ─── + inline bool g_phase5Complete = false; + + // ─── Conductor state tracking ─── + inline bool g_insideConductor = false; + inline int g_bsHookCallCount = 0; + + // ─── RGB-only blend state (may be useful for future feathering) ─── + inline winrt::com_ptr g_rgbOnlyBlendState; + + // ─── Stencil state for HAM-aware compositing ─── + // DepthEnable=false, StencilEnable=true, StencilFunc=EQUAL, StencilRef=0. + // Only writes to pixels where stencil==0 (visible, non-HAM pixels). + // Matches PureDark's approach in Evaluate()/RenderTexture(). 
+ inline winrt::com_ptr g_hamStencilState; + + // ─── Saved submit texture for deferred paste (after Phase 5) ─── + inline winrt::com_ptr g_submitTexForPaste; + + // ─── Cached UAV for submit texture (ClearHMDMask + ForceAlpha on submit after DLSS paste) ─── + inline winrt::com_ptr g_submitTexUAV; + inline ID3D11Texture2D* g_submitTexUAVOwner = nullptr; // track which texture the UAV belongs to + + // ─── ForceAlpha compute shader (sets alpha=1.0 to fix Scaleform UI rendering) ─── + inline winrt::com_ptr g_forceAlphaCS; + + // ─── Setter hook: Setter A (Force TAA) ─── + // RVA: 0x005C8EE0 — unconditional TAA enable. + // Pass-through (we want TAA to run natively). + struct ForceTAASetter + { + static void thunk(); + static inline REL::Relocation func; + }; + + // ─── Setter hook: Setter B (TAA State Machine) ─── + // RVA: 0x005C8F10 — conditional TAA enable/disable. + // Pass-through (we want TAA to run natively). + struct TAAStateMachine + { + static void thunk(); + static inline REL::Relocation func; + }; + + // ─── Depth/stencil registration hook ─── + // RVA: 0x00DC79D0 — registers depth/stencil targets in the RT manager (+0x1388). + // Separate path from color RTs (registered via sub_417980 at +0x1350). + // Diagnostic only: the hook logs each slot's dimensions whenever they change and + // forwards the descriptor to the original function unmodified (no scaling is applied). + using RegisterDepthStencil_t = void (*)(void* manager, uint32_t slot, void* desc); + struct DepthStencilRegHook + { + static void thunk(void* manager, uint32_t slot, void* desc); + static inline RegisterDepthStencil_t func = nullptr; + }; + + // ─── BSOpenVR::Submit hook (VR frame submission interception) ─── + // RVA: 0x00C53920 — BSOpenVR::Submit, vtable[3]. + // Diagnostic logging only. 
+ struct SubmitHook + { + static void thunk(void* thisPtr, void* textureHandle); + static inline BSOpenVRSubmit_t func = nullptr; + }; + + // Check if TAA reordering should be active based on current settings + bool ShouldReorderTAA(); + + // Ensure g_postPPCopy matches the source texture dimensions/format + void EnsurePostPPCopy(ID3D11Texture2D* sourceTex); + + // Helper: draw fullscreen format-converting copy (Load-based, 1:1 pixel copy). + void DrawFullscreenCopy(ID3D11ShaderResourceView* srcSRV, ID3D11RenderTargetView* dstRTV, + float vpX, float vpY, float vpW, float vpH); + + // Install hooks that must be in place before renderer initialization (depth/stencil reg). + // Call from Upscaling::Load() (VR only). + void InitEarly(); + + // Initialize all pointers and install hooks. Call once from PostPostLoad (VR only). + void Init(); +} diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index 61774e99b6..d58c75c4fc 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -1,8 +1,10 @@ #include "Upscaling.h" #include "Deferred.h" +#include "Features/VR.h" #include "Hooks.h" #include "State.h" +#include "TAAReorder.h" #include "Upscaling/DX12SwapChain.h" #include "Upscaling/FidelityFX.h" #include "Upscaling/Streamline.h" @@ -25,7 +27,11 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( sharpnessFSR, sharpnessDLSS, presetDLSS, - useGatherWideKernel); + useGatherWideKernel, + vrDlssViewportScale, + vrPeripheryTAA, + vrDlssCropOffsetX, + vrDlssFeatherWidth); decltype(&D3D11CreateDeviceAndSwapChain) ptrD3D11CreateDeviceAndSwapChainUpscaling; @@ -61,7 +67,9 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( upscaling.CheckBackendFeatures(pAdapter); // Use better swap effect to prevent tearing and improve performance - pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + // But don't change it for VR as it can affect frame pacing with the VR compositor + if (!globals::game::isVR) + pSwapChainDesc->SwapEffect = 
DXGI_SWAP_EFFECT_FLIP_DISCARD; bool shouldProxy = !globals::game::isVR; if (shouldProxy) @@ -141,8 +149,14 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( ppImmediateContext); if (upscaling.IsBackendInitialized()) { - upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); - upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + // Skip Streamline interface wrapping for VR — slUpgradeInterface wraps the D3D + // device and swap chain with Streamline proxy objects, which disrupts VR compositor + // frame pacing (causes judder/stuttering). DLSS still functions without wrapped + // interfaces; only frame generation requires them (and that's already VR-gated above). + if (!globals::game::isVR) { + upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); + upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + } upscaling.SetBackendD3DDevice(*ppDevice); upscaling.PostBackendDevice(); } @@ -238,6 +252,41 @@ void Upscaling::DrawSettings() ImGui::Text("Set to 'Default' for automatic selection based on your Upscale Preset and hardware."); ImGui::Text("Changing this setting requires a restart to take effect."); } + + if (globals::game::isVR) { + if (ImGui::TreeNodeEx("VR Viewport Scaling", ImGuiTreeNodeFlags_DefaultOpen)) { + ImGui::SliderFloat("DLSS Viewport Scale", &settings.vrDlssViewportScale, 0.5f, 1.0f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Controls the fraction of each eye's view that DLSS processes."); + ImGui::Text("Lower values = better performance, less visible area upscaled by DLSS."); + ImGui::Text("The VR lens hides the periphery, so 0.7-0.85 is recommended."); + ImGui::Text("The periphery outside the DLSS region is filled with a bilinear upscale."); + } + + if (settings.vrDlssViewportScale < 1.0f) { + bool peripheryTAA = settings.vrPeripheryTAA != 0; + if (ImGui::Checkbox("Periphery TAA", &peripheryTAA)) + settings.vrPeripheryTAA = peripheryTAA ? 
1 : 0; + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Applies temporal anti-aliasing to the bilinear-upscaled periphery."); + ImGui::Text("Reduces shimmer and improves peripheral quality."); + ImGui::Text("The DLSS center region passes through unchanged."); + } + + ImGui::SliderFloat("Nasal Crop Offset", &settings.vrDlssCropOffsetX, 0.0f, 0.3f, "%.3f"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Shifts the DLSS crop region toward the nose."); + ImGui::Text("Higher values = more of the nasal view processed by DLSS."); + ImGui::Text("0.0 = centered, 0.1-0.2 recommended."); + } + + // Feathered edge disabled pending fix — hidden from UI + // ImGui::SliderFloat("Crop Edge Feather", &settings.vrDlssFeatherWidth, 0.0f, 0.1f, "%.3f"); + } + + ImGui::TreePop(); + } + } } if (globals::game::isVR) { @@ -460,6 +509,9 @@ void Upscaling::LoadSettings(json& o_json) logger::warn("[Upscaling] Loaded useGatherWideKernel {} out of range, clamping to 1", settings.useGatherWideKernel); settings.useGatherWideKernel = 1; } + settings.vrDlssViewportScale = std::clamp(settings.vrDlssViewportScale, 0.5f, 1.0f); + settings.vrDlssCropOffsetX = std::clamp(settings.vrDlssCropOffsetX, 0.0f, 0.3f); + settings.vrDlssFeatherWidth = std::clamp(settings.vrDlssFeatherWidth, 0.0f, 0.1f); auto iniSettingCollection = globals::game::iniPrefSettingCollection; if (iniSettingCollection) { auto setting = iniSettingCollection->GetSetting("bUseTAA:Display"); @@ -487,6 +539,10 @@ void Upscaling::DataLoaded() void Upscaling::Load() { *(uintptr_t*)&ptrD3D11CreateDeviceAndSwapChainUpscaling = SKSE::PatchIAT(hk_D3D11CreateDeviceAndSwapChainUpscaling, "d3d11.dll", "D3D11CreateDeviceAndSwapChain"); + + // Install depth/stencil registration hook early (before renderer creates targets) + if (globals::game::isVR) + TAAReorder::InitEarly(); } struct BSImageSpace_Init_FXAA @@ -530,6 +586,10 @@ void Upscaling::PostPostLoad() stl::detour_thunk(REL::RelocationID(98974, 105626)); 
logger::info("[Upscaling] Installed hooks"); + + // Install TAA reordering hooks for VR periphery TAA + if (globals::game::isVR) + TAAReorder::Init(); } Upscaling::UpscaleMethod Upscaling::GetUpscaleMethod() const @@ -639,6 +699,13 @@ void Upscaling::DestroyUpscalingTextureResources(UpscaleMethod a_upscalemethod) } } + // VR periphery TAA textures - only needed for DLSS with viewport scaling + if (a_upscalemethod != UpscaleMethod::kDLSS) { + vrPreTAACopy = nullptr; + for (int i = 0; i < 2; i++) + vrTAAdPerEye[i].reset(); + } + // Motion vector copy texture is only needed for DLSS - destroy when switching away from DLSS if (a_upscalemethod != UpscaleMethod::kDLSS) { if (motionVectorCopyTexture) { @@ -692,6 +759,8 @@ void Upscaling::CheckResources(UpscaleMethod a_upscalemethod) vrIntermediateMotionVectors[i].reset(); vrIntermediateReactiveMask[i].reset(); vrIntermediateTransparencyMask[i].reset(); + vrFinalOutput[i].reset(); + vrCropColorIn[i].reset(); } } } @@ -744,6 +813,8 @@ ID3D11PixelShader* Upscaling::GetDepthRefractionUpscalePS() if (!depthRefractionUpscalePS) { logger::debug("Compiling DepthRefractionUpscalePS.hlsl"); std::vector> defines = { { "PSHADER", "" } }; + if (globals::game::isVR) + defines.push_back({ "VR", "" }); depthRefractionUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DepthRefractionUpscalePS.hlsl", defines, "ps_5_0")); } @@ -771,6 +842,37 @@ ID3D11VertexShader* Upscaling::GetUpscaleVS() return upscaleVS.get(); } +ID3D11PixelShader* Upscaling::GetDlssCompositePS() +{ + if (!vrDlssCompositePS) { + logger::debug("Compiling DLSSCompositePS.hlsl"); + vrDlssCompositePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0")); + } + + return vrDlssCompositePS.get(); +} + +ID3D11PixelShader* Upscaling::GetDlssUpscalePS() +{ + if (!vrDlssUpscalePS) { + logger::debug("Compiling DLSSCompositePS.hlsl (BILINEAR_UPSCALE)"); + 
vrDlssUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader( + L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", + { { "PSHADER", "" }, { "BILINEAR_UPSCALE", "" } }, "ps_5_0")); + } + + if (!vrDlssUpscaleCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = sizeof(DlssCompositeCB); + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrDlssUpscaleCB.put())); + } + + return vrDlssUpscalePS.get(); +} + eastl::unique_ptr Upscaling::CreateTextureFromSource(ID3D11Resource* src, uint32_t width, uint32_t height, bool copyBindFlags, bool createSRV, bool createUAV, const char* name) { @@ -820,7 +922,7 @@ void Upscaling::CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight std::string suffix = (i == 0) ? "Left" : "Right"; vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, inWidth, inHeight, false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); - vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, false, ("Upscale_ColorOut_" + suffix).c_str()); + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); // Depth: R32_TYPELESS base (matches kMAIN), with R32_FLOAT SRV for ClearHMDMaskCS. // CopySubresourceRegion requires matching typeless formats; SRV reinterprets as R32_FLOAT. 
@@ -873,48 +975,228 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; - bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; - if (!needsRecreate) { - needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || - vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); - } - if (needsRecreate) { - logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", - eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); - CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, - colorSrc, mvecSrc, reactiveSrc, transparencySrc); - } + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + + if (viewportScaling) { + // Viewport scaling: physically crop all DLSS inputs to eliminate non-zero subrect offsets. + // vrIntermediateColorIn stays at FULL render-res (for ClearHMDMask + FillPeriphery). + // All other DLSS inputs (depth, mvec, masks) are CROP-sized. + // vrCropColorIn is CROP-sized (DLSS color input, extracted from masked full color). + // This ensures DLSS sees all inputs at {0,0} with no subrect base offsets, + // which is critical for correct temporal reprojection during camera motion. 
+ uint32_t cropWidthIn = (uint32_t)(eyeWidthIn * vpScale); + uint32_t cropHeightIn = (uint32_t)(eyeHeightIn * vpScale); + uint32_t cropWidthOut = (uint32_t)(eyeWidthOut * vpScale); + uint32_t cropHeightOut = (uint32_t)(eyeHeightOut * vpScale); + + bool needsRecreate = !vrIntermediateColorIn[0] || !vrCropColorIn[0] || !vrIntermediateDepth[0] || + !vrIntermediateColorOut[0] || !vrFinalOutput[0]; + if (!needsRecreate) { + // Check format too — periphery TAA feeds R8G8B8A8 post-PP intermediate, + // while normal DLSS feeds R11G11B10 kMAIN. Must recreate on format change. + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrCropColorIn[0]->desc.Width != cropWidthIn || + vrCropColorIn[0]->desc.Height != cropHeightIn || + vrIntermediateDepth[0]->desc.Width != cropWidthIn || + vrIntermediateDepth[0]->desc.Height != cropHeightIn || + vrIntermediateColorOut[0]->desc.Width != cropWidthOut || + vrIntermediateColorOut[0]->desc.Height != cropHeightOut || + vrFinalOutput[0]->desc.Width != eyeWidthOut || + vrFinalOutput[0]->desc.Height != eyeHeightOut); + } - // Extract both eyes' inputs from combined stereo buffers - for (uint32_t i = 0; i < 2; ++i) { - uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; - D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR viewport-scaled intermediates: full {}x{}, crop in {}x{}, crop out {}x{}", + eyeWidthIn, eyeHeightIn, cropWidthIn, cropHeightIn, cropWidthOut, cropHeightOut); + + for (int i = 0; i < 2; i++) { + std::string suffix = (i == 0) ? 
"Left" : "Right"; + + // Full-size color for ClearHMDMask + FillPeriphery + vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, eyeWidthIn, eyeHeightIn, + false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); + + // Crop-sized DLSS color input (needs UAV for ClearHMDMask) + vrCropColorIn[i] = CreateTextureFromSource(colorSrc, cropWidthIn, cropHeightIn, + false, true, true, ("Upscale_CropColorIn_" + suffix).c_str()); + + // Crop-sized DLSS output + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, cropWidthOut, cropHeightOut, + false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); + + // Crop-sized depth (R32_TYPELESS with R32_FLOAT SRV) + { + D3D11_TEXTURE2D_DESC depthDesc = {}; + depthDesc.Width = cropWidthIn; + depthDesc.Height = cropHeightIn; + depthDesc.MipLevels = 1; + depthDesc.ArraySize = 1; + depthDesc.Format = DXGI_FORMAT_R32_TYPELESS; + depthDesc.SampleDesc.Count = 1; + depthDesc.Usage = D3D11_USAGE_DEFAULT; + depthDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + vrIntermediateDepth[i] = eastl::make_unique(depthDesc); + Util::SetResourceName(vrIntermediateDepth[i]->resource.get(), ("Upscale_Depth_" + suffix).c_str()); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + vrIntermediateDepth[i]->CreateSRV(srvDesc); + } - context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); - } + // 
Crop-sized motion vectors, reactive mask, transparency mask + vrIntermediateMotionVectors[i] = CreateTextureFromSource(mvecSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_MVec_" + suffix).c_str()); + vrIntermediateReactiveMask[i] = CreateTextureFromSource(reactiveSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Reactive_" + suffix).c_str()); + vrIntermediateTransparencyMask[i] = CreateTextureFromSource(transparencySrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Transparency_" + suffix).c_str()); + + // Full display-res composition target (needs RTV for PS feathered composite) + vrFinalOutput[i] = CreateTextureFromSource(colorSrc, eyeWidthOut, eyeHeightOut, + false, true, true, ("Upscale_FinalOutput_" + suffix).c_str()); + // Add render target bind flag and create RTV for pixel shader composite + { + D3D11_TEXTURE2D_DESC finalDesc; + vrFinalOutput[i]->resource->GetDesc(&finalDesc); + if (!(finalDesc.BindFlags & D3D11_BIND_RENDER_TARGET)) { + // Recreate with render target support + finalDesc.BindFlags |= D3D11_BIND_RENDER_TARGET; + vrFinalOutput[i] = eastl::make_unique(finalDesc); + Util::SetResourceName(vrFinalOutput[i]->resource.get(), ("Upscale_FinalOutput_" + suffix).c_str()); + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = finalDesc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = 1; + vrFinalOutput[i]->CreateSRV(srvDesc); + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = finalDesc.Format; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + vrFinalOutput[i]->CreateUAV(uavDesc); + } + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = finalDesc.Format; + rtvDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + rtvDesc.Texture2D.MipSlice = 0; + vrFinalOutput[i]->CreateRTV(rtvDesc); + } + } + } - // Zero color where depth == 0 (HMD hidden 
area) in each per-eye buffer. - // Depth is read from the combined stereo SRV at the per-eye offset; color is written - // to the isolated per-eye UAV (ColorOffsetX = 0). - auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + // Copy full eye to full-size vrIntermediateColorIn (raw render-res, no HMD mask yet) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + } + + // Nasal offset: shift crop toward nose + // Eye 0 (left): nose is right -> shift right (+) + // Eye 1 (right): nose is left -> shift left (-) + float nasalShiftFrac = settings.vrDlssCropOffsetX; + uint32_t baseCropOffsetX = (eyeWidthIn - cropWidthIn) / 2; + uint32_t cropOffsetY = (eyeHeightIn - cropHeightIn) / 2; + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + + // Extract DLSS crop from raw buffer (before TAA or HMD mask), then mask the crop directly. + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + + // Per-eye nasal offset in render resolution space + int32_t nasalShift = (int32_t)(nasalShiftFrac * eyeWidthIn); + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t cropOffsetX = (uint32_t)std::clamp((int32_t)baseCropOffsetX + eyeNasalShift, 0, (int32_t)(eyeWidthIn - cropWidthIn)); + + // Crop color from raw (unmasked, non-TAA'd) full-size buffer + D3D11_BOX cropBox = { cropOffsetX, cropOffsetY, 0, cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrCropColorIn[i]->resource.get(), 0, 0, 0, 0, + vrIntermediateColorIn[i]->resource.get(), 0, &cropBox); + + // ClearHMDMask directly on the crop (depth offset accounts for eye + crop position in stereo buffer) + ClearHMDMask(vrCropColorIn[i]->uav.get(), depthTexture.depthSRV, + cropWidthIn, cropHeightIn, offsetXIn + cropOffsetX, 0, cropOffsetY); + + // Crop depth/mvec/reactive/transparency directly from stereo buffers + D3D11_BOX stereoCropBox = { offsetXIn + cropOffsetX, cropOffsetY, 0, + offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, + depthSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, + mvecSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, + reactiveSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, + transparencySrc, 0, &stereoCropBox); + } - for (uint32_t i = 0; i < 2; ++i) { - uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; - ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, - eyeWidthIn, eyeHeightIn, depthOffset, 0); + // ClearHMDMask on full-size buffer (for FillPeriphery) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? 
eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } + } else { + // Non-viewport-scaling path: all textures at full per-eye dimensions + bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; + if (!needsRecreate) { + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || + vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); + } + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", + eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, + colorSrc, mvecSrc, reactiveSrc, transparencySrc); + } + + // Release viewport-scaling-specific textures + for (int i = 0; i < 2; i++) { + vrCropColorIn[i].reset(); + vrFinalOutput[i].reset(); + vrTAAdPerEye[i].reset(); + } + vrPreTAACopy = nullptr; + + // Copy full eye to per-eye intermediates + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? 
eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); + } + + // Zero color where depth == 0 (HMD hidden area) in each per-eye buffer + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } } if (state->frameAnnotations) state->EndPerfEvent(); } -void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) +void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst, bool eye0Only) { if (!globals::game::isVR) return; @@ -929,11 +1211,258 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); uint32_t eyeHeightOut = (uint32_t)screenSize.y; - // Write upscaled outputs back - for (uint32_t i = 0; i < 2; ++i) { + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + + uint32_t eyeCount = eye0Only ? 1 : 2; + for (uint32_t i = 0; i < eyeCount; ++i) { uint32_t offsetXOut = (i == 1) ? 
eyeWidthOut : 0; - D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; - context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + + if (viewportScaling && vrFinalOutput[i]) { + // Paste crop-sized DLSS output into center of full-size composition target + uint32_t dlssWidthOut = vrIntermediateColorOut[i]->desc.Width; + uint32_t dlssHeightOut = vrIntermediateColorOut[i]->desc.Height; + + // Calculate paste position with nasal offset (in display resolution space) + int32_t nasalShift = (int32_t)(settings.vrDlssCropOffsetX * eyeWidthOut); + int32_t eyeNasalShift = (i == 0) ? nasalShift : -nasalShift; + uint32_t pasteX = (uint32_t)std::clamp((int32_t)((eyeWidthOut - dlssWidthOut) / 2) + eyeNasalShift, 0, (int32_t)(eyeWidthOut - dlssWidthOut)); + uint32_t pasteY = (eyeHeightOut - dlssHeightOut) / 2; + + float featherPixels = settings.vrDlssFeatherWidth * eyeWidthOut; + + static uint32_t featherLogCount = 0; + if (featherLogCount < 3) { + logger::info("[Upscaling] FinalizePerEyeOutputs eye{}: featherPixels={:.1f} (width={:.4f} * eyeW={}), dlss={}x{}, paste=({},{})", + i, featherPixels, settings.vrDlssFeatherWidth, eyeWidthOut, dlssWidthOut, dlssHeightOut, pasteX, pasteY); + } + + if (featherPixels > 0.0f && vrFinalOutput[i]->rtv) { + // Pixel shader feathered composite (preserves periphery TAA via hardware alpha blend) + // Based on PureDark's technique from Skyrim-Upscaler VR (MIT license) + + // Lazy-compile PS and create blend state + if (!vrFeatheredCompositePS) { + vrFeatheredCompositePS.attach(reinterpret_cast(Util::CompileShader( + L"Data/Shaders/Upscaling/FeatheredCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0"))); + if (vrFeatheredCompositePS) + logger::info("[Upscaling] FeatheredCompositePS compiled successfully"); + else + logger::error("[Upscaling] FeatheredCompositePS FAILED to compile!"); + + // Create SrcAlpha/InvSrcAlpha blend state + D3D11_BLEND_DESC blendDesc = {}; + 
blendDesc.RenderTarget[0].BlendEnable = TRUE; + blendDesc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; + blendDesc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; + blendDesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; + blendDesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; + blendDesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; + blendDesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; + blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + DX::ThrowIfFailed(globals::d3d::device->CreateBlendState(&blendDesc, vrFeatheredCompositeBlendState.put())); + + // Create constant buffer (48 bytes: float2 origin, float2 size, float feather, pad, float2 srcUVOrigin, float2 srcUVScale) + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrFeatheredCompositeCB.put())); + } + + if (vrFeatheredCompositePS) { + // Save current OM state + ID3D11BlendState* oldBlendState = nullptr; + float oldBlendFactor[4]; + UINT oldSampleMask; + context->OMGetBlendState(&oldBlendState, oldBlendFactor, &oldSampleMask); + + ID3D11RenderTargetView* oldRTV = nullptr; + ID3D11DepthStencilView* oldDSV = nullptr; + context->OMGetRenderTargets(1, &oldRTV, &oldDSV); + + // Update constant buffer + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrFeatheredCompositeCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + float originX, originY; + float sizeX, sizeY; + float featherWidth; + float pad0; + float srcUVOriginX, srcUVOriginY; + float srcUVScaleX, srcUVScaleY; + float pad1[2]; + } cbData = { + (float)pasteX, (float)pasteY, + (float)dlssWidthOut, (float)dlssHeightOut, + featherPixels, 0.0f, + 0.0f, 0.0f, // SrcUVOrigin: (0,0) for per-eye textures (identity) + 1.0f, 1.0f, // SrcUVScale: (1,1) for per-eye textures 
(identity) + {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(vrFeatheredCompositeCB.get(), 0); + + // Set viewport to match the per-eye final output + D3D11_VIEWPORT vp = {}; + vp.Width = (float)eyeWidthOut; + vp.Height = (float)eyeHeightOut; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + + // Set blend state for feathered alpha compositing + float blendFactor[4] = { 0, 0, 0, 0 }; + context->OMSetBlendState(vrFeatheredCompositeBlendState.get(), blendFactor, 0xFFFFFFFF); + + // Set render target to vrFinalOutput (contains TAA'd periphery) + ID3D11RenderTargetView* rtvs[1] = { vrFinalOutput[i]->rtv.get() }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + // Set shaders + context->VSSetShader(GetUpscaleVS(), nullptr, 0); + context->PSSetShader(vrFeatheredCompositePS.get(), nullptr, 0); + + // Set input assembler for bufferless fullscreen triangle + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + // Bind rasterizer state (no culling) + context->RSSetState(upscaleRasterizerState.get()); + + // Bind crop texture SRV and sampler + ID3D11ShaderResourceView* srvs[1] = { vrIntermediateColorOut[i]->srv.get() }; + context->PSSetShaderResources(0, 1, srvs); + + if (!vrLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&sampDesc, vrLinearSampler.put())); + } + ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; + context->PSSetSamplers(0, 1, samplers); + + // Bind constant buffer + ID3D11Buffer* cbs[1] = { vrFeatheredCompositeCB.get() }; + 
context->PSSetConstantBuffers(0, 1, cbs); + + // Draw fullscreen triangle + context->Draw(3, 0); + + if (featherLogCount < 3) { + logger::info("[Upscaling] FeatheredCompositePS drawn: eye={}, crop=({},{}) {}x{}, feather={:.1f}", + i, pasteX, pasteY, dlssWidthOut, dlssHeightOut, featherPixels); + featherLogCount++; + } + + // Cleanup PS state + ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; + context->PSSetShaderResources(0, 1, nullSRV); + context->PSSetShader(nullptr, nullptr, 0); + context->VSSetShader(nullptr, nullptr, 0); + + // Restore OM state + context->OMSetBlendState(oldBlendState, oldBlendFactor, oldSampleMask); + if (oldBlendState) + oldBlendState->Release(); + context->OMSetRenderTargets(1, &oldRTV, oldDSV); + if (oldRTV) + oldRTV->Release(); + if (oldDSV) + oldDSV->Release(); + } else { + // PS compilation failed; fall back to hard paste + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + } + } else if (featherPixels > 0.0f) { + // Fallback: RTV not available, use legacy CS path + if (!vrFeatheredCompositeCS) { + vrFeatheredCompositeCS.attach((ID3D11ComputeShader*)Util::CompileShader( + L"Data/Shaders/Upscaling/FeatheredCompositeCS.hlsl", {}, "cs_5_0")); + if (vrFeatheredCompositeCS) + logger::info("[Upscaling] FeatheredCompositeCS compiled (fallback)"); + else + logger::error("[Upscaling] FeatheredCompositeCS FAILED to compile!"); + if (!vrFeatheredCompositeCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrFeatheredCompositeCB.put())); + } + } + + if (vrFeatheredCompositeCS) { + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrFeatheredCompositeCB.get(), 0, 
D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + uint32_t cx, cy, cw, ch; + float feather; + float pad[3]; + } cbData = { + pasteX, pasteY, dlssWidthOut, dlssHeightOut, featherPixels, {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(vrFeatheredCompositeCB.get(), 0); + + ID3D11Buffer* cbs[1] = { vrFeatheredCompositeCB.get() }; + context->CSSetConstantBuffers(0, 1, cbs); + ID3D11ShaderResourceView* srvs[1] = { vrIntermediateColorOut[i]->srv.get() }; + context->CSSetShaderResources(0, 1, srvs); + ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[i]->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + context->CSSetShader(vrFeatheredCompositeCS.get(), nullptr, 0); + context->Dispatch((dlssWidthOut + 7) / 8, (dlssHeightOut + 7) / 8, 1); + + if (featherLogCount < 3) { + logger::info("[Upscaling] FeatheredCompositeCS dispatched (fallback): groups=({},{})", + (dlssWidthOut + 7) / 8, (dlssHeightOut + 7) / 8); + featherLogCount++; + } + + ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; + context->CSSetShaderResources(0, 1, nullSRV); + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetShader(nullptr, nullptr, 0); + } else { + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + } + } else { + // Hard paste (no feathering) + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + if (featherLogCount < 3) { + logger::info("[Upscaling] FinalizePerEyeOutputs eye{}: using HARD PASTE (no feathering)", i); + featherLogCount++; + } + } + + // Copy composition target to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, 
eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrFinalOutput[i]->resource.get(), 0, &outBox); + } else { + // Direct copy DLSS output to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + } } if (state->frameAnnotations) @@ -941,7 +1470,11 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) } void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX) + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY, + uint32_t depthWidth, uint32_t depthHeight, + uint32_t colorWidth, uint32_t colorHeight, + ID3D11ShaderResourceView* fallbackSRV, uint32_t fallbackOffsetX) { if (!globals::game::isVR) return; @@ -952,7 +1485,7 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe vrClearHMDMaskCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/ClearHMDMaskCS.hlsl", {}, "cs_5_0")); D3D11_BUFFER_DESC cbDesc = {}; - cbDesc.ByteWidth = 16; // 4 uints + cbDesc.ByteWidth = 32; // 8 uints (offsets + optional scaling dimensions) cbDesc.Usage = D3D11_USAGE_DYNAMIC; cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; @@ -965,8 +1498,9 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->CSSetShader(vrClearHMDMaskCS.get(), nullptr, 0); - ID3D11ShaderResourceView* srvs[1] = { depthSRV }; - context->CSSetShaderResources(0, 1, srvs); + // t0 = depth, t1 = fallback (nullptr → unbound → reads return (0,0,0,0) → black) + ID3D11ShaderResourceView* srvs[2] = { depthSRV, fallbackSRV }; + context->CSSetShaderResources(0, 2, srvs); ID3D11UnorderedAccessView* uavs[1] = { colorUAV }; 
context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); @@ -974,9 +1508,10 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe D3D11_MAPPED_SUBRESOURCE mapped{}; context->Map(vrClearHMDMaskCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - uint32_t offsets[4] = { depthOffsetX, colorOffsetX, 0, 0 }; + uint32_t cbData[8] = { depthOffsetX, colorOffsetX, depthOffsetY, fallbackOffsetX, + depthWidth, depthHeight, colorWidth, colorHeight }; - memcpy(mapped.pData, offsets, sizeof(offsets)); + memcpy(mapped.pData, cbData, sizeof(cbData)); context->Unmap(vrClearHMDMaskCB.get(), 0); ID3D11Buffer* cbs[1] = { vrClearHMDMaskCB.get() }; @@ -984,13 +1519,81 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->Dispatch(dispatchX, dispatchY, 1); + // Unbind + ID3D11ShaderResourceView* nullSRVs[2] = { nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + ID3D11Buffer* nullCB[1] = { nullptr }; + context->CSSetShaderResources(0, 2, nullSRVs); + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetShader(nullptr, nullptr, 0); + } +} + +void Upscaling::FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV) +{ + if (!globals::game::isVR || !vrFinalOutput[eyeIndex]) + return; + if (!overrideSRV && !vrIntermediateColorIn[eyeIndex]) + return; + + auto context = globals::d3d::context; + + if (!vrPeripheryFillCS) { + vrPeripheryFillCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/VRPeripheryFillCS.hlsl", {}, "cs_5_0")); + + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 16; // 4 uints + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, 
vrPeripheryFillCB.put())); + + D3D11_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&samplerDesc, vrLinearSampler.put())); + } + + if (vrPeripheryFillCS) { + auto dispatchX = (dstWidth + 7) / 8; + auto dispatchY = (dstHeight + 7) / 8; + + context->CSSetShader(vrPeripheryFillCS.get(), nullptr, 0); + + // Read from overrideSRV (e.g. TAA texture) or default render-res per-eye input. + ID3D11ShaderResourceView* srvs[1] = { overrideSRV ? overrideSRV : vrIntermediateColorIn[eyeIndex]->srv.get() }; + context->CSSetShaderResources(0, 1, srvs); + + ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[eyeIndex]->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; + context->CSSetSamplers(0, 1, samplers); + + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrPeripheryFillCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + uint32_t cbData[4] = { srcWidth, srcHeight, dstWidth, dstHeight }; + memcpy(mapped.pData, cbData, sizeof(cbData)); + context->Unmap(vrPeripheryFillCB.get(), 0); + + ID3D11Buffer* cbs[1] = { vrPeripheryFillCB.get() }; + context->CSSetConstantBuffers(0, 1, cbs); + + context->Dispatch(dispatchX, dispatchY, 1); + // Unbind ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; ID3D11Buffer* nullCB[1] = { nullptr }; + ID3D11SamplerState* nullSampler[1] = { nullptr }; context->CSSetShaderResources(0, 1, nullSRV); context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetSamplers(0, 1, nullSampler); context->CSSetShader(nullptr, nullptr, 0); } } @@ -1032,6 +1635,10 @@ void Upscaling::ConfigureTAA() auto 
imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + // Disable water TAA when upscaling is enabled + bool* enableWaterTAA = reinterpret_cast(reinterpret_cast(BSImagespaceShaderISTemporalAA) + 0x38LL); + *enableWaterTAA = !(upscaleMethod == UpscaleMethod::kNONE || upscaleMethod == UpscaleMethod::kTAA); + // Force enable TAA if needed BSImagespaceShaderISTemporalAA->taaEnabled = upscaleMethod != UpscaleMethod::kNONE; } @@ -1097,6 +1704,10 @@ void Upscaling::ConfigureUpscaling(RE::BSGraphics::State* a_viewport) // Disable dynamic resolution unless the game explicitly enables it if (!globals::game::isVR) runtimeData.dynamicResolutionLock = 1; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // No longer need to force-disable culling when upscaling is active. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs. } void Upscaling::SetupResources() @@ -1141,6 +1752,7 @@ void Upscaling::SetupResources() depthStencilDesc.BackFace.StencilDepthFailOp = depthStencilDesc.FrontFace.StencilDepthFailOp; depthStencilDesc.BackFace.StencilPassOp = depthStencilDesc.FrontFace.StencilPassOp; depthStencilDesc.BackFace.StencilFunc = depthStencilDesc.FrontFace.StencilFunc; + } else { depthStencilDesc.StencilEnable = false; // Disable stencil testing } @@ -1194,6 +1806,16 @@ void Upscaling::ClearShaderCache() depthRefractionUpscalePS = nullptr; // com_ptr automatically releases underwaterMaskUpscalePS = nullptr; // com_ptr automatically releases upscaleVS = nullptr; // com_ptr automatically releases + vrClearHMDMaskCS = nullptr; + vrPeripheryFillCS = nullptr; + vrPeripheryFillCB = nullptr; + vrFeatheredCompositeCS = nullptr; + vrFeatheredCompositeCB = nullptr; + vrFeatheredCompositePS = nullptr; + vrFeatheredCompositeBlendState = nullptr; + vrDlssCompositePS = nullptr; + vrDlssUpscalePS = nullptr; + vrDlssUpscaleCB = nullptr; } 
void Upscaling::CopySharedD3D12Resources() @@ -1404,6 +2026,17 @@ bool Upscaling::IsUpscalingActive() const return resolutionScale.x < .99f; } +std::vector Upscaling::GetActiveConstraints() const +{ + std::vector constraints; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs, + // so we no longer need to constrain depth buffer culling when upscaling is active. + + return constraints; +} + /** * @brief Retrieves the current frame time for frame generation. * @@ -1515,7 +2148,7 @@ Upscaling::BlurResources Upscaling::GetBlurResources() const return {}; } -void Upscaling::Upscale() +void Upscaling::Upscale(ID3D11Texture2D* colorSourceOverride) { auto upscaleMethod = GetUpscaleMethod(); @@ -1576,8 +2209,11 @@ void Upscaling::Upscale() { state->BeginPerfEvent("Upscaling"); + // Use color source override if provided (e.g., post-PP intermediate for periphery TAA) + ID3D11Resource* colorSrc = colorSourceOverride ? static_cast(colorSourceOverride) : static_cast(main.texture); + if (upscaleMethod == UpscaleMethod::kDLSS) { - streamline.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); + streamline.Upscale(colorSrc, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); } else if (upscaleMethod == UpscaleMethod::kFSR) { fidelityFX.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVector.texture, settings.sharpnessFSR); } @@ -1715,9 +2351,10 @@ void Upscaling::UpscaleDepth() // Skip alias copies to reduce unnecessary copy churn. 
copyIfNonAliased(depthCopy.texture, depth.texture); - // Clear stencil to be 0xFF + // Clear stencil to 0x00 for VR — the VR shader path discards pixels with + // stencil > 0x00, so 0x00 means "all pixels valid" (process entire display-res). if (globals::game::isVR) { - context->ClearDepthStencilView(depthCopy.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0xFF); + context->ClearDepthStencilView(depthCopy.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0x00); } // Set depth stencil state to write 0x00 @@ -1817,20 +2454,89 @@ void Upscaling::Main_PostProcessing::thunk(RE::ImageSpaceManager* a_this, uint32 if (upscaling.d3d12SwapChainActive && upscaling.settings.frameGenerationMode) upscaling.CopySharedD3D12Resources(); - if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) - upscaling.PerformUpscaling(); + // Increment diagnostic counter (rate-limits TAAReorder logging) + if (TAAReorder::g_initialized) { + TAAReorder::g_diagCounter = (TAAReorder::g_diagCounter + 1) % TAAReorder::DIAG_INTERVAL; + if (TAAReorder::g_diagCounter == 0) { + TAAReorder::g_frameSeqCounter = 0; + logger::info("[SEQ] Main_PostProcessing START seq={}", TAAReorder::g_frameSeqCounter++); + } + } - if (upscaleMethod == UpscaleMethod::kDLSS) - upscaling.ApplySharpening(); + bool peripheryTAA = TAAReorder::ShouldReorderTAA(); + + // Reset per-frame flags unconditionally + TAAReorder::g_postPPReady = false; + TAAReorder::g_dlssReady = false; + TAAReorder::g_dlssPasteComplete = false; + TAAReorder::g_phase5Complete = false; + TAAReorder::g_bsHookCallCount = 0; + TAAReorder::g_submitTexForPaste = nullptr; + + if (peripheryTAA) { + // ─── Periphery TAA with post-conductor DLSS (PureDark's approach) ─── + // func() with TAA enabled → conductor runs all passes unimpeded: + // Phase 2A: ExecutePassHook captures post-PP intermediate to g_postPPCopy + // Phase 5: TAA + DRS → submit texture + // After conductor: ConductorCallHook evaluates DLSS on g_postPPCopy, + // then pastes DLSS center onto 
submit texture + + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + + if (TAAReorder::g_diagCounter == 0) + logger::info("[TAAReorder] peripheryTAA: running func() with TAA enabled..."); + + // Clear stencil marks left by VRStereoOptimizations to prevent TAA interference + if (globals::features::vr.stereoOpt.loaded) { + auto renderer = globals::game::renderer; + auto& depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (depth.views[0]) + globals::d3d::context->ClearDepthStencilView(depth.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0); + } - auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); - GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + // Set TAA high-frequency response for periphery quality + auto fTAAHighFreq = RE::GetINISetting("fTAAHighFreq:Display"); + float savedHF = fTAAHighFreq ? fTAAHighFreq->data.f : 0.0f; + if (fTAAHighFreq) + fTAAHighFreq->data.f = 1.0f; + + // func() with TAA ENABLED — DLSS eval + paste in ConductorCallHook (post-conductor) + BSImagespaceShaderISTemporalAA->taaEnabled = true; + func(a_this, a3, a_target, a_4, a_5); + + // Restore original TAA HF value + if (fTAAHighFreq) + fTAAHighFreq->data.f = savedHF; - BSImagespaceShaderISTemporalAA->taaEnabled = upscaleMethod == UpscaleMethod::kTAA; + // Lock DRS + update camera (after conductor completes) + auto& runtimeData = globals::game::graphicsState->GetRuntimeData(); + runtimeData.dynamicResolutionLock = 1; + UpdateCameraData(); + + // Disable TAA for remainder of frame + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } else { + // ─── Normal flow (no periphery TAA) ─── + if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) + upscaling.PerformUpscaling(); + + if (upscaleMethod == UpscaleMethod::kDLSS) + upscaling.ApplySharpening(); + + auto imageSpaceManager = 
RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); - func(a_this, a3, a_target, a_4, a_5); + BSImagespaceShaderISTemporalAA->taaEnabled = (upscaleMethod == UpscaleMethod::kTAA); + + if (TAAReorder::g_diagCounter == 0 && TAAReorder::g_initialized) + logger::info("[DIAG] Normal DLSS flow: taaEnabled={}, running func()...", BSImagespaceShaderISTemporalAA->taaEnabled); + + func(a_this, a3, a_target, a_4, a_5); + + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } - BSImagespaceShaderISTemporalAA->taaEnabled = false; } void Upscaling::SetScissorRect::thunk(RE::BSGraphics::Renderer* This, int a_left, int a_top, int a_right, int a_bottom) diff --git a/src/Features/Upscaling.h b/src/Features/Upscaling.h index 1e88e99937..df49af71fe 100644 --- a/src/Features/Upscaling.h +++ b/src/Features/Upscaling.h @@ -57,8 +57,12 @@ struct Upscaling : Feature uint streamlineLogLevel = 0; // 0=Off, 1=Default, 2=Verbose float sharpnessFSR = 0.0f; float sharpnessDLSS = 0.0f; - uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M - uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M + uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + float vrDlssViewportScale = 1.0f; // 0.5 to 1.0, fraction of each eye that DLSS processes (VR only) + uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) + float vrDlssCropOffsetX = 0.0f; // 0.0-0.3, nasal offset fraction for DLSS crop position + float vrDlssFeatherWidth = 0.0f; // 0.0-0.1, feather width fraction at DLSS crop boundary (disabled pending fix) }; Settings settings; @@ -110,6 +114,7 @@ struct Upscaling : Feature virtual void Load() override; virtual void PostPostLoad() override; virtual void SetupResources() override; + virtual std::vector GetActiveConstraints() const override; UpscaleMethod GetUpscaleMethod() const; @@ -138,7 
+143,11 @@ struct Upscaling : Feature winrt::com_ptr vrClearHMDMaskCB; // Helper to dispatch mask clearing for a single eye region void ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX); + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY = 0, + uint32_t depthWidth = 0, uint32_t depthHeight = 0, + uint32_t colorWidth = 0, uint32_t colorHeight = 0, + ID3D11ShaderResourceView* fallbackSRV = nullptr, uint32_t fallbackOffsetX = 0); // Shared VR Per-Eye Intermediate Buffers // Owned here so both Streamline (DLSS) and FidelityFX (FSR) can use them. @@ -148,6 +157,43 @@ struct Upscaling : Feature eastl::unique_ptr vrIntermediateMotionVectors[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateReactiveMask[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateTransparencyMask[2]; // per-eye render resolution + eastl::unique_ptr vrFinalOutput[2]; // per-eye display-res composition target (VR viewport scaling) + eastl::unique_ptr vrCropColorIn[2]; // crop-sized DLSS color input (VR viewport scaling only) + + // Periphery TAA (conductor approach) — used by two-call func() flow + winrt::com_ptr vrPreTAACopy; // full stereo kMAIN copy (Phase 1 PP, pre-TAA) + eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) + + // Periphery fill compute shader (bilinear upscale render-res → display-res for VR viewport scaling) + winrt::com_ptr vrPeripheryFillCS; + winrt::com_ptr vrPeripheryFillCB; + winrt::com_ptr vrLinearSampler; + + // Feathered composite compute shader (legacy, kept as fallback) + winrt::com_ptr vrFeatheredCompositeCS; + winrt::com_ptr vrFeatheredCompositeCB; + + // Feathered composite pixel shader approach (replaces CS to preserve periphery TAA) + // Based on PureDark's technique from Skyrim-Upscaler VR (MIT license) + 
winrt::com_ptr vrFeatheredCompositePS; + winrt::com_ptr vrFeatheredCompositeBlendState; + + // DLSS composite pixel shaders (format-converting fullscreen copy for TAAReorder) + winrt::com_ptr vrDlssCompositePS; // point-sample (same-res format conversion) + winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) + winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params + ID3D11PixelShader* GetDlssCompositePS(); + ID3D11PixelShader* GetDlssUpscalePS(); + + struct DlssCompositeCB + { + float2 DynResScale; // renderRes / displayRes per-eye + float2 EyeOffset; // (i * eyeWidth, 0) + float2 SrcTexSize; // full texture dimensions + float2 pad; + }; + void FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV = nullptr); // Helper to create/resize per-eye buffers matching source formats void CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight, uint32_t outWidth, uint32_t outHeight, @@ -160,11 +206,11 @@ struct Upscaling : Feature // Shared Pipeline Steps void PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* depthSrc, ID3D11Resource* mvecSrc, ID3D11Resource* reactiveSrc, ID3D11Resource* transparencySrc); - void FinalizePerEyeOutputs(ID3D11Resource* colorDst); + void FinalizePerEyeOutputs(ID3D11Resource* colorDst, bool eye0Only = false); void ConfigureTAA(); void ConfigureUpscaling(RE::BSGraphics::State* a_state); - void Upscale(); + void Upscale(ID3D11Texture2D* colorSourceOverride = nullptr); // D3D11 textures Texture2D* reactiveMaskTexture = nullptr; diff --git a/src/Features/Upscaling/Streamline.cpp b/src/Features/Upscaling/Streamline.cpp index 71eb3a3542..1b331002fb 100644 --- a/src/Features/Upscaling/Streamline.cpp +++ b/src/Features/Upscaling/Streamline.cpp @@ -7,6 +7,7 @@ #include "../../Hooks.h" #include "../../State.h" #include "../../Util.h" +#include "../TAAReorder.h" #include "../Upscaling.h" #include 
"DX12SwapChain.h" @@ -237,6 +238,20 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.cameraMotionIncluded = sl::Boolean::eTrue; slConstants.cameraPinholeOffset = { 0.f, 0.f }; + + // VR nasal offset: when the crop is shifted, tell DLSS the optical center is offset + if (globals::game::isVR) { + float nasalFrac = std::clamp(globals::features::upscaling.settings.vrDlssCropOffsetX, 0.0f, 0.3f); + float vpScale = std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f); + if (nasalFrac > 0.0f && vpScale < 1.0f) { + // Pinhole offset in NDC: how far the crop center is from the eye's optical axis + // Eye 0: shifted right (+X), Eye 1: shifted left (-X) + float shiftNDC = nasalFrac / vpScale; // normalized to crop width + float sign = (eyeIndex == 0) ? 1.0f : -1.0f; + slConstants.cameraPinholeOffset = { sign * shiftNDC, 0.f }; + } + } + slConstants.cameraRight = { viewMatrix._11, viewMatrix._12, viewMatrix._13 }; slConstants.cameraUp = { viewMatrix._21, viewMatrix._22, viewMatrix._23 }; slConstants.cameraFwd = { viewMatrix._31, viewMatrix._32, viewMatrix._33 }; @@ -245,6 +260,28 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.depthInverted = sl::Boolean::eFalse; if (globals::game::isVR) { + // When VR viewport scaling is active, DLSS processes a centered sub-region of each eye. + // The projection matrix must be adjusted to describe only the crop's FOV, not the full eye. + // Without this, DLSS's temporal reprojection maps pixels to wrong world positions, + // destroying temporal accumulation (causing aliasing and instability). + // Scaling rows 0 and 1 of the projection by 1/vpScale narrows the clip-space X/Y + // to match the crop region. clipToPrevClip must also be conjugated (see below). 
+ float vpScale = std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f); + if (vpScale < 1.0f) { + float invScale = 1.0f / vpScale; + // Row 0 → clip.x, Row 1 → clip.y (Streamline row-major, P * pos convention) + slConstants.cameraViewToClip[0].x *= invScale; + slConstants.cameraViewToClip[0].y *= invScale; + slConstants.cameraViewToClip[0].z *= invScale; + slConstants.cameraViewToClip[0].w *= invScale; + slConstants.cameraViewToClip[1].x *= invScale; + slConstants.cameraViewToClip[1].y *= invScale; + slConstants.cameraViewToClip[1].z *= invScale; + slConstants.cameraViewToClip[1].w *= invScale; + // Narrow the reported FOV to match the crop + slConstants.cameraFOV = 2.0f * atanf(vpScale * tanf(slConstants.cameraFOV * 0.5f)); + } + // VR: compute clipToCameraView / clipToPrevClip / prevClipToClip from Skyrim's per-eye matrices. // recalculateCameraMatrices() uses a single static prev-frame slot -- unusable for two viewports. sl::matrixFullInvert(slConstants.clipToCameraView, slConstants.cameraViewToClip); @@ -258,7 +295,62 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye sl::float4x4 invCurrViewProj; sl::matrixFullInvert(invCurrViewProj, currViewProjSL); sl::matrixMul(slConstants.clipToPrevClip, invCurrViewProj, prevViewProjSL); + + // When viewport scaling is active, cameraViewToClip is adjusted (narrower FOV), + // changing the clip space. clipToPrevClip (computed from unadjusted VP) maps between + // unadjusted clip spaces. We must conjugate it to map between adjusted clip spaces: + // CTP_adj = inv(S) * CTP * S + // where S = diag(invScale, invScale, 1, 1), inv(S) = diag(vpScale, vpScale, 1, 1). 
+ // + // Derivation (row-vector convention: clip = view * P): + // clip_adj = clip_unadj * S (scaling rows 0,1 of P scales clip x,y by invScale) + // clip_prev_adj = clip_prev_unadj * S + // clip_prev_unadj = clip_curr_unadj * CTP + // clip_prev_adj = (clip_curr_adj * inv(S)) * CTP * S = clip_curr_adj * (inv(S) * CTP * S) + // + // Element-wise: CTP_adj[i][j] = inv(S)[i] * CTP[i][j] * S[j] + // Rows 0,1, cols 0,1: vpScale * invScale = 1 (unchanged) + // Rows 0,1, cols 2,3: vpScale * 1 = vpScale + // Rows 2,3, cols 0,1: 1 * invScale = invScale + // Rows 2,3, cols 2,3: unchanged + // + // This ensures clipToPrevClip agrees with per-pixel motion vectors. + // Without correct conjugation, DLSS sees disagreement between the camera-predicted + // motion and per-pixel motion vectors, causing it to reject temporal accumulation + // during camera motion. (When still, CTP ≈ I, and inv(S)*I*S = I → no mismatch.) + if (vpScale < 1.0f) { + float invScale = 1.0f / vpScale; + // Rows 0,1 cols 2,3: multiply by vpScale (from left-multiply by inv(S)) + slConstants.clipToPrevClip[0].z *= vpScale; + slConstants.clipToPrevClip[0].w *= vpScale; + slConstants.clipToPrevClip[1].z *= vpScale; + slConstants.clipToPrevClip[1].w *= vpScale; + // Rows 2,3 cols 0,1: multiply by invScale (from right-multiply by S) + slConstants.clipToPrevClip[2].x *= invScale; + slConstants.clipToPrevClip[2].y *= invScale; + slConstants.clipToPrevClip[3].x *= invScale; + slConstants.clipToPrevClip[3].y *= invScale; + } + sl::matrixFullInvert(slConstants.prevClipToClip, slConstants.clipToPrevClip); + + // Per-eye diagnostic logging for temporal quality investigation + { + static uint32_t ctpDiagCounter = 0; + bool ctpDiag = (ctpDiagCounter++ % 300 == 0) || (TAAReorder::g_diagCounter == 0 && vpScale < 1.0f); + if (ctpDiag) { + auto& ctp = slConstants.clipToPrevClip; + logger::info("[DLSS-CTP] Eye {} clipToPrevClip diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, ctp[0].x, ctp[1].y, ctp[2].z, ctp[3].w); + 
logger::info("[DLSS-CTP] Eye {} prevVP diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, prevViewProjSL[0].x, prevViewProjSL[1].y, prevViewProjSL[2].z, prevViewProjSL[3].w); + logger::info("[DLSS-CTP] Eye {} currVP diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, currViewProjSL[0].x, currViewProjSL[1].y, currViewProjSL[2].z, currViewProjSL[3].w); + logger::info("[DLSS-CTP] Eye {} cameraPos=({:.2f},{:.2f},{:.2f}) fov={:.4f} mvecScale=({:.4f},{:.4f})", + eyeIndex, slConstants.cameraPos.x, slConstants.cameraPos.y, slConstants.cameraPos.z, + slConstants.cameraFOV, slConstants.mvecScale.x, slConstants.mvecScale.y); + } + } } else { recalculateCameraMatrices(slConstants); } @@ -268,7 +360,26 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.jitterOffset = { -jitter.x, -jitter.y }; slConstants.reset = sl::Boolean::eFalse; - slConstants.mvecScale = { 1.0f, 1.0f }; + // mvecScale normalizes motion vectors to [-1,1] range. The Streamline DLSS plugin + // then multiplies by the input render dimensions to get pixel displacement: + // MV_Scale = mvecScale * renderWidth + // The game's motion vectors are in [-1,1] normalized to the FULL per-eye dimensions. + // Without viewport scaling, renderWidth = eyeWidthIn → MV_Scale = eyeWidthIn → correct. + // With viewport scaling, renderWidth = cropWidthIn = eyeWidthIn * vpScale, so DLSS + // underestimates motion by vpScale. Compensate by scaling mvecScale by 1/vpScale. 
+ if (globals::game::isVR && std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f) < 1.0f) { + float invScale = 1.0f / std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f); + slConstants.mvecScale = { invScale, invScale }; + } else { + slConstants.mvecScale = { 1.0f, 1.0f }; + } + // Log mvecScale after assignment (was previously logged before assignment, showing uninitialized values) + if (globals::game::isVR && TAAReorder::g_diagCounter == 0 && std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f) < 1.0f) { + logger::info("[TAAReorder] Eye {} mvecScale=({:.4f},{:.4f}) jitter=({:.4f},{:.4f})", + eyeIndex, slConstants.mvecScale.x, slConstants.mvecScale.y, + slConstants.jitterOffset.x, slConstants.jitterOffset.y); + } + slConstants.motionVectors3D = sl::Boolean::eFalse; slConstants.motionVectorsInvalidValue = FLT_MIN; slConstants.orthographicProjection = sl::Boolean::eFalse; @@ -277,6 +388,13 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye if (SL_FAILED(res, slSetConstants(slConstants, *frameToken, p_viewport))) { logger::error("[Streamline] Could not set constants for eye {}", eyeIndex); + } else { + static uint32_t constDiagCounter = 0; + if (constDiagCounter++ % 300 == 0) { + logger::info("[Streamline] slSetConstants OK eye={} jitter=({:.4f},{:.4f}) fov={:.4f}", + eyeIndex, slConstants.jitterOffset.x, slConstants.jitterOffset.y, + slConstants.cameraFOV); + } } } @@ -304,7 +422,7 @@ bool Streamline::IsRTXAndBelow40Series(IDXGIAdapter* a_adapter) return false; } -void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) +void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width, uint32_t height) { sl::DLSSOptions dlssOptions{}; @@ -328,10 +446,8 @@ void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) break; } - auto state = globals::state; - dlssOptions.outputWidth = width; - 
dlssOptions.outputHeight = (uint)state->screenSize.y; + dlssOptions.outputHeight = height; // Detect HDR from kMAIN format at runtime -- VR kMAIN may be 8-bit while SE is FP16 { @@ -394,7 +510,7 @@ void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, ID3D11Resource* colorIn, ID3D11Resource* colorOut, ID3D11Resource* depth, ID3D11Resource* mvec, ID3D11Resource* reactiveMask, ID3D11Resource* transparencyMask, - const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth) + const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth, uint32_t outputHeight) { auto context = globals::d3d::context; @@ -406,7 +522,7 @@ void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, sl::Resource transparencyMaskRes = { sl::ResourceType::eTex2d, transparencyMask, 0 }; CheckFrameConstants(vp, eyeIndex); - SetDLSSOptions(vp, outputWidth); + SetDLSSOptions(vp, outputWidth, outputHeight); sl::ResourceTag tags[] = { { &colorInRes, sl::kBufferTypeScalingInputColor, sl::ResourceLifecycle::eOnlyValidNow, &extentIn }, @@ -438,17 +554,24 @@ void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, if (state->frameAnnotations) state->EndPerfEvent(); - if (evalResult != sl::Result::eOk) { - static bool evalErrorLogged[2] = { false, false }; + // Rate-limited diagnostic logging for DLSS evaluation results + { + static uint32_t evalDiagCounter[2] = { 0, 0 }; uint32_t logIdx = globals::game::isVR ? eyeIndex : 0; - if (!evalErrorLogged[logIdx]) { - evalErrorLogged[logIdx] = true; - logger::error("[Streamline] slEvaluateFeature failed{} result={}", globals::game::isVR ? 
std::format(" for eye {}", eyeIndex) : "", (int)evalResult); + bool diagLog = (evalDiagCounter[logIdx]++ % 300 == 0); + + if (evalResult != sl::Result::eOk) { + if (diagLog) { + logger::error("[Streamline] slEvaluateFeature FAILED eye={} result={} (frame {})", + eyeIndex, (int)evalResult, evalDiagCounter[logIdx]); + } + } else if (diagLog) { + logger::info("[Streamline] slEvaluateFeature OK eye={} (frame {})", eyeIndex, evalDiagCounter[logIdx]); } } } -void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors) +void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors, bool eye0Only) { auto state = globals::state; @@ -458,9 +581,15 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r auto screenSize = state->screenSize; auto renderSize = Util::ConvertToDynamic(screenSize); - // VR: Combined-buffer mode with extent offsets causes temporal ghosting on the right eye - // because DLSS's internal history buffers use extent offsets as indices. - // Per-eye isolation with extents at {0,0} is required. + // VR: Per-eye isolation is required. Each eye uses a separate per-eye texture + // with its own viewport handle, avoiding cross-eye history contamination. + // When viewport scaling is active (vrDlssViewportScale < 1.0): + // - All DLSS inputs are physically cropped to the center sub-region at {0,0}. + // This eliminates non-zero subrect base offsets which break temporal reprojection. + // - Camera matrices are adjusted in CheckFrameConstants to match the crop's FOV. + // - FillPeriphery bilinear-upscales the full render-res input to vrFinalOutput, + // then FinalizePerEyeOutputs pastes the DLSS crop output into the center. + // When viewport scaling is off (scale == 1.0), all textures are full-size at {0,0}. 
if (globals::game::isVR) { auto& upscaling = globals::features::upscaling; uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); @@ -468,21 +597,50 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; + float vpScale = std::clamp(upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f); + bool viewportScaling = vpScale < 1.0f; + + uint32_t dlssWidthIn = viewportScaling ? (uint32_t)(eyeWidthIn * vpScale) : eyeWidthIn; + uint32_t dlssHeightIn = viewportScaling ? (uint32_t)(eyeHeightIn * vpScale) : eyeHeightIn; + uint32_t dlssWidthOut = viewportScaling ? (uint32_t)(eyeWidthOut * vpScale) : eyeWidthOut; + uint32_t dlssHeightOut = viewportScaling ? (uint32_t)(eyeHeightOut * vpScale) : eyeHeightOut; + upscaling.PreparePerEyeInputs(a_upscalingTexture, depthTexture.texture, a_motionVectors, a_reactiveMask, a_transparencyCompositionMask); - for (uint32_t i = 0; i < 2; ++i) { + // Periphery TAA diagnostic + if (TAAReorder::g_diagCounter == 0 && viewportScaling && upscaling.settings.vrPeripheryTAA) { + logger::info("[TAAReorder] Periphery TAA: vrTAAdPerEye[0]={}, g_initialized={} (TAA injected at display RT level)", + (void*)upscaling.vrTAAdPerEye[0].get(), TAAReorder::g_initialized); + } + + uint32_t eyeCount = eye0Only ? 1 : 2; + for (uint32_t i = 0; i < eyeCount; ++i) { sl::ViewportHandle vp = (i == 1) ? viewportRight : viewport; - sl::Extent extentIn{ 0, 0, eyeWidthIn, eyeHeightIn }; - sl::Extent extentOut{ 0, 0, eyeWidthOut, eyeHeightOut }; + + if (viewportScaling) { + // Pre-fill composition target with bilinear upscale of full render-res eye. + // DLSS output is pasted on top in FinalizePerEyeOutputs. + upscaling.FillPeriphery(i, eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + } + + // All extents are {0,0} - inputs are physically crop-sized (or full-sized when not scaling). 
+ // No non-zero subrect base offsets, which is critical for DLSS temporal reprojection. + sl::Extent extentIn = { 0, 0, dlssWidthIn, dlssHeightIn }; + sl::Extent extentOut = { 0, 0, dlssWidthOut, dlssHeightOut }; + + // When viewport scaling, use crop-sized vrCropColorIn; otherwise use full vrIntermediateColorIn + ID3D11Resource* colorInput = viewportScaling ? + upscaling.vrCropColorIn[i]->resource.get() : + upscaling.vrIntermediateColorIn[i]->resource.get(); EvaluateDLSS(vp, i, - upscaling.vrIntermediateColorIn[i]->resource.get(), upscaling.vrIntermediateColorOut[i]->resource.get(), + colorInput, upscaling.vrIntermediateColorOut[i]->resource.get(), upscaling.vrIntermediateDepth[i]->resource.get(), upscaling.vrIntermediateMotionVectors[i]->resource.get(), upscaling.vrIntermediateReactiveMask[i]->resource.get(), upscaling.vrIntermediateTransparencyMask[i]->resource.get(), - extentIn, extentOut, eyeWidthOut); + extentIn, extentOut, dlssWidthOut, dlssHeightOut); } - upscaling.FinalizePerEyeOutputs(a_upscalingTexture); + upscaling.FinalizePerEyeOutputs(a_upscalingTexture, eye0Only); } else { // Non-VR: Simple full-texture upscale sl::Extent extentIn{ 0, 0, (uint)renderSize.x, (uint)renderSize.y }; @@ -491,7 +649,7 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r EvaluateDLSS(viewport, 0, a_upscalingTexture, a_upscalingTexture, depthTexture.texture, a_motionVectors, a_reactiveMask, a_transparencyCompositionMask, - extentIn, extentOut, (uint)screenSize.x); + extentIn, extentOut, (uint)screenSize.x, (uint)screenSize.y); } } /** diff --git a/src/Features/Upscaling/Streamline.h b/src/Features/Upscaling/Streamline.h index 0f771fb9a0..348f0b6849 100644 --- a/src/Features/Upscaling/Streamline.h +++ b/src/Features/Upscaling/Streamline.h @@ -70,7 +70,7 @@ class Streamline void EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, ID3D11Resource* colorIn, ID3D11Resource* colorOut, ID3D11Resource* depth, ID3D11Resource* mvec, ID3D11Resource* 
reactiveMask, ID3D11Resource* transparencyMask, - const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth); + const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth, uint32_t outputHeight); // Cached DLL version info for Streamline plugin directory static std::vector> dllVersions; @@ -85,9 +85,9 @@ class Streamline bool IsRTXAndBelow40Series(IDXGIAdapter* a_adapter); - void SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width); + void SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width, uint32_t height); - void Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors); + void Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors, bool eye0Only = false); void DestroyDLSSResources(); }; From 94424c6c761e06a48f4bb01057c49adc1140eab8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 25 Mar 2026 05:46:25 +0000 Subject: [PATCH 3/3] =?UTF-8?q?style:=20=F0=9F=8E=A8=20apply=20pre-commit.?= =?UTF-8?q?ci=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated formatting by clang-format, prettier, and other hooks. See https://pre-commit.ci for details. 
--- package/Shaders/Common/SharedData.hlsli | 8 ++--- .../Shaders/VRStereoOptimizations/modes.hlsli | 8 ++--- src/Features/TAAReorder.cpp | 33 ++++++++++++------- src/Features/Upscaling.cpp | 1 - src/Features/VR.h | 2 +- src/Features/VRStereoOptimizations.cpp | 2 -- src/Features/VRStereoOptimizations.h | 2 +- 7 files changed, 32 insertions(+), 24 deletions(-) diff --git a/package/Shaders/Common/SharedData.hlsli b/package/Shaders/Common/SharedData.hlsli index d4e54c77a3..cb5d3c0416 100644 --- a/package/Shaders/Common/SharedData.hlsli +++ b/package/Shaders/Common/SharedData.hlsli @@ -20,10 +20,10 @@ namespace SharedData float Timer; uint FrameCount; uint FrameCountAlwaysActive; - bool InInterior; // If the area lacks a directional shadow light e.g. the sun or moon - bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) - bool HideSky; // HideSky flag in WorldSpace, e.g. Blackreach - float MipBias; // Offset to mip level for TAA sharpness + bool InInterior; // If the area lacks a directional shadow light e.g. the sun or moon + bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) + bool HideSky; // HideSky flag in WorldSpace, e.g. 
Blackreach + float MipBias; // Offset to mip level for TAA sharpness float pad0; float4 AmbientSHR; float4 AmbientSHG; diff --git a/package/Shaders/VRStereoOptimizations/modes.hlsli b/package/Shaders/VRStereoOptimizations/modes.hlsli index 95fb721833..b693dedcc3 100644 --- a/package/Shaders/VRStereoOptimizations/modes.hlsli +++ b/package/Shaders/VRStereoOptimizations/modes.hlsli @@ -1,10 +1,10 @@ #ifndef __VR_STEREO_OPT_MODES_HLSLI__ #define __VR_STEREO_OPT_MODES_HLSLI__ -#define MODE_DISOCCLUDED 0 -#define MODE_EDGE 1 -#define MODE_MAIN 2 +#define MODE_DISOCCLUDED 0 +#define MODE_EDGE 1 +#define MODE_MAIN 2 #define MODE_EDGE_NEIGHBOUR 3 -#define MODE_FULL_BLEND 4 +#define MODE_FULL_BLEND 4 #endif diff --git a/src/Features/TAAReorder.cpp b/src/Features/TAAReorder.cpp index 015a95eee6..6d1f56dfcc 100644 --- a/src/Features/TAAReorder.cpp +++ b/src/Features/TAAReorder.cpp @@ -305,7 +305,8 @@ namespace TAAReorder D3D11_MAPPED_SUBRESOURCE mapped{}; context->Map(upscaling.vrFeatheredCompositeCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - struct { + struct + { float originX, originY; float sizeX, sizeY; float featherWidth; @@ -347,16 +348,26 @@ namespace TAAReorder context->PSSetConstantBuffers(0, 1, &oldPSCB); context->OMSetRenderTargets(1, &oldRTV, oldDSV); - if (oldBlendState) oldBlendState->Release(); - if (oldRTV) oldRTV->Release(); - if (oldDSV) oldDSV->Release(); - if (oldVS) oldVS->Release(); - if (oldPS) oldPS->Release(); - if (oldIL) oldIL->Release(); - if (oldRS) oldRS->Release(); - if (oldPSSRV) oldPSSRV->Release(); - if (oldPSSampler) oldPSSampler->Release(); - if (oldPSCB) oldPSCB->Release(); + if (oldBlendState) + oldBlendState->Release(); + if (oldRTV) + oldRTV->Release(); + if (oldDSV) + oldDSV->Release(); + if (oldVS) + oldVS->Release(); + if (oldPS) + oldPS->Release(); + if (oldIL) + oldIL->Release(); + if (oldRS) + oldRS->Release(); + if (oldPSSRV) + oldPSSRV->Release(); + if (oldPSSampler) + oldPSSampler->Release(); + if (oldPSCB) + 
oldPSCB->Release(); pasteRTV->Release(); } else { diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index d58c75c4fc..0e89c3e338 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -2536,7 +2536,6 @@ void Upscaling::Main_PostProcessing::thunk(RE::ImageSpaceManager* a_this, uint32 BSImagespaceShaderISTemporalAA->taaEnabled = false; } - } void Upscaling::SetScissorRect::thunk(RE::BSGraphics::Renderer* This, int a_left, int a_top, int a_right, int a_bottom) diff --git a/src/Features/VR.h b/src/Features/VR.h index f3c2a1807f..fe8f28bb79 100644 --- a/src/Features/VR.h +++ b/src/Features/VR.h @@ -1,9 +1,9 @@ #pragma once #include "Menu.h" #include "OverlayFeature.h" -#include "VRStereoOptimizations.h" #include "Utils/Input.h" #include "VR/OpenVRDetection.h" // In Features/VR/ +#include "VRStereoOptimizations.h" #include #include #include diff --git a/src/Features/VRStereoOptimizations.cpp b/src/Features/VRStereoOptimizations.cpp index 6e540fe6f5..98da4c21ce 100644 --- a/src/Features/VRStereoOptimizations.cpp +++ b/src/Features/VRStereoOptimizations.cpp @@ -206,7 +206,6 @@ void VRStereoOptimizations::CompileShaders() reprojectionCS.attach(reinterpret_cast(ptr)); else logger::error("[VRStereoOptimizations] Failed to compile ReprojectionCS"); - } void VRStereoOptimizations::ClearShaderCache() @@ -648,4 +647,3 @@ void VRStereoOptimizations::DeactivateStencil() logger::trace("[VRStereoOptimizations] Frame: stencilSwapCount={}", stencilSwapCount); stencilActive = false; } - diff --git a/src/Features/VRStereoOptimizations.h b/src/Features/VRStereoOptimizations.h index ff264d8747..57683e45bf 100644 --- a/src/Features/VRStereoOptimizations.h +++ b/src/Features/VRStereoOptimizations.h @@ -42,7 +42,7 @@ struct VRStereoOptimizations MODE_EDGE = 1, ///< Fully shaded + bilateral blend with other eye MODE_MAIN = 2, ///< Eye 0: no reproject (Perf) / bilateral (Quality). 
Eye 1: overwrite (Perf) / bilateral (Quality) MODE_EDGE_NEIGHBOUR = 3, ///< Outer band: background pixels near edge, blended in post-process - MODE_FULL_BLEND = 4, ///< Near-camera pixels: fully shaded in both eyes + bilateral blended + MODE_FULL_BLEND = 4, ///< Near-camera pixels: fully shaded in both eyes + bilateral blended }; //=============================================================================