diff --git a/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli index 4152bdb2d5..bd18791f4a 100644 --- a/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli +++ b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli @@ -325,6 +325,7 @@ namespace ExtendedMaterials float2 GetParallaxCoords(float distance, float2 coords, float mipLevel, float3 viewDir, float3x3 tbn, float noise, Texture2D tex, SamplerState texSampler, uint channel, DisplacementParams params, out float pixelOffset) #endif { + pixelOffset = 0; float3 viewDirTS = normalize(mul(tbn, viewDir)); #if defined(LANDSCAPE) viewDirTS.xy /= viewDirTS.z * 0.7 + 0.3 + params[0].FlattenAmount; // Fix for objects at extreme viewing angles @@ -496,7 +497,7 @@ namespace ExtendedMaterials #endif nearBlendToFar *= nearBlendToFar; float offset = (1.0 - parallaxAmount) * -maxHeight + minHeight; - pixelOffset = lerp(parallaxAmount * scale, 0, nearBlendToFar); + pixelOffset = saturate(lerp(parallaxAmount, 0.5, nearBlendToFar)); return lerp(viewDirTS.xy * offset + coords.xy, coords, nearBlendToFar); } diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl index 46e34b175e..357f9efd28 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl @@ -19,6 +19,10 @@ RWTexture2D outAccumFrames : register(u0); RWTexture2D outIlY : register(u1); RWTexture2D outIlCoCg : register(u2); +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + // samples = 8, min distance = 0.5, average samples on radius = 2 static const float3 g_Poisson8[8] = { float3(-0.4706069, -0.4427112, +0.6461146), @@ -88,6 +92,20 @@ float2x2 getRotationMatrix(float noise) // Early exit if dispatch thread is outside frame 
bounds if (any(dtid >= uint2(OUT_FRAME_DIM))) return; + +#if defined(VR_STEREO_OPT) + { + float2 uv = (dtid + .5) * RCP_OUT_FRAME_DIM; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + if (eyeIndex == 1) { + uint2 fullResPx = uint2(uv * FrameDim); + uint mode = StereoOptModeTexture[fullResPx]; + if (mode == 1 || mode == 2) + return; + } + } +#endif + const float2 frameScale = FrameDim * RcpTexDim; float radius = BlurRadius; diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl index ebc8b08956..0800601eae 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl @@ -46,6 +46,10 @@ Texture2D srcPrevY : register(t6); // maybe half-res Texture2D srcPrevCoCg : register(t7); // maybe half-res Texture2D srcPrevGISpecular : register(t8); // maybe half-res +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + RWTexture2D outAo : register(u0); RWTexture2D outY : register(u1); RWTexture2D outCoCg : register(u2); @@ -343,6 +347,15 @@ void CalculateGI( float2 uv = (pxCoord + .5) * RCP_OUT_FRAME_DIM; uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint2 fullResPx = uint2(uv * FrameDim); + uint mode = StereoOptModeTexture[fullResPx]; + if (mode == 1 || mode == 2) + return; + } +#endif + float viewspaceZ = READ_DEPTH(srcWorkingDepth, pxCoord); float2 normalSample = FULLRES_LOAD(srcNormalRoughness, pxCoord, uv * frameScale, samplerLinearClamp).xy; diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl index 47317a1c87..af14abf417 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl @@ -15,6 +15,10 @@ Texture2D srcPrevIlY : 
register(t7); // maybe half-res Texture2D srcPrevIlCoCg : register(t8); // maybe half-res Texture2D srcPrevGISpecular : register(t9); // maybe half-res +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + RWTexture2D outRadianceDisocc : register(u0); RWTexture2D outAccumFrames : register(u1); RWTexture2D outRemappedAo : register(u2); @@ -75,6 +79,16 @@ void readHistory( const float2 uv = (pixCoord + .5) * RCP_OUT_FRAME_DIM; const uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint2 fullResPx = uint2(uv * FrameDim); + uint mode = StereoOptModeTexture[fullResPx]; + if (mode == 1 || mode == 2) + return; + } +#endif + const float2 screen_pos = Stereo::ConvertFromStereoUV(uv, eyeIndex); float2 prev_screen_pos = screen_pos; diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl index 365e50236f..3c5cc748cf 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/stereoSync.cs.hlsl @@ -17,6 +17,10 @@ Texture2D srcAo : register(t1); Texture2D srcIlY : register(t2); Texture2D srcIlCoCg : register(t3); +# if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +# endif + RWTexture2D outAo : register(u0); RWTexture2D outIlY : register(u1); RWTexture2D outIlCoCg : register(u2); @@ -59,6 +63,17 @@ float4 SampleCrossDepths(float2 centerUV, float2 step, float2 texScale, uint eye uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); +# if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint2 fullResPx = uint2(uv * FrameDim); + uint mode = StereoOptModeTexture[fullResPx]; + if (mode == 1 || mode == 2) { + Passthrough(dtid); + return; + } + } +# endif + // SSGI working depth is linear view-space Z. // 0.0 = mask (outside lens area). FP_Z = first-person hands threshold (~18.0). 
float depth = srcDepth.SampleLevel(samplerPointClamp, uv * frameScale, RES_MIP); diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl index 9a7015db59..6e4913399d 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl @@ -1,6 +1,7 @@ // depth-aware upsampling: https://gist.github.com/pixelmager/a4364ea18305ed5ca707d89ddc5f8743 #include "Common/FastMath.hlsli" +#include "Common/VR.hlsli" #include "ScreenSpaceGI/common.hlsli" Texture2D srcDepth : register(t0); @@ -9,6 +10,10 @@ Texture2D srcIlY : register(t2); // half-res Texture2D srcIlCoCg : register(t3); // half-res Texture2D srcGiSpecular : register(t4); // half-res +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + RWTexture2D outAo : register(u0); RWTexture2D outIlY : register(u1); RWTexture2D outIlCoCg : register(u2); @@ -23,6 +28,19 @@ RWTexture2D outGiSpecular : register(u3); // Early exit if dispatch thread is outside frame bounds if (any(dtid >= uint2(FrameDim))) return; + +#if defined(VR_STEREO_OPT) + { + float2 uv = (dtid + .5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + if (eyeIndex == 1) { + uint mode = StereoOptModeTexture[dtid]; + if (mode == 1 || mode == 2) + return; + } + } +#endif + #ifdef HALF_RES int2 px00 = (dtid >> 1) + (dtid & 1) - 1; #else // QUARTER_RES diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl index 132ad940b1..19982b474b 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl @@ -50,7 +50,10 @@ cbuffer PerFrame : register(b1) parameters.DynamicRes = DynamicRes; - parameters.UsePrecisionOffset = true; + // VR note: precision 
offset adds a depth bias that can cause subtle shadow + // shifting. Disabled to match the old (stable) SSS implementation. + // See: docs/development/Old code/RaymarchCS.hlsl + parameters.UsePrecisionOffset = false; WriteScreenSpaceShadow(parameters, groupID, groupThreadID); } \ No newline at end of file diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ScreenSpaceShadows.hlsli b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ScreenSpaceShadows.hlsli index 0d1f221726..b4a70a1fdf 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ScreenSpaceShadows.hlsli +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ScreenSpaceShadows.hlsli @@ -1,10 +1,82 @@ +// Screen Space Shadows consumption helper. +// Non-VR: depth-weighted 4-sample Poisson blur for spatial denoising. +// VR: direct Load — the Poisson blur's per-pixel noise rotation is +// screen-position-dependent, causing shadows to shift on camera movement. +// Without TAA to average out the rotation noise, the instability hits +// the final output directly. Matches the stable v1.2 VR implementation. + +#include "Common/Math.hlsli" namespace ScreenSpaceShadows { Texture2D ScreenSpaceShadowsTexture : register(t45); + float4 GetBlurWeights(float4 depths, float centerDepth) + { + centerDepth += 1.0; + float depthSharpness = saturate((1024.0 * 1024.0) / (centerDepth * centerDepth)); + float4 depthDifference = (depths - centerDepth) * depthSharpness; + return exp2(-depthDifference * depthDifference); + } + float GetScreenSpaceShadow(float3 screenPosition, float2 uv, float noise, uint eyeIndex) { - return ScreenSpaceShadowsTexture.Load(int3(int2(screenPosition.xy + 0.5f), 0)).x; +#if defined(VR) + // VR: direct sample, no spatial blur. The Poisson blur's per-pixel noise + // rotation is screen-position-dependent — camera movement changes the + // rotation angle for the same world surface, causing shadows to visually + // shift. 
Without TAA to average out the rotation noise, the per-frame + // instability hits the final output directly. Direct Load avoids this. + // Matches the stable v1.2 VR implementation. + return ScreenSpaceShadowsTexture.Load(int3(screenPosition.xy, 0)); +#else + // Flat: depth-weighted 4-sample Poisson blur for spatial denoising. + // Rotated per-pixel by screen-space noise to break structured patterns. + // TAA averages out the rotation noise across frames. + noise *= Math::TAU; + + half2x2 rotationMatrix = half2x2(cos(noise), sin(noise), -sin(noise), cos(noise)); + + float4 shadowSamples = 0; + float4 depthSamples = 0; + +# if defined(DEFERRED) && !defined(DO_ALPHA_TEST) + depthSamples[0] = screenPosition.z; +# else + depthSamples[0] = SharedData::DepthTexture.Load(int3(screenPosition.xy, 0)); +# endif + + shadowSamples[0] = ScreenSpaceShadowsTexture.Load(int3(screenPosition.xy, 0)); + + static const float2 BlurOffsets[3] = { + float2(-0.6720635096678028f, 0.6601738628451107f), + float2(0.6110340335380645f, 0.5269905984201742f), + float2(0.20239029763403027f, -0.7841160574831084f), + }; + + [unroll] for (uint i = 1; i < 4; i++) + { + float2 offset = mul(BlurOffsets[i - 1], rotationMatrix) * 0.0025; + + float2 sampleUV = uv + offset; + sampleUV = saturate(sampleUV); + + int3 sampleCoord = SharedData::ConvertUVToSampleCoord(sampleUV, eyeIndex); + + depthSamples[i] = SharedData::DepthTexture.Load(sampleCoord).x; + shadowSamples[i] = ScreenSpaceShadowsTexture.Load(sampleCoord); + } + + depthSamples = SharedData::GetScreenDepths(depthSamples); + + float4 blurWeights = GetBlurWeights(depthSamples, depthSamples[0]); + float shadow = dot(shadowSamples, blurWeights); + + float blurWeightsTotal = dot(blurWeights, 1.0); + [flatten] if (blurWeightsTotal > 0.0) + shadow = shadow / blurWeightsTotal; + + return shadow; +#endif } -} \ No newline at end of file +} diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl b/features/Screen-Space 
Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl index 92f0066261..67dee6957e 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/StereoSyncCS.hlsl @@ -15,6 +15,10 @@ Texture2D SrcDepthTexture : register(t0); Texture2D SrcShadowTexture : register(t1); +# if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +# endif + RWTexture2D OutShadowTexture : register(u0); cbuffer StereoSyncCB : register(b1) @@ -90,6 +94,18 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); +# if defined(VR_STEREO_OPT) + // Eye 1 pixels with mode 1 (edge) or 2 (main) will be overwritten by StereoBlend + // reprojection, so skip the expensive stereo sync work and write neutral (unshadowed). + if (eyeIndex == 1) { + uint mode = StereoOptModeTexture[uint2(dtid.xy)] & 0x0F; + if (mode == 1 || mode == 2) { + OutShadowTexture[dtid] = 1.0; // 1.0 = no shadow (neutral) + return; + } + } +# endif + float depth = SrcDepthTexture[dtid]; // depth == 0: VR HMD mask; depth == 1: sky/far plane diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli index 5a569d732f..6ec8ed5316 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli @@ -225,17 +225,15 @@ void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int // We sample depth twice per pixel per sample, and interpolate with an edge detect filter // Interpolation should only occur on the minor axis of the ray - major axis coordinates should be at pixel centers half2 read_xy = floor(pixel_xy); - - read_xy *= inParameters.DynamicRes; - -#if defined(VR) - read_xy *= half2(0.5, 1.0); -#endif + // VR fix: do NOT pre-scale 
read_xy here. DynamicRes and VR 0.5x must be + // applied AFTER offset_xy addition so the bilinear neighbor is exactly + // 1 texel away. Pre-scaling causes the offset to sample ~3px away, + // breaking edge detection and causing shadow instability on camera movement. + // See: docs/development/Old code/bend_sss_gpu.hlsli for the correct ordering. half minor_axis = x_axis_major ? pixel_xy.y : pixel_xy.x; - // If a pixel has been detected as an edge, then optionally (inParameters.IgnoreEdgePixels) don't include it in the shadow - const half edge_skip = 1e20; // if edge skipping is enabled, apply an extreme value/blend on edge samples to push the value out of range + const half edge_skip = 1e20; half2 depths; half bilinear = frac(minor_axis) - 0.5; @@ -247,34 +245,47 @@ void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int half bias = bilinear > 0 ? 1 : -1; half2 offset_xy = half2(x_axis_major ? 0 : bias, x_axis_major ? bias : 0); - // HLSL enforces that a pixel offset is a compile-time constant, which isn't strictly required (and can sometimes be a bit faster) - // So this fallback will use a manual uv offset instead - half2 coord = read_xy * inParameters.InvDepthTextureSize; - half2 coord_with_offset = (read_xy + offset_xy) * inParameters.InvDepthTextureSize; + // VR fix: scale by DynamicRes AFTER offset_xy is incorporated, so the + // offset represents exactly 1 texel in the final UV space. 
+ half2 coord = read_xy * inParameters.InvDepthTextureSize * inParameters.DynamicRes; + half2 coord_with_offset = (read_xy + offset_xy) * inParameters.InvDepthTextureSize * inParameters.DynamicRes; #if defined(VR) + // VR side-by-side: halve x to map stereo pixel coords to texture UV + coord *= half2(0.5, 1.0); + coord_with_offset *= half2(0.5, 1.0); + # if defined(RIGHT) - // Right eye: valid UV range is [0.5, 1.0] + // Right eye: valid UV range is [0.5*DynRes.x, DynRes.x] bool coord_out_of_eye = coord.x < 0.5 * inParameters.DynamicRes.x; bool coord_offset_out_of_eye = coord_with_offset.x < 0.5 * inParameters.DynamicRes.x; # else - // Left eye: valid UV range is [0.0, 0.5) + // Left eye: valid UV range is [0.0, 0.5*DynRes.x) bool coord_out_of_eye = coord.x >= 0.5 * inParameters.DynamicRes.x; bool coord_offset_out_of_eye = coord_with_offset.x >= 0.5 * inParameters.DynamicRes.x; # endif + // Clamp cross-eye depth reads to FarDepthValue (1.0) so rays near the SBS + // center seam don't sample the other eye's depth. At distance, stereo parallax + // makes cross-eye depth noticeably different, causing shadow patterns to shift + // with camera movement. Clamping to 1.0 means the ray sees “no occluder” at + // the boundary — shadow weakens by ~1 pixel but stays temporally stable. + // The WRITE guard is intentionally removed (see below GroupMemoryBarrier section) + // so both dispatches write to the seam overlap, preventing a visible gap/line. depths.x = coord_out_of_eye ? 1.0 : inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, coord, 0); depths.y = coord_offset_out_of_eye ? 1.0 : inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, coord_with_offset, 0); - depths.x = lerp(depths.x, 1.0, (float)(depths.x == 0)); // Stencil area - depths.y = lerp(depths.y, 1.0, (float)(depths.y == 0)); // Stencil area + // VR HMD mask: depth==0 is outside the visible lens area. 
Remap to + // FarDepthValue (1.0) so mask pixels don't cast false shadows. + depths.x = lerp(depths.x, 1.0, (float)(depths.x == 0)); + depths.y = lerp(depths.y, 1.0, (float)(depths.y == 0)); #else depths.x = inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, coord, 0); depths.y = inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, coord_with_offset, 0); #endif // Depth thresholds (bilinear/shadow thickness) are based on a fractional ratio of the difference between sampled depth and the far clip depth - depth_thickness_scale[i] = abs(inParameters.FarDepthValue - depths.x); + depth_thickness_scale[i] = max(abs(inParameters.FarDepthValue - depths.x), 1e-4); // If depth variance is more than a specific threshold, then just use point filtering bool use_point_filter = abs(depths.x - depths.y) > depth_thickness_scale[i] * inParameters.BilinearThreshold; @@ -321,19 +332,6 @@ void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int // Sync wavefronts now groupshared DepthData is written GroupMemoryBarrierWithGroupSync(); -#if defined(VR) - // Check if the pixel we're writing to is on the correct eye side - half writeX = write_xy.x * inParameters.InvDepthTextureSize.x; - -# if defined(RIGHT) - if (writeX < 0.0) - return; -# else - if (writeX > 1.0) - return; -# endif -#endif - half start_depth = sampling_depth[0]; if (start_depth == 0.0 || start_depth == 1.0) @@ -381,5 +379,6 @@ void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int // Asking the GPU to write scattered single-byte pixels isn't great, // But thankfully the latency is hidden by all the work we're doing... 
+ inParameters.OutputTexture[(int2)write_xy] = result; } \ No newline at end of file diff --git a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl index df107d9175..dc5d38d4b2 100644 --- a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl @@ -4,20 +4,48 @@ // depth == 0.0 is the unrendered/hidden area value (Skyrim reversed-Z: far plane = 0). // DepthIn is the combined stereo depth buffer; DepthOffsetX selects the eye's half. // ColorInOut is the isolated per-eye buffer; ColorOffsetX is always 0. +// +// When DepthWidth > 0, coordinate scaling is enabled: depth is at render-res while +// color is at display-res. The shader maps display-res color coordinates to render-res +// depth coordinates for the mask lookup. +// +// FallbackIn (t1): when bound, masked pixels read from this texture instead of writing +// black. When unbound, D3D11 returns (0,0,0,0) — same as clearing to black. +// FallbackOffsetX selects the eye's half in the stereo fallback texture. 
cbuffer ClearHMDMaskCB : register(b0) { - uint DepthOffsetX; // X offset into combined stereo depth (0 = left, eyeWidth = right) - uint ColorOffsetX; // X offset into color target (always 0 for per-eye buffers) - uint pad0; - uint pad1; + uint DepthOffsetX; // X offset into combined stereo depth (0 = left, eyeWidth = right) + uint ColorOffsetX; // X offset into color target (always 0 for per-eye buffers) + uint DepthOffsetY; // Y offset into combined stereo depth (non-zero when viewport scaling crops vertically) + uint FallbackOffsetX; // X offset into FallbackIn for stereo (0 when unused or left eye) + // Optional coordinate scaling (zero = disabled, for backwards compat) + uint DepthWidth; // render-res eye width; if 0, no scaling (1:1 depth/color coords) + uint DepthHeight; // render-res eye height + uint ColorWidth; // display-res eye width; scaling is skipped when this is 0 + uint ColorHeight; // display-res eye height; scaling is skipped when this is 0 }; Texture2D DepthIn : register(t0); +Texture2D FallbackIn : register(t1); RWTexture2D ColorInOut : register(u0); [numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { - // Read from stereo depth, write to potentially stereo color - if (DepthIn[dispatchID.xy + uint2(DepthOffsetX, 0)] == 0.0) - ColorInOut[dispatchID.xy + uint2(ColorOffsetX, 0)] = float4(0.0, 0.0, 0.0, 0.0); + uint2 colorPos = dispatchID.xy + uint2(ColorOffsetX, 0); + uint2 depthPos; + + if (DepthWidth > 0 && ColorWidth > 0 && ColorHeight > 0) { + // Scale from display-res color coordinates to render-res depth coordinates. The color dimensions are checked as well because they are divisors: a zero divisor would give an out-of-range depthPos, the depth read would return 0, and every pixel would be treated as masked. + depthPos = uint2( + (dispatchID.x * DepthWidth) / ColorWidth, + (dispatchID.y * DepthHeight) / ColorHeight) + + uint2(DepthOffsetX, DepthOffsetY); + } else { + depthPos = dispatchID.xy + uint2(DepthOffsetX, DepthOffsetY); + } + + if (DepthIn[depthPos] == 0.0) + ColorInOut[colorPos] = FallbackIn[dispatchID.xy + uint2(FallbackOffsetX, 0)]; + // When FallbackIn is unbound (existing callers): returns (0,0,0,0) → clears to black + // When FallbackIn is bound (TAA mask restore): returns display RT content } diff --git
a/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl new file mode 100644 index 0000000000..8cf6b900d3 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl @@ -0,0 +1,48 @@ +// Format-converting fullscreen pixel shader with optional bilinear upscale. +// Used by TAAReorder to composite between textures of different DXGI formats +// (e.g. R8G8B8A8_UNORM conductor RTs <-> R11G11B10_FLOAT kMAIN). +// The GPU's output merger handles format conversion automatically. +// +// BILINEAR_UPSCALE variant: upscales render-res content to display-res by +// mapping output pixel positions through the dynamic resolution scale, +// like PureDark's dynamicResScale in his blend shader. + +#include "Upscaling/UpscaleVS.hlsl" + +#ifdef PSHADER + +Texture2D Source : register(t0); + +# ifdef BILINEAR_UPSCALE + +cbuffer CompositeCB : register(b0) +{ + float2 DynResScale; // renderRes / displayRes (per-eye) + float2 EyeOffset; // (i * eyeWidth, 0) in texels + float2 SrcTexSize; // full texture dimensions in texels + float2 pad; +}; + +SamplerState LinearSampler : register(s0); + +float4 main(VS_OUTPUT input) : SV_Target +{ + // Map display-res pixel position to render-res source position. + // Subtract eye offset, scale to render-res, add eye offset back. 
+ float2 localPos = input.Position.xy - EyeOffset; + float2 srcLocal = localPos * DynResScale; + float2 srcPos = srcLocal + EyeOffset; + float2 srcUV = srcPos / SrcTexSize; + return Source.SampleLevel(LinearSampler, srcUV, 0); +} + +# else + +float4 main(VS_OUTPUT input) : SV_Target +{ + return Source.Load(int3(input.Position.xy, 0)); +} + +# endif // BILINEAR_UPSCALE + +#endif // PSHADER diff --git a/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl new file mode 100644 index 0000000000..e5650af665 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl @@ -0,0 +1,70 @@ +/** + * @file DepthUpscalePS.hlsl + * @brief Point-sampled depth buffer upscaling for VR depth-based culling + * + * When upscaling (FSR/DLSS) is active, the depth buffer is rendered at a lower + * resolution than the display. Skyrim VR's depth-based culling (OBBOcclusionTesting) + * reads from the depth buffer to determine object visibility, but with a mismatched + * resolution, objects may be incorrectly culled (appearing to flicker in/out of view). + * + * This shader upscales the low-resolution depth buffer to full resolution using + * pure point sampling. Previous conservative blending (GatherRed + lerp toward + * min depth) caused HAM mask bleed: depth == 0 values from the hidden area mesh + * leaked into valid depth through the 2x2 neighborhood blend, creating artifacts + * at the mask boundary after DRS upscaling. 
+ * + * Based on depth upscaling approach by vrnord + * https://github.com/vrnord/skyrim-community-shaders-VR-DLSS + */ + +#include "Upscaling/UpscaleVS.hlsl" + +#if defined(PSHADER) +# include "Common/FrameBuffer.hlsli" +# include "Common/SharedData.hlsli" + +typedef VS_OUTPUT PS_INPUT; + +struct PS_OUTPUT +{ + float Depth: SV_Depth; +}; + +Texture2D DepthLowRes : register(t0); + +cbuffer DepthUpscaleCB : register(b0) +{ + float2 SourceDim; // Full texture dimensions (texels) + float2 InvSourceDim; // 1.0 / SourceDim + float2 Scale; // resolutionScale (render/display ratio) + float2 Pad; +}; + +/** + * @brief Main pixel shader entry point + * + * Pure point-sampled depth upscaling. Maps display-res pixel position to + * render-res texel and loads directly — no blending, no mask bleed. + */ +PS_OUTPUT main(PS_INPUT input) +{ + PS_OUTPUT psout; + + // Map full-res UV to render-res UV (same transform as the engine's + // GetDynamicResolutionAdjustedScreenPosition). + float2 uv = Scale * input.TexCoord; + + // Per-eye clamping for SBS stereo: prevent sampling across the center seam. + bool isRight = input.TexCoord.x >= 0.5; + float halfScale = 0.5 * Scale.x; + uv.x = clamp(uv.x, isRight ? halfScale : 0.0, isRight ? 
Scale.x : halfScale); + uv.y = clamp(uv.y, 0.0, Scale.y); + + // Nearest texel coordinate — pure point sampling, no blending + int2 texel = int2(floor(uv * SourceDim)); + psout.Depth = DepthLowRes.Load(int3(texel, 0)); + + return psout; +} + +#endif diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl new file mode 100644 index 0000000000..16116fb4e0 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl @@ -0,0 +1,41 @@ +cbuffer FeatherCB : register(b0) +{ + uint CropX; // paste position X in output space + uint CropY; // paste position Y in output space + uint CropW; // crop width + uint CropH; // crop height + float FeatherWidth; // feather distance in pixels (inward from crop edge) + float3 pad; +}; + +Texture2D CropTexture : register(t0); // DLSS output (crop-sized, at {0,0}) +RWTexture2D OutputTexture : register(u0); // vrFinalOutput (already filled with periphery) + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { + // dispatchID is in crop-local space (0..CropW-1, 0..CropH-1) + int2 cropLocal = int2(dispatchID.xy); + if (cropLocal.x >= (int)CropW || cropLocal.y >= (int)CropH) + return; + + // Output pixel = crop-local + paste offset + int2 pixel = cropLocal + int2(CropX, CropY); + + // Distance from nearest crop edge (positive = inside) + float distLeft = (float)cropLocal.x; + float distRight = (float)(CropW - 1 - cropLocal.x); + float distTop = (float)cropLocal.y; + float distBottom = (float)(CropH - 1 - cropLocal.y); + float distFromEdge = min(min(distLeft, distRight), min(distTop, distBottom)); + + float4 dlss = CropTexture.Load(int3(cropLocal, 0)); + + if (FeatherWidth <= 0.0 || distFromEdge >= FeatherWidth) { + // Inside crop interior or no feathering: 100% DLSS + OutputTexture[pixel] = dlss; + } else { + // Feather zone: smooth blend from periphery (TAA-stabilized) to DLSS + float blend = smoothstep(0.0, 
FeatherWidth, distFromEdge); + float4 periphery = OutputTexture[pixel]; + OutputTexture[pixel] = lerp(periphery, dlss, blend); + } +} diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl new file mode 100644 index 0000000000..fb6ae3f277 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl @@ -0,0 +1,56 @@ +// Feathered DLSS crop composite using hardware alpha blending. +// Based on PureDark's approach from Skyrim-Upscaler VR (MIT license). +// +// The render target already contains TAA'd periphery content. +// We output float4(DLSSColor, featherAlpha) and let the output merger's +// SrcAlpha/InvSrcAlpha blend preserve the periphery in the feather zone +// and outside the crop rect entirely. + +#include "Upscaling/UpscaleVS.hlsl" + +#ifdef PSHADER + +Texture2D CropTexture : register(t0); +SamplerState LinearSampler : register(s0); + +cbuffer FeatheredCompositeCB : register(b0) +{ + float2 CropOrigin; // paste position (x, y) in output-eye pixel coords + float2 CropSize; // crop width, height in pixels + float FeatherWidth; // feather distance in pixels (inward from crop edge) + float _pad0; + float2 SrcUVOrigin; // UV origin in source texture for this crop region + float2 SrcUVScale; // UV scale: maps [0,1] crop-local UV to source texture UV range +}; + +float4 main(VS_OUTPUT input) : SV_Target +{ + float2 pixelPos = input.Position.xy; + + // Distance from each edge of the crop rect (positive = inside) + float distLeft = pixelPos.x - CropOrigin.x; + float distRight = (CropOrigin.x + CropSize.x) - pixelPos.x; + float distTop = pixelPos.y - CropOrigin.y; + float distBottom = (CropOrigin.y + CropSize.y) - pixelPos.y; + + float minDist = min(min(distLeft, distRight), min(distTop, distBottom)); + + // Outside crop rect: fully transparent (hardware blend preserves TAA'd periphery) + if (minDist <= 0.0) + return float4(0, 0, 0, 0); + + // Feather alpha: 
smoothstep ramp from 0 at edge to 1 at FeatherWidth inside + // (matches the smoothstep from the original CS for visual consistency) + float alpha = (FeatherWidth > 0.0) ? smoothstep(0.0, FeatherWidth, minDist) : 1.0; + + // Map pixel position to crop-local UV [0,1], then remap to source texture UV. + // For per-eye textures: SrcUVOrigin=(0,0), SrcUVScale=(1,1) (identity). + // For SBS textures: SrcUVOrigin/Scale select the correct eye's crop region. + float2 cropUV = (pixelPos - CropOrigin) / CropSize; + float2 srcUV = cropUV * SrcUVScale + SrcUVOrigin; + float3 dlssColor = CropTexture.SampleLevel(LinearSampler, srcUV, 0).rgb; + + return float4(dlssColor, alpha); +} + +#endif // PSHADER diff --git a/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl new file mode 100644 index 0000000000..b7c3272b83 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl @@ -0,0 +1,12 @@ +// Forces alpha to 1.0 across the entire texture. +// Used after DLSS center paste onto submit texture to ensure Scaleform UI renders. +// DLSS output may have alpha=0 (from R11G11B10→R8G8B8A8 conversion with no alpha source), +// which can prevent UI compositing in the DLSS center area. + +RWTexture2D ColorInOut : register(u0); + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { + float4 c = ColorInOut[dispatchID.xy]; + c.a = 1.0; + ColorInOut[dispatchID.xy] = c; +} diff --git a/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl new file mode 100644 index 0000000000..315541e76d --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl @@ -0,0 +1,24 @@ +// Bilinear upscale from render-resolution per-eye buffer to display-resolution per-eye buffer. +// Used for VR viewport scaling: fills the full eye output with a cheap upscale so the +// periphery (outside the DLSS-processed center) is not black/empty. 
+ +cbuffer PeripheryFillCB : register(b0) +{ + uint SrcWidth; + uint SrcHeight; + uint DstWidth; + uint DstHeight; +}; + +Texture2D<float4> SrcTexture : register(t0); +SamplerState LinearSampler : register(s0); +RWTexture2D<float4> DstTexture : register(u0); + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { + if (dispatchID.x >= DstWidth || dispatchID.y >= DstHeight) + return; + + // Normalized UV with half-pixel offset for correct bilinear sampling + float2 uv = (float2(dispatchID.xy) + 0.5) / float2(DstWidth, DstHeight); + DstTexture[dispatchID.xy] = SrcTexture.SampleLevel(LinearSampler, uv, 0); +} diff --git a/features/VR Stereo Optimizations/Shaders/Features/VRStereoOptimizations.ini b/features/VR Stereo Optimizations/Shaders/Features/VRStereoOptimizations.ini new file mode 100644 index 0000000000..000b60a568 --- /dev/null +++ b/features/VR Stereo Optimizations/Shaders/Features/VRStereoOptimizations.ini @@ -0,0 +1,2 @@ +[Info] +Version = 1-0-0 diff --git a/package/Shaders/Common/SharedData.hlsli b/package/Shaders/Common/SharedData.hlsli index 4ea0d4d07c..959678edff 100644 --- a/package/Shaders/Common/SharedData.hlsli +++ b/package/Shaders/Common/SharedData.hlsli @@ -20,11 +20,16 @@ namespace SharedData float Timer; uint FrameCount; uint FrameCountAlwaysActive; - bool InInterior; // If the area lacks a directional shadow light e.g. the sun or moon - bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) - bool HideSky; // HideSky flag in WorldSpace, e.g. Blackreach - float MipBias; // Offset to mip level for TAA sharpness# - float pad0; + bool InInterior; // If the area lacks a directional shadow light e.g. the sun or moon + bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) + bool HideSky; // HideSky flag in WorldSpace, e.g. 
Blackreach + float MipBias; // Offset to mip level for TAA sharpness + float VRMipBias; // Additional negative MIP bias for VR foliage sharpening (depth-scaled) + float VRMipBiasNearDist; // Game units: no VR MIP bias closer than this + float VRMipBiasFarDist; // Game units: full VR MIP bias beyond this + uint VRMipBiasMode; // 0=Off, 1=All Textures, 2=Distant Trees (TREE_ANIM) only + float VRAlphaTestThreshold; // Alpha test threshold for VR TREE_ANIM (0 = disabled) + float2 pad0; float4 AmbientSHR; float4 AmbientSHG; float4 AmbientSHB; @@ -52,7 +57,7 @@ namespace SharedData bool EnableShadows; bool ExtendShadows; bool EnableParallaxWarpingFix; - float1 pad0; + bool pad0; }; struct CubemapCreatorSettings diff --git a/package/Shaders/Common/VR.hlsli b/package/Shaders/Common/VR.hlsli index d744022781..46cf57a7e7 100644 --- a/package/Shaders/Common/VR.hlsli +++ b/package/Shaders/Common/VR.hlsli @@ -21,6 +21,7 @@ cbuffer VRValues : register(b13) float2 EyeOffsetScale : packoffset(c0.z); float4 EyeClipEdge[2] : packoffset(c1); } + #endif namespace Stereo @@ -626,6 +627,16 @@ namespace Stereo vsout.VRPosition.z = clipPos.z; vsout.VRPosition.w = clipPos.w; + // Hardcoded ~0.75px diagonal jitter for Eye 1 stereo edge supersampling. + // Larger offset increases chance of different alpha test outcomes between eyes + // (tree branches vs sky). NDC for 6304x3088 SBS reference; scales with resolution. + if (a_eyeIndex == 1) { + // ~0.75px diagonal jitter for Eye 1 stereo edge supersampling. + // Scales with resolution: 0.53/halfWidth horizontal, 1.06/height vertical. 
+ float2 kJitterNDC = float2(0.53 / (FrameBuffer::BufferDim.x * 0.5), -1.06 / FrameBuffer::BufferDim.y); + vsout.VRPosition.xy += kJitterNDC * vsout.VRPosition.w; + } + vsout.ClipDistance = clipEdges.y; vsout.CullDistance = clipEdges.x; # endif // VR diff --git a/package/Shaders/DeferredCompositeCS.hlsl b/package/Shaders/DeferredCompositeCS.hlsl index f149255718..61ad48bfb7 100644 --- a/package/Shaders/DeferredCompositeCS.hlsl +++ b/package/Shaders/DeferredCompositeCS.hlsl @@ -19,6 +19,10 @@ RWTexture2D NormalTAAMaskSpecularMaskRW : register(u1); RWTexture2D MotionVectorsRW : register(u2); Texture2D DepthTexture : register(t4); +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + #if defined(DYNAMIC_CUBEMAPS) Texture2D ReflectanceTexture : register(t5); TextureCube EnvTexture : register(t6); @@ -92,6 +96,16 @@ void SampleSSGISpecular(uint2 pixCoord, sh2 lobe, inout float ao, out float3 il, uv *= FrameBuffer::DynamicResolutionParams2.xy; // adjust for dynamic res uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint mode = StereoOptModeTexture[uint2(dispatchID.xy)]; + if (mode == 2) { // MODE_MAIN — stencil-culled, no valid G-buffer + return; + } + } +#endif + uv = Stereo::ConvertFromStereoUV(uv, eyeIndex); float3 normalGlossiness = NormalRoughnessTexture[dispatchID.xy]; diff --git a/package/Shaders/DistantTree.hlsl b/package/Shaders/DistantTree.hlsl index cbd4608676..0fa3314f0f 100644 --- a/package/Shaders/DistantTree.hlsl +++ b/package/Shaders/DistantTree.hlsl @@ -203,8 +203,14 @@ PS_OUTPUT main(PS_INPUT input) float alpha = TexDiffuse.SampleBias(SampDiffuse, input.TexCoord.xy, SharedData::MipBias).w; - if ((alpha - AlphaTestRefRS) < 0) { - discard; + { + float alphaRef = AlphaTestRefRS; +# if defined(VR) + alphaRef -= eyeIndex * 0.1; +# endif + if ((alpha - alphaRef) < 0) { + discard; + } } psout.Diffuse.xyz = input.Depth.xxx / input.Depth.yyy; @@ -213,8 +219,14 @@ 
PS_OUTPUT main(PS_INPUT input) float4 baseColor = TexDiffuse.SampleBias(SampDiffuse, input.TexCoord.xy, SharedData::MipBias); baseColor.xyz = Color::Diffuse(baseColor.xyz); - if ((baseColor.w - AlphaTestRefRS) < 0) { - discard; + { + float alphaRef = AlphaTestRefRS; +# if defined(VR) + alphaRef -= eyeIndex * 0.1; +# endif + if ((baseColor.w - alphaRef) < 0) { + discard; + } } # if defined(DEFERRED) diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index 777d0bd0c0..a28f6ed7ab 100644 --- a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -1780,7 +1780,19 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) rawRMAOS = blendedRMAOS; # endif # else // Non-landscape code - float4 rawBaseColor = TexColorSampler.SampleBias(SampColorSampler, diffuseUv, SharedData::MipBias); + // VR MIP bias: depth-gated sharpening for distant textures + // Mode 1 = All Textures, Mode 2 = Distant Trees (TREE_ANIM) only + float vrFoliageBias = 0; +# if defined(TREE_ANIM) + if (SharedData::VRMipBias < 0) { +# else + if (SharedData::VRMipBias < 0 && SharedData::VRMipBiasMode == 1) { +# endif + float linDepth = SharedData::GetScreenDepth(input.Position.z); + float t = saturate((linDepth - SharedData::VRMipBiasNearDist) / max(SharedData::VRMipBiasFarDist - SharedData::VRMipBiasNearDist, 1.0)); + vrFoliageBias = SharedData::VRMipBias * t; + } + float4 rawBaseColor = TexColorSampler.SampleBias(SampColorSampler, diffuseUv, SharedData::MipBias + vrFoliageBias); baseColor = float4(Color::Diffuse(rawBaseColor.rgb), rawBaseColor.a); float4 normalColor = TexNormalSampler.SampleBias(SampNormalSampler, uv, SharedData::MipBias); normal = normalColor; @@ -3021,11 +3033,11 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) float alpha = baseColor.w; # if defined(EMAT) && !defined(LANDSCAPE) # if defined(PARALLAX) - alpha = TexColorSampler.SampleBias(SampColorSampler, uvOriginal, SharedData::MipBias).w; + alpha = 
TexColorSampler.SampleBias(SampColorSampler, uvOriginal, SharedData::MipBias + vrFoliageBias).w; # elif defined(TRUE_PBR) [branch] if (PBRParallax) { - alpha = TexColorSampler.SampleBias(SampColorSampler, uvOriginal, SharedData::MipBias).w; + alpha = TexColorSampler.SampleBias(SampColorSampler, uvOriginal, SharedData::MipBias + vrFoliageBias).w; } # endif # endif @@ -3074,9 +3086,28 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) } alpha = saturate(1.05 * alpha); # endif // DEPTH_WRITE_DECALS +# if defined(TREE_ANIM) + // Fixed alpha floor — catch zombie texels with near-zero alpha + if (alpha < 0.1) { + discard; + } + if (alpha - AlphaTestRefRS < 0) { + discard; + } + // Suppress RGB fringe contamination from negative MIP bias. + // Low-alpha texels near the foliage boundary have bright padding bleeding into RGB. + // Alpha is a direct proxy for contamination — low alpha = more padding contribution. + // Scale correction by bias strength so close-range (no bias) textures are untouched. + if (vrFoliageBias < 0) { + float biasStrength = saturate(vrFoliageBias / min(SharedData::VRMipBias, -0.001)); + float fringeScale = 5.0; // higher = more aggressive fringe suppression + baseColor.rgb *= saturate(alpha * lerp(1.0, fringeScale, biasStrength)); + } +# else if (alpha - AlphaTestRefRS < 0) { discard; } +# endif // TREE_ANIM # endif // DO_ALPHA_TEST # if defined(ANISOTROPIC_ALPHA) @@ -3166,7 +3197,12 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) } # endif - psout.Reflectance = float4(indirectLobeWeights.specular, psout.Diffuse.w); +# if (defined(EMAT) || defined(TRUE_PBR)) && (defined(PARALLAX) || defined(LANDSCAPE)) + psout.Reflectance = float4(indirectLobeWeights.specular, + (pixelOffset > 0.0) ? 
saturate(pixelOffset) : 0.0); +# else + psout.Reflectance = float4(indirectLobeWeights.specular, 0.0); +# endif psout.NormalGlossiness = float4(GBuffer::EncodeNormal(screenSpaceNormal), saturate(1.0 - material.Roughness), psout.Diffuse.w); # if defined(SNOW) diff --git a/package/Shaders/RunGrass.hlsl b/package/Shaders/RunGrass.hlsl index f05c3d0edd..664fe97a6a 100644 --- a/package/Shaders/RunGrass.hlsl +++ b/package/Shaders/RunGrass.hlsl @@ -480,22 +480,37 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) bool complex = abs(complexLength - 1.0) < SharedData::grassLightingSettings.ComplexGrassThreshold; # endif // !TRUE_PBR + // VR MIP bias: depth-gated sharpening for distant textures + float vrGrassBias = 0; + if (SharedData::VRMipBias < 0 && SharedData::VRMipBiasMode == 1) { + float linDepth = SharedData::GetScreenDepth(input.HPosition.z); + float t = saturate((linDepth - SharedData::VRMipBiasNearDist) / max(SharedData::VRMipBiasFarDist - SharedData::VRMipBiasNearDist, 1.0)); + vrGrassBias = SharedData::VRMipBias * t; + } + float4 baseColor; # if !defined(TRUE_PBR) if (complex) { - baseColor = TexBaseSampler.SampleBias(SampBaseSampler, float2(input.TexCoord.x, input.TexCoord.y * 0.5), SharedData::MipBias); + baseColor = TexBaseSampler.SampleBias(SampBaseSampler, float2(input.TexCoord.x, input.TexCoord.y * 0.5), SharedData::MipBias + vrGrassBias); } else # endif // !TRUE_PBR { - baseColor = TexBaseSampler.SampleBias(SampBaseSampler, input.TexCoord.xy, SharedData::MipBias); + baseColor = TexBaseSampler.SampleBias(SampBaseSampler, input.TexCoord.xy, SharedData::MipBias + vrGrassBias); } baseColor.xyz = Color::Diffuse(baseColor.xyz); # if defined(RENDER_DEPTH) float diffuseAlpha = input.VertexColor.w * baseColor.w; - if ((diffuseAlpha - AlphaTestRefRS) < 0) { - discard; + { + float alphaRef = AlphaTestRefRS; +# if defined(VR) + uint convergenceEyeIndex = Stereo::GetEyeIndexPS(input.HPosition, VPOSOffset); + alphaRef -= convergenceEyeIndex * 0.1; +# 
endif + if ((diffuseAlpha - alphaRef) < 0) { + discard; + } } # endif // RENDER_DEPTH || DO_ALPHA_TEST @@ -505,9 +520,9 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) psout.PS.w = diffuseAlpha; # else # if !defined(TRUE_PBR) - float4 specColor = complex ? TexBaseSampler.SampleBias(SampBaseSampler, float2(input.TexCoord.x, 0.5 + input.TexCoord.y * 0.5), SharedData::MipBias) : 1; + float4 specColor = complex ? TexBaseSampler.SampleBias(SampBaseSampler, float2(input.TexCoord.x, 0.5 + input.TexCoord.y * 0.5), SharedData::MipBias + vrGrassBias) : 1; # else - float4 specColor = TexNormalSampler.SampleBias(SampNormalSampler, input.TexCoord.xy, SharedData::MipBias); + float4 specColor = TexNormalSampler.SampleBias(SampNormalSampler, input.TexCoord.xy, SharedData::MipBias + vrGrassBias); # endif uint eyeIndex = Stereo::GetEyeIndexPS(input.HPosition, VPOSOffset); @@ -548,7 +563,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) # endif // !TRUE_PBR # if defined(TRUE_PBR) - float4 rawRMAOS = TexRMAOSSampler.SampleBias(SampRMAOSSampler, input.TexCoord.xy, SharedData::MipBias) * float4(PBRParams1.x, 1, 1, PBRParams1.y); + float4 rawRMAOS = TexRMAOSSampler.SampleBias(SampRMAOSSampler, input.TexCoord.xy, SharedData::MipBias + vrGrassBias) * float4(PBRParams1.x, 1, 1, PBRParams1.y); PBR::SurfaceProperties pbrSurfaceProperties = PBR::InitSurfaceProperties(); @@ -846,13 +861,27 @@ PS_OUTPUT main(PS_INPUT input) { PS_OUTPUT psout; - float4 baseColor = TexBaseSampler.SampleBias(SampBaseSampler, input.TexCoord.xy, SharedData::MipBias); + // VR MIP bias: depth-gated sharpening for distant textures + float vrGrassBias = 0; + if (SharedData::VRMipBias < 0 && SharedData::VRMipBiasMode == 1) { + float linDepth = SharedData::GetScreenDepth(input.HPosition.z); + float t = saturate((linDepth - SharedData::VRMipBiasNearDist) / max(SharedData::VRMipBiasFarDist - SharedData::VRMipBiasNearDist, 1.0)); + vrGrassBias = SharedData::VRMipBias * t; + } + + float4 
baseColor = TexBaseSampler.SampleBias(SampBaseSampler, input.TexCoord.xy, SharedData::MipBias + vrGrassBias); # if defined(RENDER_DEPTH) float diffuseAlpha = input.VertexColor.w * baseColor.w; - - if ((diffuseAlpha - AlphaTestRefRS) < 0) { - discard; + { + float alphaRef = AlphaTestRefRS; +# if defined(VR) + uint convergenceEyeIndex = Stereo::GetEyeIndexPS(input.HPosition, VPOSOffset); + alphaRef -= convergenceEyeIndex * 0.1; +# endif + if ((diffuseAlpha - alphaRef) < 0) { + discard; + } } # endif // RENDER_DEPTH || DO_ALPHA_TEST diff --git a/package/Shaders/VR/StereoBlendCS.hlsl b/package/Shaders/VR/StereoBlendCS.hlsl index 7322e9e513..c443d04d1b 100644 --- a/package/Shaders/VR/StereoBlendCS.hlsl +++ b/package/Shaders/VR/StereoBlendCS.hlsl @@ -11,6 +11,7 @@ #include "Common/Color.hlsli" #include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" #include "Common/VR.hlsli" Texture2D ColorTexture : register(t0); @@ -18,6 +19,38 @@ Texture2D DepthTexture : register(t1); RWTexture2D OutputRW : register(u0); +#ifdef STEREO_OVERWRITE +RWTexture2D MotionRW : register(u1); +Texture2D ModeTexture : register(t2); +Texture2D ReflectanceTexture : register(t3); // .w = POM pixelOffset from Lighting pass +SamplerState LinearSampler : register(s0); + +// Mode constants matching VRStereoOptimizations/cbuffers.hlsli +// (can't include directly — its cbuffer on b1 conflicts with StereoBlendCB) +# define MODE_DISOCCLUDED 0 +# define MODE_EDGE 1 +# define MODE_MAIN 2 +# define MODE_EDGE_NEIGHBOUR 3 +# define MODE_FULL_BLEND 4 + +// Hardware bilinear color sample from reprojected pixel coordinates. +// Converts integer pixel coords to proper full-texture UV for SampleLevel, +// clamped to the active DRS viewport to prevent sampling stale data. +// Motion vectors stay as integer Load() — filtering them breaks DLSS. 
+float4 SampleReprojectedColor(int2 reprojPx, float2 frameDim) +{ + uint texW, texH; + ColorTexture.GetDimensions(texW, texH); + float2 texSize = float2(texW, texH); + float2 sampleUV = (float2(reprojPx) + 0.5) / texSize; + // Clamp to active DRS viewport bounds (half-texel inset to keep bilinear inside valid region) + float2 minUV = 0.5 / texSize; + float2 maxUV = (frameDim - 0.5) / texSize; + sampleUV = clamp(sampleUV, minUV, maxUV); + return ColorTexture.SampleLevel(LinearSampler, sampleUV, 0); +} +#endif + cbuffer StereoBlendCB : register(b1) { float2 FrameDim; @@ -25,11 +58,16 @@ cbuffer StereoBlendCB : register(b1) float DepthSigma; float MaxBlendFactor; float ColorDiffThreshold; - float pad; + float DebugEdgeTint; + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer, 3 = POM depth heatmap + float FullBlendDistance; + float POMDepthScale; + float _pad; }; -static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend -static const int kEdgeMargin = 2; // Neighbor offset (pixels) for destination edge + mask boundary check +static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend +static const int kEdgeMargin = 2; // Neighbor offset (pixels) for destination edge + mask boundary check +static const float kDepthAgreementThreshold = 0.015; // Relative depth difference threshold for overwrite mode disocclusion rejection // Samples four depth neighbors in a cross pattern (±offset pixels) around center, // clamped to eyeIndex's half of the packed stereo buffer to avoid seam contamination. 
@@ -46,6 +84,175 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (any(dtid >= uint2(FrameDim))) return; +#ifdef STEREO_OVERWRITE + // ========================================================================= + // Mode-driven stereo merge: reads per-pixel classification from StencilCS + // and applies appropriate action per mode and eye. + // Mode texture is full SBS resolution — ModeTexture[dtid] maps directly. + // ========================================================================= + + float2 uv = (dtid + 0.5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + float centerDepth = DepthTexture[dtid]; + + // HMD mask pixels (depth >= 1.0 in reversed-Z) — always skip + if (centerDepth >= 1.0) + return; + + uint pixelMode = ModeTexture[dtid]; + + // Debug mode 1: depth map diagnostic — show mode texture as solid colors (all pixels) + if (DebugMode == 1) { + float4 c = ColorTexture[dtid]; + if (pixelMode == MODE_EDGE) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), 0.5), c.a); + else if (pixelMode == MODE_EDGE_NEIGHBOUR) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0, 1), 0.5), c.a); + else if (pixelMode == MODE_DISOCCLUDED) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 0.5, 1), 0.3), c.a); + else if (pixelMode == MODE_FULL_BLEND) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0.5, 0), 0.5), c.a); + return; + } + + // Debug mode 2: full blend depth visualizer — cyan tint based on proximity to FullBlendDistance + if (DebugMode == 2) { + if (centerDepth < 1e-5 || centerDepth >= 1.0) + return; + float linDepth = SharedData::GetScreenDepth(centerDepth); + if (linDepth < FullBlendDistance) { + float4 c = ColorTexture[dtid]; + float proximity = saturate(1.0 - linDepth / max(FullBlendDistance, 1.0)); + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 1), proximity * 0.4), c.a); + } + return; + } + + // Debug mode 3: POM depth data visualizer — show Reflectance.w as color + if (DebugMode == 3) { + float pomVal = 
ReflectanceTexture[dtid].w; + float4 c = ColorTexture[dtid]; + if (pomVal > 1e-2) { + // POM pixel: red-to-green gradient based on parallaxAmount + // Red = peak (high pomVal, closer to camera), Green = valley (low pomVal, farther), Yellow = geometry plane + float3 pomColor = float3(pomVal, 1.0 - pomVal, 0); + OutputRW[dtid] = float4(lerp(c.rgb, pomColor, 0.7), c.a); + } + // Non-POM pixels (pomVal ~ 0) left untouched + return; + } + + // MODE_DISOCCLUDED: fully shaded, leave untouched + if (pixelMode == MODE_DISOCCLUDED) + return; + + // MODE_FULL_BLEND: bilateral blend for 2x supersampling + if (pixelMode == MODE_FULL_BLEND) { + float4 center = ColorTexture[dtid]; + + // Check for POM depth offset at this pixel + // pixelOffset = parallaxAmount (0-1) from ExtendedMaterials, 0.5 = geometry plane. + // Values > 0.5 are peaks (closer to camera), < 0.5 are valleys (farther from camera). + // Correction: high pomVal should push depth closer (smaller linear depth), + // so we use (0.5 - pomOffset) to get a negative correction for peaks. + // Non-POM pixels store 0.0, so threshold > 1e-2 distinguishes them. 
+ float reprojDepthFB = centerDepth; + float pomOffsetFB = ReflectanceTexture[dtid].w; + if (pomOffsetFB > 1e-2 && POMDepthScale > 0) { + float linDepthFB = SharedData::GetScreenDepth(centerDepth); + float depthCorrectionFB = (0.5 - pomOffsetFB) * POMDepthScale; + float newLinDepthFB = max(linDepthFB + depthCorrectionFB, 1e-4); + reprojDepthFB = (SharedData::CameraData.x - SharedData::CameraData.w / newLinDepthFB) / SharedData::CameraData.z; + } + + // Reproject to the other eye + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, reprojDepthFB, eyeIndex, FrameDim); + if (!r.valid) { + // Debug tint for failed reprojection + if (DebugEdgeTint > 0) + OutputRW[dtid] = float4(lerp(center.rgb, float3(1, 0.5, 0), DebugEdgeTint), center.a); + return; + } + + // Only blend with pixels that have valid composited data in both eyes + uint otherMode = ModeTexture[r.otherPx]; + if (otherMode != MODE_FULL_BLEND && otherMode != MODE_DISOCCLUDED) + return; + + float4 otherColor = SampleReprojectedColor(r.otherPx, FrameDim); + float otherDepth = DepthTexture[r.otherPx]; + + // Depth-weighted bilateral blend + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float depthAgreement = 1.0 - saturate(abs(centerDepth - otherDepth) / maxDepth / 0.02); + float blendWeight = 0.5 * depthAgreement; + + float4 result = lerp(center, otherColor, blendWeight); + + if (DebugEdgeTint > 0) + result.rgb = lerp(result.rgb, float3(0, 1, 1), DebugEdgeTint); + + OutputRW[dtid] = result; + return; + } + + if (eyeIndex == 0) { + // Eye 0: fully shaded for all modes — only apply debug tint to edge pixels + if (DebugEdgeTint > 0 && pixelMode == MODE_EDGE) { + float4 c = ColorTexture[dtid]; + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), DebugEdgeTint), c.a); + } + return; + } + + // Eye 1: reproject all non-disoccluded, non-full-blend pixels (MAIN, EDGE) from Eye 0. 
+ // StencilCS already performed the authoritative disocclusion check with the correct + // depth buffer state — no redundant depth agreement check here. + float reprojDepth = centerDepth; + + // First-pass reprojection to find Eye 0 source pixel + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, reprojDepth, eyeIndex, FrameDim); + if (!r.valid) + return; + + // Read POM offset from Eye 0 source's reflectance.w + // pixelOffset = parallaxAmount (0-1) from ExtendedMaterials, 0.5 = geometry plane. + // Values > 0.5 are peaks (closer to camera), < 0.5 are valleys (farther from camera). + // Correction: high pomVal should push depth closer (smaller linear depth), + // so we use (0.5 - pomOffset) to get a negative correction for peaks. + // Non-POM pixels store 0.0, so threshold > 1e-2 distinguishes them. + float pomOffset = ReflectanceTexture[r.otherPx].w; + if (pomOffset > 1e-2 && POMDepthScale > 0) { + // Re-reproject with POM-adjusted depth centered at geometry plane (skipped when POMDepthScale <= 0, same gate as the MODE_FULL_BLEND path) + float linearDepth = SharedData::GetScreenDepth(centerDepth); + float depthCorrection = (0.5 - pomOffset) * POMDepthScale; + float newLinearDepth = max(linearDepth + depthCorrection, 1e-4); + reprojDepth = (SharedData::CameraData.x - SharedData::CameraData.w / newLinearDepth) / SharedData::CameraData.z; + r = Stereo::ReprojectToOtherEye(uv, reprojDepth, eyeIndex, FrameDim); + if (!r.valid) + return; + } + + // Skip if the Eye 0 source pixel is sky/unrendered (depth at clear value). + // At DeferredPasses time, sky hasn't rendered yet — source would have clear color. + // Let the sky/water pass fill these pixels later instead. 
+ float sourceDepth = DepthTexture[r.otherPx]; + if (sourceDepth >= 1.0 || sourceDepth < 1e-5) + return; + + OutputRW[dtid] = SampleReprojectedColor(r.otherPx, FrameDim); + MotionRW[dtid] = MotionRW[r.otherPx]; + +#else // Normal bilateral blend path + +# ifdef EYE0_ONLY + // Only process Eye 0 (left half) - Eye 1 left untouched + float2 uvCheck = (dtid + 0.5) * RcpFrameDim; + if (Stereo::GetEyeIndexFromTexCoord(uvCheck) == 1) + return; +# endif + float2 uv = (dtid + 0.5) * RcpFrameDim; uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); @@ -68,8 +275,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) // depth == 1.0: sky/far plane (no real geometry, bilateral reprojection not meaningful) bool isSkipPixel = centerDepth < 1e-5 || centerDepth >= 1.0; if (!isSkipPixel) { - // Source edge detection: skip at depth discontinuities (arm/world silhouettes, - // object edges). Saves VP reprojection work and prevents halo artifacts. + // Normal bilateral blend path float4 srcEdgeDepths = SampleCrossDepths(dtid, 1, eyeIndex); if (Stereo::MaxDepthDiff(centerDepth, srcEdgeDepths) > kEdgeDepthThreshold) { debugState = 1; @@ -78,10 +284,6 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (r.valid) { float otherDepth = DepthTexture[r.otherPx]; - // Destination edge detection: skip if the reprojected pixel is near the HMD - // mask boundary or at a depth discontinuity in the other eye. Due to VR - // parallax the arm silhouette appears at a different screen position per eye, - // so the reprojection can cross a boundary invisible from this eye. 
float4 dstEdgeDepths = SampleCrossDepths(r.otherPx, kEdgeMargin, 1 - eyeIndex); if (any(dstEdgeDepths < 1e-5) || Stereo::MaxDepthDiff(otherDepth, dstEdgeDepths) > kEdgeDepthThreshold) { debugState = 2; @@ -89,9 +291,6 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) float4 otherColor = ColorTexture[r.otherPx]; Stereo::FinalizeStereoBlend(r, uv, centerDepth, otherDepth, eyeIndex, FrameDim, DepthSigma, MaxBlendFactor); - // Only blend where the two eyes actually disagree (screen-space effect - // inconsistency). Luminance difference below the threshold means both - // eyes computed the same result and blending would only destroy parallax. float colorDiff = abs(dot(centerColor.rgb, float3(0.2126, 0.7152, 0.0722)) - dot(otherColor.rgb, float3(0.2126, 0.7152, 0.0722))); float colorGate = smoothstep(ColorDiffThreshold * 0.5, ColorDiffThreshold * 2.0, colorDiff); @@ -106,7 +305,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } } -#ifdef DEBUG_BACKCHECK +# ifdef DEBUG_BACKCHECK // Debug visualization (6 states): // Blue = mask/sky: skipped // Yellow = source edge: depth discontinuity at this pixel @@ -123,7 +322,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) float3(0.5, 0.0, 0.0) // 5: back-check failed - red }; OutputRW[dtid] = float4(lerp(centerColor.rgb, debugColors[debugState], 0.7), centerColor.a); -#elif defined(DEBUG_BLEND_WEIGHT) +# elif defined(DEBUG_BLEND_WEIGHT) // Blend weight heatmap: only pixels with actual blend activity are colorized. // Untouched pixels pass through unmodified. float w = saturate(r.blendWeight / max(MaxBlendFactor, 1e-5)); @@ -133,7 +332,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } else { OutputRW[dtid] = centerColor; } -#elif defined(DEBUG_EDGE_DETECTION) +# elif defined(DEBUG_EDGE_DETECTION) // Edge detection visualizer: highlights pixels excluded by depth discontinuity checks. // Non-edge pixels show the normal blended output for scene context. 
// Bright yellow = source edge: discontinuity at this pixel @@ -145,7 +344,9 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } else { OutputRW[dtid] = blendedColor; } -#else +# else OutputRW[dtid] = blendedColor; -#endif +# endif + +#endif // STEREO_OVERWRITE } diff --git a/package/Shaders/VR/VRPostProcessCS.hlsl b/package/Shaders/VR/VRPostProcessCS.hlsl new file mode 100644 index 0000000000..29df310420 --- /dev/null +++ b/package/Shaders/VR/VRPostProcessCS.hlsl @@ -0,0 +1,113 @@ +// VR Post-Process - Bilateral blend for near-camera 2x supersampling +// +// Runs after all compositing and stereo blending is complete. +// Reads per-pixel classification from StencilCS and applies: +// - MODE_FULL_BLEND: bilateral depth-weighted blend for 2x supersampling +// +// Only MODE_FULL_BLEND pixels are processed. All others pass through untouched. + +#include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" +#include "Common/VR.hlsli" + +Texture2D ColorTexture : register(t0); // Copy of final composited image +Texture2D ModeTexture : register(t1); +Texture2D DepthTexture : register(t2); + +RWTexture2D OutputRW : register(u0); + +cbuffer VRPostProcessCB : register(b1) +{ + float2 FrameDim; + float2 RcpFrameDim; + float DebugEdgeTint; // 0 = off, >0 = debug visualization strength + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer + float FullBlendDistance; // Linearized depth threshold for full blend zone visualization + float _pad; // Pad to 16-byte alignment +}; + +#define MODE_DISOCCLUDED 0 +#define MODE_EDGE 1 +#define MODE_MAIN 2 +#define MODE_EDGE_NEIGHBOUR 3 +#define MODE_FULL_BLEND 4 + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + if (any(dtid >= uint2(FrameDim))) + return; + + uint pixelMode = ModeTexture[dtid]; + + // Depth map diagnostic: show mode texture contents as solid colors + if (DebugMode == 1) { + float4 c = ColorTexture[dtid]; + if (pixelMode == MODE_EDGE) + 
OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), 0.5), c.a); + else if (pixelMode == MODE_EDGE_NEIGHBOUR) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0, 1), 0.5), c.a); + else if (pixelMode == MODE_DISOCCLUDED) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 0.5, 1), 0.3), c.a); + else if (pixelMode == MODE_FULL_BLEND) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0.5, 0), 0.5), c.a); // Orange = full blend zone + return; + } + + // Full blend depth visualizer: shows the depth boundary as a cyan tint + if (DebugMode == 2) { + float2 uvDb = (dtid + 0.5) * RcpFrameDim; + float depthDb = DepthTexture[dtid]; + if (depthDb < 1e-5 || depthDb >= 1.0) + return; + float linDepth = SharedData::GetScreenDepth(depthDb); + if (linDepth < FullBlendDistance) { + float4 c = ColorTexture[dtid]; + float proximity = saturate(1.0 - linDepth / max(FullBlendDistance, 1.0)); + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 1), proximity * 0.4), c.a); + } + return; + } + + // Only process full blend pixels + if (pixelMode != MODE_FULL_BLEND) + return; + + float2 uv = (dtid + 0.5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + float4 result = ColorTexture[dtid]; + + // === MODE_FULL_BLEND: bilateral blend for 2x supersampling === + { + float4 center = result; + float centerDepth = DepthTexture[dtid]; + + // Reproject to the other eye + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, centerDepth, eyeIndex, FrameDim); + if (!r.valid) { + // Debug tint for failed reprojection + if (DebugEdgeTint > 0) + OutputRW[dtid] = float4(lerp(center.rgb, float3(1, 0.5, 0), DebugEdgeTint), center.a); + return; + } + + // Only blend with pixels that have valid composited data in both eyes. 
+ uint otherMode = ModeTexture[r.otherPx]; + if (otherMode != MODE_FULL_BLEND && otherMode != MODE_DISOCCLUDED) + return; + + float4 otherColor = ColorTexture[r.otherPx]; + float otherDepth = DepthTexture[r.otherPx]; + + // Depth-weighted bilateral blend + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float depthAgreement = 1.0 - saturate(abs(centerDepth - otherDepth) / maxDepth / 0.02); + float blendWeight = 0.5 * depthAgreement; + + result = lerp(center, otherColor, blendWeight); + + if (DebugEdgeTint > 0) + result.rgb = lerp(result.rgb, float3(0, 1, 1), DebugEdgeTint); + } + + OutputRW[dtid] = result; +} diff --git a/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl b/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl new file mode 100644 index 0000000000..bd34d26d58 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl @@ -0,0 +1,55 @@ +// VR Stereo Optimizations - Reprojection Compute Shader +// +// Fills Eye 1 pixels that were stencil-culled during rendering by reprojecting +// color data from Eye 0. Only operates on pixels classified as MODE_MAIN. +// +// Reads Eye 0 color directly from the OutputRW UAV (left half) and writes to +// Eye 1 (right half). No read-write conflict because reads and writes target +// strictly different halves of the texture. 
+// +// Input: +// t0 = Depth buffer +// t1 = Per-pixel mode classification texture +// Output: +// u0 = Main render target UAV (reads Eye 0, writes Eye 1) + +#include "Common/VR.hlsli" +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D DepthTexture : register(t0); +Texture2D ModeTexture : register(t1); + +RWTexture2D OutputRW : register(u0); + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + uint eyeWidth = (uint)FrameDim.x / 2; + uint eyeHeight = (uint)FrameDim.y; + + if (any(dtid >= uint2(eyeWidth, eyeHeight))) + return; + + // dtid is in Eye 1 local coords; convert to stereo buffer coords + uint2 stereoCoord = uint2(dtid.x + eyeWidth, dtid.y); + + // Only fill pixels that were marked for reprojection + // Mode texture is full SBS resolution, so use stereoCoord for Eye 1 + uint mode = ModeTexture[stereoCoord]; + if (mode != MODE_MAIN) + return; + + float depth = DepthTexture[stereoCoord]; + + // Compute mono UV for this Eye 1 pixel + float2 stereoUV = (float2(stereoCoord) + 0.5) * RcpFrameDim; + float2 monoUV = Stereo::ConvertFromStereoUV(stereoUV, 1); + + // Reproject to Eye 0 and sample color + float3 otherEyeUV = Stereo::ConvertMonoUVToOtherEye(float3(monoUV, depth), 1); + float2 eye0StereoUV = Stereo::ConvertToStereoUV(otherEyeUV.xy, 0); + int2 eye0Px = clamp(int2(eye0StereoUV * FrameDim), int2(0, 0), int2(FrameDim) - 1); + + float4 reprojectedColor = OutputRW[eye0Px]; + + // Write to Eye 1 in the main render target + OutputRW[stereoCoord] = reprojectedColor; +} diff --git a/package/Shaders/VRStereoOptimizations/StencilCS.hlsl b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl new file mode 100644 index 0000000000..6ff7b13376 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl @@ -0,0 +1,149 @@ +// VR Stereo Optimizations - Stencil Classification Compute Shader +// +// Classifies BOTH eyes over the full SBS buffer. 
Each pixel is tagged as: +// MODE_DISOCCLUDED - Must be fully shaded (sky, HMD mask, parallax-occluded) +// MODE_EDGE - Depth edge boundary (dist 1) or inner/foreground band; fully shaded + bilateral blend +// MODE_MAIN - Standard pixel eligible for reprojection / bilateral blend +// MODE_FULL_BLEND - Near-camera geometry: both eyes fully shaded for 2x supersampling +// +// Dispatched over full SBS resolution (FrameDim.x x FrameDim.y). + +#include "Common/SharedData.hlsli" +#include "Common/VR.hlsli" +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D DepthTexture : register(t0); + +RWTexture2D ModeTextureRW : register(u0); + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + if (any(dtid >= uint2(FrameDim))) + return; + + // Determine which eye this pixel belongs to + float2 uv = (float2(dtid) + 0.5) / FrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + // Read depth directly in SBS coords + float centerDepth = DepthTexture[dtid]; + +#ifdef DEBUG_DEPTH_MAP + // DIAGNOSTIC: Visualize what depth values StencilCS sees. + // Green (MODE_EDGE) = depth >= 1.0 (HMD mask threshold) + // Magenta (MODE_EDGE_NEIGHBOUR) = depth < 1e-5 (sky threshold) + // No tint (MODE_MAIN) = normal geometry with valid depth + if (centerDepth >= 1.0) { + ModeTextureRW[dtid] = MODE_EDGE; + return; + } + if (centerDepth < 1e-5) { + ModeTextureRW[dtid] = MODE_EDGE_NEIGHBOUR; + return; + } + ModeTextureRW[dtid] = MODE_MAIN; + return; +#endif + + // Sky/unrendered pixels (depth >= 1.0 at z-prepass time = depth buffer clear value) + // and HMD mask pixels both have depth >= 1.0 here. Treat them the same as sky: + // let edge detection run so geometry-vs-sky boundaries get classified. + // HMD mask pixels are in lens corners with no nearby geometry, so they'll + // fall through to MODE_DISOCCLUDED at the end. + bool isSky = (centerDepth < 1e-5) || (centerDepth >= 1.0); + float linCenter = isSky ? 
999999.0 : SharedData::GetScreenDepth(centerDepth); + + // Near-camera supersampling: geometry closer than FullBlendDistance gets full + // shading in both eyes for bilateral blend (2x supersampling in VRPostProcess). + if (!isSky && linCenter < FullBlendDistance) { + ModeTextureRW[dtid] = MODE_FULL_BLEND; + return; + } + + // --- Disocclusion detection via reprojection (runs for all non-sky pixels) --- + // Early return: disoccluded pixels are always MODE_DISOCCLUDED regardless of edge proximity. + // This ensures MinEdgeDistance never affects disocclusion classification. + if (!isSky) { + Stereo::StereoBilateralResult reproj = Stereo::ReprojectToOtherEye( + uv, + centerDepth, + eyeIndex, + FrameDim); + + bool isDisoccluded = false; + if (!reproj.valid) { + isDisoccluded = true; + } else { + float otherDepth = DepthTexture[reproj.otherPx]; + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float relativeDepthDiff = abs(centerDepth - otherDepth) / maxDepth; + isDisoccluded = (relativeDepthDiff > DisocclusionThreshold); + } + + if (isDisoccluded) { + ModeTextureRW[dtid] = MODE_DISOCCLUDED; + return; + } + } + + // Depth gate: skip edge detection for nearby geometry (saves perf, distant AA matters more) + // Sky pixels always run edge detection — they need to expand the edge band outward. + // Disocclusion detection (above) is independent of this gate and always runs. + bool skipEdgeDetection = !isSky && (linCenter < MinEdgeDistance); + + // --- Edge detection with two-tier classification --- + // MODE_EDGE: immediate neighbor (distance 1) has depth discontinuity, OR + // inner/foreground band (distance <= kInnerWidth). 
+ static const uint kInnerWidth = 2; + int2 offsets[4] = { int2(-1, 0), int2(1, 0), int2(0, -1), int2(0, 1) }; + + uint nearestEdgeDist = 0xFFFFFFFF; // nearest distance at which a discontinuity was found + bool nearestWeAreOuter = false; // whether we are on the background side at that nearest hit + + // Use the larger of inner/outer widths for the search + uint maxWidth = kInnerWidth; + + if (!skipEdgeDetection) { + [loop] for (uint d = 1; d <= maxWidth; d++) + { + [unroll] for (int i = 0; i < 4; i++) + { + int2 rawNeighbor = int2(dtid) + offsets[i] * (int)d; + uint2 neighborCoord = Stereo::ClampToEyeBounds(rawNeighbor, eyeIndex, FrameDim); + + float neighborDepth = DepthTexture[neighborCoord]; + bool neighborIsSky = (neighborDepth < 1e-5) || (neighborDepth >= 1.0); + float linNeighbor = neighborIsSky ? 999999.0 : SharedData::GetScreenDepth(neighborDepth); + float maxLin = max(max(linCenter, linNeighbor), 1e-5); + float relDepthDiff = abs(linCenter - linNeighbor) / maxLin; + + if (relDepthDiff > EdgeDepthThreshold && d < nearestEdgeDist) { + nearestEdgeDist = d; + nearestWeAreOuter = (linNeighbor < linCenter); // neighbor closer to camera = we are background + } + } + } + + } // !skipEdgeDetection + + if (nearestEdgeDist != 0xFFFFFFFF) { + // Classify based on distance and side + if (nearestEdgeDist == 1) { + // Immediate neighbor discontinuity: always MODE_EDGE regardless of side + ModeTextureRW[dtid] = MODE_EDGE; + return; + } else if (!nearestWeAreOuter && nearestEdgeDist <= kInnerWidth) { + // Inner/foreground band beyond distance 1 + ModeTextureRW[dtid] = MODE_EDGE; + return; + } + } + + // Sky pixels that aren't near edges -> disoccluded (reprojection is meaningless for sky) + if (isSky) { + ModeTextureRW[dtid] = MODE_DISOCCLUDED; + return; + } + + // Standard pixel + ModeTextureRW[dtid] = MODE_MAIN; +} diff --git a/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl new file mode 100644 
index 0000000000..c45c2a2409 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl @@ -0,0 +1,54 @@ +// VR Stereo Optimizations - Stencil Write Pixel Shader +// +// Reads from the per-pixel mode classification texture and depth texture. +// Discards pixels that should NOT be stencil-culled: +// - MODE_DISOCCLUDED (0) = fully shaded in Eye 1, no reprojection needed +// - MODE_FULL_BLEND (4) = near-camera pixels fully shaded in both eyes for supersampling +// - Sky/HMD-mask pixels (depth >= 1.0 or depth < 1e-5) = need normal rendering +// in the sky pass; they keep their MODE_EDGE tag in +// the mode texture for VRPostProcess but must not be stencil-culled. +// +// Only geometry MODE_MAIN/MODE_EDGE pixels survive and get stencil ref=1 written. +// +// Mode texture is full SBS resolution (same as render target). +// The DSS is configured with StencilFunc=ALWAYS, StencilPassOp=REPLACE, ref=1. +// Pixels that survive (not discarded) get stencil=1 written. + +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D ModeTexture : register(t0); +Texture2D DepthTexture : register(t1); + +struct PS_INPUT +{ + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; +}; + +void main(PS_INPUT input) +{ + // Mode texture is full SBS resolution — SV_Position maps directly + // (viewport is Eye 1 half, so SV_Position.x starts at eyeWidth) + int2 modeCoord = int2(input.Position.xy); + + uint mode = ModeTexture[modeCoord]; + + // MODE_MAIN and MODE_EDGE in Eye 1 write stencil ref=1 (reprojectable). + // These are reprojected from Eye 0; MODE_DISOCCLUDED and MODE_FULL_BLEND are fully shaded in Eye 1. + if (mode == MODE_DISOCCLUDED) + discard; + + // Sky/HMD-mask pixels must not be stencil-culled regardless of edge classification. + // They keep their MODE_EDGE tag in the mode texture for VRPostProcess, + // but must render normally in the sky pass (which runs after stencil culling). 
+ float depth = DepthTexture[modeCoord]; + if (depth >= 1.0 || depth < 1e-5) + discard; + + // MODE_FULL_BLEND: near-camera pixels fully shaded in both eyes for supersampling + if (mode == MODE_FULL_BLEND) + discard; + + // Pixel survives: DSS writes stencil ref=1 + // No color output (no RTV bound) +} diff --git a/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl new file mode 100644 index 0000000000..353aa53379 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl @@ -0,0 +1,24 @@ +// VR Stereo Optimizations - Stencil Write Vertex Shader +// +// Procedural fullscreen triangle covering Eye 1 (right half of SBS buffer). +// No vertex buffer needed — vertex positions are generated from SV_VertexID. +// The viewport is set to Eye 1 by the C++ code, so we just emit a standard +// fullscreen triangle in clip space. + +struct VS_OUTPUT +{ + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; +}; + +VS_OUTPUT main(uint vertexID : SV_VertexID) +{ + VS_OUTPUT output; + + // Fullscreen triangle: 3 vertices covering [-1,1] clip space + float2 uv = float2((vertexID << 1) & 2, vertexID & 2); + output.Position = float4(uv * float2(2, -2) + float2(-1, 1), 0, 1); + output.TexCoord = uv; + + return output; +} diff --git a/package/Shaders/VRStereoOptimizations/cbuffers.hlsli b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli new file mode 100644 index 0000000000..5f8c79caf7 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli @@ -0,0 +1,35 @@ +// VR Stereo Optimizations - Shared constant buffer layout +// Must match VRStereoOptParams in VRStereoOptimizations.h exactly + +#ifndef __VR_STEREO_OPT_CBUFFERS_HLSLI__ +#define __VR_STEREO_OPT_CBUFFERS_HLSLI__ + +cbuffer VRStereoOptParams : register(b1) +{ + float2 FrameDim; // Full stereo buffer dimensions (both eyes) + float2 RcpFrameDim; // 1.0 / FrameDim + + uint StereoModeValue; // 0=Off, 1=Enable + float 
DisocclusionThreshold; // Depth difference threshold for disocclusion detection + float EdgeDepthThreshold; // Relative depth difference threshold for edge detection + uint EdgeWidth; // Half-width of edge detection band in pixels + + float2 QualityJitter; // Sub-pixel jitter offset (Quality mode) + float FoveatedRadius; // Radius of foveal region in UV space + float pad2; + + float2 FoveatedCenter; // Center of foveal region in UV space + float MinEdgeDistance; + float FullBlendDistance; // Linearized depth below which pixels get MODE_FULL_BLEND (game units) +}; + +#define STEREO_MODE_OFF 0 +#define STEREO_MODE_ENABLE 1 + +#define MODE_DISOCCLUDED 0 // Fully shaded, no reprojection, no blend (sky, HMD mask, parallax-occluded) +#define MODE_EDGE 1 // Depth edge boundary (distance 1) or inner/foreground band; fully shaded + bilateral blend +#define MODE_MAIN 2 // Eye 0: no reproject (Perf) / bilateral (Quality). Eye 1: overwrite reproject (Perf) / bilateral (Quality) +#define MODE_EDGE_NEIGHBOUR 3 // (Legacy, unused) Outer/background band — now classified as MODE_MAIN +#define MODE_FULL_BLEND 4 // Near-camera geometry: both eyes fully shaded + bilateral blend for 2x supersampling + +#endif diff --git a/src/Deferred.cpp b/src/Deferred.cpp index a0a322500b..ba8c1f3830 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -14,6 +14,7 @@ #include "Features/TerrainBlending.h" #include "Features/Upscaling.h" #include "Features/VR.h" +#include "Features/VRStereoOptimizations.h" #include "Features/WeatherEditor.h" #include "Hooks.h" @@ -275,6 +276,11 @@ void Deferred::StartDeferred() PrepassPasses(); OverrideBlendStates(); + + // VR: Classify Eye 1 pixels and write hardware stencil marks before geometry rendering + if (globals::game::isVR) { + globals::features::vrStereoOptimizations.DispatchStencil(); + } } void Deferred::DeferredPasses() @@ -363,6 +369,14 @@ void Deferred::DeferredPasses() context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); + // Bind 
VRStereoOptimizations mode texture for Eye 1 skip + auto& vrStereoOpt = globals::features::vrStereoOptimizations; + if (REL::Module::IsVR() && vrStereoOpt.loaded && vrStereoOpt.settings.stereoMode != VRStereoOptimizations::StereoMode::Off) { + ID3D11ShaderResourceView* modeSRV = vrStereoOpt.GetModeTextureSRV(); + if (modeSRV) + context->CSSetShaderResources(16, 1, &modeSRV); + } + ID3D11UnorderedAccessView* uavs[3]{ main.UAV, normals.UAV, motionVectors.UAV }; context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); @@ -370,13 +384,28 @@ void Deferred::DeferredPasses() context->CSSetShader(shader, nullptr, 0); context->Dispatch(dispatchCount.x, dispatchCount.y, 1); + + // Unbind mode texture SRV + if (REL::Module::IsVR() && vrStereoOpt.loaded && vrStereoOpt.settings.stereoMode != VRStereoOptimizations::StereoMode::Off) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->CSSetShaderResources(16, 1, &nullSRV); + } + } + + // VR: Deactivate stencil culling now that geometry rendering is complete. + // Must happen before StereoBlend so the blend pass itself isn't stencil-blocked. + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vrStereoOptimizations; + if (stereoOpt.IsStencilActive()) { + stereoOpt.DeactivateStencil(); + } } - // VR stereo consistency blend - depth-aware bilateral blend at the eye seam - // Runs after composite as a general safety net for all screen-space effects. - // Must run before clearing b12/b13 -- needs FrameBuffer matrices for reprojection. - if (globals::game::isVR) + // VR: Stereo reprojection fills Eye 1 holes here (after DeferredComposite, before SSR/water/sky) + // so that ISReflectionsRayTracing sees valid pixels in both eyes. 
+ if (globals::game::isVR) { globals::features::vr.DrawStereoBlend(); + } // Clear { @@ -475,6 +504,10 @@ void Deferred::OverrideBlendStates() blendDesc.RenderTarget[i].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; } + // RT[5] = REFLECTANCE: enable alpha writes for POM depth data + // stored in Reflectance.w, used by StereoBlendCS for depth-aware reprojection + blendDesc.RenderTarget[5].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[a][b][c][d])); } else { deferredBlendStates[a][b][c][d] = nullptr; @@ -551,6 +584,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainComposite() if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); + if (REL::Module::IsVR() && globals::features::vrStereoOptimizations.loaded) + defines.push_back({ "VR_STEREO_OPT", nullptr }); + mainCompositeCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); } return mainCompositeCS; @@ -576,6 +612,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainCompositeInterior() if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); + if (REL::Module::IsVR() && globals::features::vrStereoOptimizations.loaded) + defines.push_back({ "VR_STEREO_OPT", nullptr }); + mainCompositeInteriorCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); } return mainCompositeInteriorCS; @@ -593,6 +632,7 @@ void Deferred::Hooks::Main_RenderWorld::thunk(bool a1) state->permutationData.ExtraShaderDescriptor |= static_cast(State::ExtraShaderDescriptors::InWorld); state->inWorld = true; func(a1); + state->inWorld = false; state->permutationData.ExtraShaderDescriptor &= ~static_cast(State::ExtraShaderDescriptors::InWorld); }; diff --git a/src/Feature.cpp b/src/Feature.cpp index 24b634979f..f3625e7d48 100644 --- a/src/Feature.cpp +++ b/src/Feature.cpp @@ -30,6 +30,7 @@ #include "Features/UnifiedWater.h" #include 
"Features/Upscaling.h" #include "Features/VR.h" +#include "Features/VRStereoOptimizations.h" #include "Features/VolumetricLighting.h" #include "Features/VolumetricShadows.h" #include "Features/WaterEffects.h" @@ -247,6 +248,7 @@ const std::vector& Feature::GetFeatureList() static auto BuildVRList = []() -> std::vector { auto v = features; v.push_back(&globals::features::vr); + v.push_back(&globals::features::vrStereoOptimizations); // In developer mode, keep all features for testing // In production mode, filter to VR-compatible only diff --git a/src/Features/ExtendedMaterials.h b/src/Features/ExtendedMaterials.h index 2a05a940aa..83da3abed3 100644 --- a/src/Features/ExtendedMaterials.h +++ b/src/Features/ExtendedMaterials.h @@ -36,7 +36,7 @@ struct ExtendedMaterials : Feature uint ExtendShadows = 1; uint EnableParallaxWarpingFix = 1; - float pad[1]; + uint pad0 = 0; }; STATIC_ASSERT_ALIGNAS_16(Settings); diff --git a/src/Features/ScreenSpaceGI.cpp b/src/Features/ScreenSpaceGI.cpp index 458beccb23..a20ef95254 100644 --- a/src/Features/ScreenSpaceGI.cpp +++ b/src/Features/ScreenSpaceGI.cpp @@ -5,6 +5,7 @@ #include "Deferred.h" #include "State.h" #include "Util.h" +#include "VRStereoOptimizations.h" NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( ScreenSpaceGI::Settings, @@ -593,6 +594,8 @@ void ScreenSpaceGI::CompileComputeShaders() for (auto& info : shaderInfos) { if (REL::Module::IsVR()) info.defines.push_back({ "VR", "" }); + if (REL::Module::IsVR() && globals::features::vrStereoOptimizations.loaded) + info.defines.push_back({ "VR_STEREO_OPT", "" }); if (settings.ResolutionMode == 1) info.defines.push_back({ "HALF_RES", "" }); if (settings.ResolutionMode == 2) @@ -743,6 +746,15 @@ void ScreenSpaceGI::DrawSSGI() context->CSSetConstantBuffers(5, 1, &sharedDataBuf); context->CSSetSamplers(0, (uint)samplers.size(), samplers.data()); + // Bind VRStereoOptimizations mode texture for Eye 1 compute culling + auto& vrStereoOpt = globals::features::vrStereoOptimizations; 
+ bool stereoOptActive = REL::Module::IsVR() && vrStereoOpt.loaded && vrStereoOpt.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; + if (stereoOptActive) { + ID3D11ShaderResourceView* modeSRV = vrStereoOpt.GetModeTextureSRV(); + if (modeSRV) + context->CSSetShaderResources(16, 1, &modeSRV); + } + // prefilter depths { TracyD3D11Zone(globals::state->tracyCtx, "SSGI - Prefilter Depths"); @@ -873,7 +885,11 @@ void ScreenSpaceGI::DrawSSGI() // VR stereo sync: bilateral blend of SSGI buffers between eyes // Shi, Billeter, Eisemann 2022, "Stereo-consistent screen-space ambient occlusion" - if (REL::Module::IsVR() && stereoSyncCompute) { + bool useStereoOpt = REL::Module::IsVR() && + globals::features::vrStereoOptimizations.loaded && + globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; + + if (REL::Module::IsVR() && stereoSyncCompute && !useStereoOpt) { TracyD3D11Zone(globals::state->tracyCtx, "SSGI - Stereo Sync"); if (globals::state->frameAnnotations) @@ -930,6 +946,12 @@ void ScreenSpaceGI::DrawSSGI() // cleanup resetViews(); + // Unbind VRStereoOptimizations mode texture SRV + if (stereoOptActive) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->CSSetShaderResources(16, 1, &nullSRV); + } + samplers.fill(nullptr); cb = nullptr; diff --git a/src/Features/ScreenSpaceShadows.cpp b/src/Features/ScreenSpaceShadows.cpp index 6f1a8194d9..f5f480b669 100644 --- a/src/Features/ScreenSpaceShadows.cpp +++ b/src/Features/ScreenSpaceShadows.cpp @@ -1,6 +1,7 @@ #include "ScreenSpaceShadows.h" #include "State.h" +#include "VRStereoOptimizations.h" #pragma warning(push) #pragma warning(disable: 4838 4244) @@ -40,13 +41,13 @@ void ScreenSpaceShadows::DrawSettings() if (auto _tt = Util::HoverTooltipWrapper()) ImGui::Text("Contrast boost for the shadow transition. 
Higher values produce harder shadow edges."); - if (globals::game::isVR && globals::state->IsDeveloperMode()) { + if (globals::game::isVR) { ImGui::Checkbox("VR Stereo Sync", &enableStereoSync); if (auto _tt = Util::HoverTooltipWrapper()) ImGui::Text( "Synchronizes shadow data between left and right eyes via bilateral reprojection " "and applies a depth-weighted blur to reduce per-eye noise. " - "Uses min-blend so if either eye detects an occluder, the shadow is preserved. "); + "Uses min-blend so if either eye detects an occluder, the shadow is preserved."); } ImGui::Spacing(); @@ -65,6 +66,10 @@ void ScreenSpaceShadows::InvalidateRaymarchShaders() raymarchRightCS->Release(); raymarchRightCS = nullptr; } + if (raymarchRightReducedCS) { + raymarchRightReducedCS->Release(); + raymarchRightReducedCS = nullptr; + } } void ScreenSpaceShadows::ClearShaderCache() @@ -78,23 +83,13 @@ void ScreenSpaceShadows::ClearShaderCache() uint ScreenSpaceShadows::GetScaledSampleCount() { - float2 renderSize = Util::ConvertToDynamic(globals::state->screenSize); - - // In VR, renderSize covers both eyes side-by-side; raymarch dispatches per-eye - if (globals::game::isVR) - renderSize.x /= 2.0f; - - // Scale sample count based on both dimensions relative to 1920x1080 reference - float2 referenceRes = { 1920.0f, 1080.0f }; - float referenceArea = referenceRes.x * referenceRes.y; - float currentArea = renderSize.x * renderSize.y; - float areaScale = std::sqrt(currentArea / referenceArea); - uint scaledSampleCount = static_cast(std::round(bendSettings.SampleCount * 60 * areaScale)); - - // Quantize to steps of 8 to prevent frequent recompilation from small DRS oscillations - scaledSampleCount = ((scaledSampleCount + 7u) / 8u) * 8u; - scaledSampleCount = std::max(scaledSampleCount, 8u); - + // Shadow reach in pixels is resolution-independent: a tree branch casts + // the same pixel-length shadow at 1080p and 3000p. Sample count controls + // reach, not quality-per-pixel. 
The old formula (multiplier * 64) was + // correct; the area-based scaling produced 2-8x more samples at VR + // resolution with no quality benefit, only GPU cost. + // Always produces WAVE_SIZE-aligned counts for correct Bend READ_COUNT. + uint scaledSampleCount = bendSettings.SampleCount * 64; return scaledSampleCount; } @@ -117,11 +112,44 @@ ID3D11ComputeShader* ScreenSpaceShadows::GetComputeRaymarchRight() { if (!raymarchRightCS) { uint scaledSampleCount = GetScaledSampleCount(); - raymarchRightCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\RaymarchCS.hlsl", { { "SAMPLE_COUNT", std::format("{}", scaledSampleCount).c_str() }, { "RIGHT", "" } }, "cs_5_0"); + auto sampleCountStr = std::format("{}", scaledSampleCount); + std::vector> defines = { + { "SAMPLE_COUNT", sampleCountStr.c_str() }, + { "RIGHT", "" } + }; + raymarchRightCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\RaymarchCS.hlsl", defines, "cs_5_0"); } return raymarchRightCS; } +ID3D11ComputeShader* ScreenSpaceShadows::GetComputeRaymarchRightReduced() +{ + uint fullCount = GetScaledSampleCount(); + uint divisor = (stereoOptRightEyeReduction == 1) ? 
4 : 2; + uint reducedCount = std::max(fullCount / divisor, 64u); + // Quantize to WAVE_SIZE (64) for clean READ_COUNT in Bend's algorithm + reducedCount = ((reducedCount + 63u) / 64u) * 64u; + + if (reducedCount != lastCompiledReducedSampleCount) { + lastCompiledReducedSampleCount = reducedCount; + if (raymarchRightReducedCS) { + raymarchRightReducedCS->Release(); + raymarchRightReducedCS = nullptr; + } + } + + if (!raymarchRightReducedCS) { + auto sampleCountStr = std::format("{}", reducedCount); + std::vector> defines = { + { "SAMPLE_COUNT", sampleCountStr.c_str() }, + { "RIGHT", "" } + }; + raymarchRightReducedCS = (ID3D11ComputeShader*)Util::CompileShader( + L"Data\\Shaders\\ScreenSpaceShadows\\RaymarchCS.hlsl", defines, "cs_5_0"); + } + return raymarchRightReducedCS; +} + void ScreenSpaceShadows::DrawShadows() { ZoneScoped; @@ -148,6 +176,7 @@ void ScreenSpaceShadows::DrawShadows() auto lightProjectionF = CalculateLightProjection(0); float2 renderSize = Util::ConvertToDynamic(state->screenSize); + int viewportSize[2] = { (int)renderSize.x, (int)renderSize.y }; if (globals::game::isVR) @@ -156,12 +185,11 @@ void ScreenSpaceShadows::DrawShadows() int minRenderBounds[2] = { 0, 0 }; int maxRenderBounds[2] = { viewportSize[0], viewportSize[1] }; - // Setup common render state auto* depthSRV = Util::GetCurrentSceneDepthSRV(); - context->CSSetShaderResources(0, 1, &depthSRV); + auto* shadowUAV = screenSpaceShadowsTexture->uav.get(); - auto uav = screenSpaceShadowsTexture->uav.get(); - context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + context->CSSetShaderResources(0, 1, &depthSRV); + context->CSSetUnorderedAccessViews(0, 1, &shadowUAV, nullptr); context->CSSetSamplers(0, 1, &pointBorderSampler); @@ -170,7 +198,8 @@ void ScreenSpaceShadows::DrawShadows() auto viewport = globals::game::graphicsState; - float2 dynamicRes = { viewport->GetRuntimeData().dynamicResolutionWidthRatio, viewport->GetRuntimeData().dynamicResolutionHeightRatio }; + float2 dynamicRes = { 
viewport->GetRuntimeData().dynamicResolutionWidthRatio, + viewport->GetRuntimeData().dynamicResolutionHeightRatio }; // Shared dispatch logic for both VR and non-VR auto DispatchEye = [&](const char* eyeName, ID3D11ComputeShader* shader, const float* lightProj, @@ -228,9 +257,21 @@ void ScreenSpaceShadows::DrawShadows() } else { DispatchEye("Left Eye", GetComputeRaymarch(), lightProjectionF.data(), InvTexSizeX, InvTexSizeY); - // Calculate light projection for right eye auto lightProjectionRightF = CalculateLightProjection(1); - DispatchEye("Right Eye", GetComputeRaymarchRight(), lightProjectionRightF.data(), InvTexSizeX, InvTexSizeY); + + bool useStereoOpt = REL::Module::IsVR() && + globals::features::vrStereoOptimizations.loaded && + globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off; + + if (useStereoOpt) { + // Reduced sample count for right eye — StereoBlend overwrites most of it + DispatchEye("Right Eye (Reduced)", GetComputeRaymarchRightReduced(), + lightProjectionRightF.data(), InvTexSizeX, InvTexSizeY); + } else { + // Full sample count + DispatchEye("Right Eye", GetComputeRaymarchRight(), + lightProjectionRightF.data(), InvTexSizeX, InvTexSizeY); + } } ID3D11ShaderResourceView* views[1]{ nullptr }; @@ -326,16 +367,26 @@ void ScreenSpaceShadows::Prepass() void ScreenSpaceShadows::LoadSettings(json& o_json) { bendSettings = o_json; + if (o_json.contains("StereoOptRightEyeReduction")) + stereoOptRightEyeReduction = o_json["StereoOptRightEyeReduction"]; + if (o_json.contains("EnableStereoSync")) + enableStereoSync = o_json["EnableStereoSync"].get(); } void ScreenSpaceShadows::SaveSettings(json& o_json) { o_json = bendSettings; + o_json["StereoOptRightEyeReduction"] = stereoOptRightEyeReduction; + o_json["EnableStereoSync"] = enableStereoSync; } void ScreenSpaceShadows::RestoreDefaultSettings() { bendSettings = {}; + stereoOptRightEyeReduction = 0; + enableStereoSync = false; + if (globals::game::isVR) + 
bendSettings.SampleCount = 2; } bool ScreenSpaceShadows::HasShaderDefine(RE::BSShader::Type) @@ -346,7 +397,6 @@ bool ScreenSpaceShadows::HasShaderDefine(RE::BSShader::Type) void ScreenSpaceShadows::SetupResources() { raymarchCB = new ConstantBuffer(ConstantBufferDesc()); - if (globals::game::isVR) { stereoSyncCB = new ConstantBuffer(ConstantBufferDesc()); } diff --git a/src/Features/ScreenSpaceShadows.h b/src/Features/ScreenSpaceShadows.h index de9b8e1bd4..830d727342 100644 --- a/src/Features/ScreenSpaceShadows.h +++ b/src/Features/ScreenSpaceShadows.h @@ -35,7 +35,7 @@ struct ScreenSpaceShadows : Feature float BilinearThreshold = 0.02f; float ShadowContrast = !globals::game::isVR ? 1.0f : 4.0f; uint Enable = 1; - uint SampleCount = 1; + uint SampleCount = !globals::game::isVR ? 1u : 2u; uint pad0[3]; }; @@ -62,7 +62,7 @@ struct ScreenSpaceShadows : Feature }; STATIC_ASSERT_ALIGNAS_16(RaymarchCB); - bool enableStereoSync = true; + bool enableStereoSync = false; struct alignas(16) StereoSyncCB { @@ -71,11 +71,15 @@ struct ScreenSpaceShadows : Feature }; STATIC_ASSERT_ALIGNAS_16(StereoSyncCB); + int stereoOptRightEyeReduction = 0; // 0 = Half, 1 = Quarter sample count + ID3D11SamplerState* pointBorderSampler = nullptr; ConstantBuffer* raymarchCB = nullptr; ID3D11ComputeShader* raymarchCS = nullptr; ID3D11ComputeShader* raymarchRightCS = nullptr; + ID3D11ComputeShader* raymarchRightReducedCS = nullptr; + uint lastCompiledReducedSampleCount = 0; Texture2D* screenSpaceShadowsTexture = nullptr; @@ -94,6 +98,7 @@ struct ScreenSpaceShadows : Feature uint lastCompiledSampleCount = 0; ID3D11ComputeShader* GetComputeRaymarch(); ID3D11ComputeShader* GetComputeRaymarchRight(); + ID3D11ComputeShader* GetComputeRaymarchRightReduced(); virtual void Prepass() override; diff --git a/src/Features/TAAReorder.cpp b/src/Features/TAAReorder.cpp new file mode 100644 index 0000000000..386d127a38 --- /dev/null +++ b/src/Features/TAAReorder.cpp @@ -0,0 +1,575 @@ +/** + * @brief TAA 
Periphery Reordering for VR DLSS Viewport Scaling + * + * This implementation follows the approach pioneered by PureDark's Skyrim Upscaler + * (https://github.com/PureDark/Skyrim-Upscaler/tree/VR), which demonstrated how to + * reorder Skyrim's post-processing pipeline to run vanilla TAA on the periphery while + * DLSS processes a cropped center region. No code was copied; the approach was used as + * a reference for the conductor/hook architecture. + * + * PureDark's Skyrim Upscaler is licensed under the MIT License: + * Copyright (c) 2022 PureDark + * https://github.com/PureDark/Skyrim-Upscaler/blob/VR/LICENSE + */ +#include "TAAReorder.h" + +#include "Globals.h" +#include "Upscaling.h" +#include +#include + +namespace TAAReorder +{ + bool ShouldReorderTAA() + { + if (!g_initialized) + return false; + auto& upscaling = globals::features::upscaling; + return globals::game::isVR && + upscaling.settings.vrPeripheryTAA && + upscaling.settings.vrDlssViewportScale < 1.0f && + upscaling.GetUpscaleMethod() == Upscaling::UpscaleMethod::kDLSS; + } + + // ─── Setter A: Force TAA (pass-through) ─── + void ForceTAASetter::thunk() + { + func(); + } + + // ─── Setter B: TAA State Machine (pass-through) ─── + void TAAStateMachine::thunk() + { + func(); + } + + // ─── EnsurePostPPCopy: create/resize staging texture matching source ─── + void EnsurePostPPCopy(ID3D11Texture2D* sourceTex) + { + D3D11_TEXTURE2D_DESC srcDesc; + sourceTex->GetDesc(&srcDesc); + + if (g_postPPCopy) { + D3D11_TEXTURE2D_DESC existingDesc; + g_postPPCopy->GetDesc(&existingDesc); + if (existingDesc.Width == srcDesc.Width && existingDesc.Height == srcDesc.Height && + existingDesc.Format == srcDesc.Format) + return; + } + + D3D11_TEXTURE2D_DESC desc = srcDesc; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.MiscFlags = 0; + g_postPPCopy = nullptr; + g_postPPCopySRV = nullptr; + globals::d3d::device->CreateTexture2D(&desc, nullptr, g_postPPCopy.put()); + + if (g_postPPCopy) { + 
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = desc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + globals::d3d::device->CreateShaderResourceView(g_postPPCopy.get(), &srvDesc, g_postPPCopySRV.put()); + Util::SetResourceName(g_postPPCopy.get(), "TAAReorder_PostPPCopy"); + } + } + + // ─── Helper: set up common fullscreen rendering state ─── + static void SetupFullscreenState(ID3D11DeviceContext* context, float vpX, float vpY, float vpW, float vpH) + { + D3D11_VIEWPORT viewport = {}; + viewport.TopLeftX = vpX; + viewport.TopLeftY = vpY; + viewport.Width = vpW; + viewport.Height = vpH; + viewport.MaxDepth = 1.0f; + + auto& upscaling = globals::features::upscaling; + context->RSSetViewports(1, &viewport); + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context->VSSetShader(upscaling.GetUpscaleVS(), nullptr, 0); + context->RSSetState(upscaling.upscaleRasterizerState.get()); + context->OMSetBlendState(upscaling.upscaleBlendState.get(), nullptr, 0xffffffff); + } + + // ─── Helper: draw fullscreen triangle (point-sample format-converting copy) ─── + void DrawFullscreenCopy(ID3D11ShaderResourceView* srcSRV, ID3D11RenderTargetView* dstRTV, + float vpX, float vpY, float vpW, float vpH) + { + auto& upscaling = globals::features::upscaling; + auto context = globals::d3d::context; + + SetupFullscreenState(context, vpX, vpY, vpW, vpH); + context->PSSetShader(upscaling.GetDlssCompositePS(), nullptr, 0); + + ID3D11ShaderResourceView* srvs[] = { srcSRV }; + context->PSSetShaderResources(0, 1, srvs); + + ID3D11RenderTargetView* rtvs[] = { dstRTV }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + context->Draw(3, 0); + } + + // ─── ExecutePass hook: capture Phase 2A output, detect Phase 5 ─── + void ExecutePassHook::thunk(void* manager, void* passObj, int srcTech, int dstTech, 
void* extraData, uint8_t flag) + { + bool isPeripheryTAA = ShouldReorderTAA(); + bool shouldLog = (g_diagCounter == 0); + + // Compute pass index for Phase 2A / Phase 5 detection + int passIndex = -1; + if (isPeripheryTAA || shouldLog) { + uintptr_t managerAddr = (uintptr_t)manager; + uintptr_t passArrayBase = *(uintptr_t*)(managerAddr + 0x28); // pass pointer table read from manager+0x28 (layout assumed — TODO confirm) + if (passArrayBase) { + for (int i = 0; i < 40; i++) { // scan the first 40 pointer-sized slots for passObj + if (*(uintptr_t*)(passArrayBase + i * 8) == (uintptr_t)passObj) { + passIndex = i; + break; + } + } + } + } + + if (shouldLog) + logger::info("[TAAReorder] ExecutePass: src=0x{:X} dst=0x{:X} flag={} passIdx={}", + srcTech, dstTech, flag, passIndex); + + // Execute the original pass + func(manager, passObj, srcTech, dstTech, extraData, flag); + + // After Phase 2A: copy output RT to g_postPPCopy for DLSS to process + if (isPeripheryTAA && passIndex == 30 && dstTech == 0x29) { + ID3D11RenderTargetView* postRTV = nullptr; + globals::d3d::context->OMGetRenderTargets(1, &postRTV, nullptr); + if (postRTV) { + ID3D11Resource* res = nullptr; + postRTV->GetResource(&res); + if (res) { + ID3D11Texture2D* postTex = nullptr; + res->QueryInterface(__uuidof(ID3D11Texture2D), (void**)&postTex); + if (postTex) { + EnsurePostPPCopy(postTex); // leaves g_postPPCopy null if CreateTexture2D fails + if (g_postPPCopy) globals::d3d::context->CopyResource(g_postPPCopy.get(), postTex); // never hand CopyResource a null destination + g_postPPReady = g_postPPCopy.get() != nullptr; + if (shouldLog && g_postPPReady) { + D3D11_TEXTURE2D_DESC desc; + postTex->GetDesc(&desc); + logger::info("[TAAReorder] Phase 2A output: {}x{} fmt={} → copied to g_postPPCopy", + desc.Width, desc.Height, (uint32_t)desc.Format); + } + postTex->Release(); + } + res->Release(); + } + postRTV->Release(); + } + } + + // Detect Phase 5 completion + if (isPeripheryTAA && passIndex == 35) { + g_phase5Complete = true; + if (shouldLog) + logger::info("[TAAReorder] Phase 5 complete (passIdx=35)"); + } + } + + // ─── BSImagespaceShader hook: DLSS eval + paste after pipeline completes ─── + // Wraps call at 0x132C827 (write_thunk_call). 
func() encompasses the + // conductor (Phase 2A) but NOT Phase 5 (TAA+DRS) — Phase 5 runs after us. NOTE(review): the comments in Init() and TAAReorder.h say func() also covers Phase 5 — confirm which is accurate. + // We evaluate DLSS on the captured Phase 2A output and paste the center onto the submit + // texture: feathered composite draw when available, CopySubresourceRegion as the hard-edge fallback. + void BSImagespaceShaderHook::thunk(void* a_this, uint64_t a_param) + { + func(a_this, a_param); + + if (!ShouldReorderTAA()) + return; + + bool shouldLog = (g_diagCounter == 0); + auto context = globals::d3d::context; + auto& upscaling = globals::features::upscaling; + + // Get submit texture from bound RT after pipeline stage completes + ID3D11RenderTargetView* submitRTV = nullptr; + context->OMGetRenderTargets(1, &submitRTV, nullptr); + ID3D11Texture2D* submitTex = nullptr; + if (submitRTV) { + ID3D11Resource* res = nullptr; + submitRTV->GetResource(&res); + if (res) { + res->QueryInterface(__uuidof(ID3D11Texture2D), (void**)&submitTex); + res->Release(); + } + submitRTV->Release(); + } + + if (shouldLog) { + if (submitTex) { + D3D11_TEXTURE2D_DESC desc; + submitTex->GetDesc(&desc); + logger::info("[TAAReorder] BSImagespaceShaderHook: submitTex=0x{:X} {}x{} fmt={} bind=0x{:X} postPPReady={} phase5={}", + (uintptr_t)submitTex, desc.Width, desc.Height, (uint32_t)desc.Format, + desc.BindFlags, g_postPPReady, g_phase5Complete); + } else { + logger::info("[TAAReorder] BSImagespaceShaderHook: no submitTex bound"); + } + } + + // Step 1: Evaluate DLSS on the captured post-PP intermediate + if (g_postPPReady && g_postPPCopy) { + if (shouldLog) + logger::info("[TAAReorder] BSImagespaceShaderHook: evaluating DLSS on g_postPPCopy..."); + + upscaling.Upscale(g_postPPCopy.get()); + g_dlssReady = true; + + if (shouldLog) + logger::info("[TAAReorder] BSImagespaceShaderHook: DLSS evaluation complete"); + } else if (shouldLog) { + logger::info("[TAAReorder] BSImagespaceShaderHook: skip DLSS (postPPReady={} postPPCopy={})", + g_postPPReady, (void*)g_postPPCopy.get()); + } + + // Step 2: Paste DLSS center from g_postPPCopy onto submit texture per-eye + 
if (g_dlssReady && submitTex && g_postPPCopy) { + auto screenSize = globals::state->screenSize; + uint32_t eyeW = (uint32_t)(screenSize.x / 2); + uint32_t eyeH = (uint32_t)screenSize.y; + float vpScale = upscaling.settings.vrDlssViewportScale; + uint32_t centerW = (uint32_t)(eyeW * vpScale); + uint32_t centerH = (uint32_t)(eyeH * vpScale); + uint32_t baseCenterX = (eyeW - centerW) / 2; + uint32_t centerY = (eyeH - centerH) / 2; + + // Apply nasal offset (in display resolution space, matching FinalizePerEyeOutputs) + int32_t nasalShift = (int32_t)(upscaling.settings.vrDlssCropOffsetX * eyeW); + + float featherWidth = upscaling.settings.vrDlssFeatherWidth; + float featherPixels = featherWidth * eyeW; + + // Feathered blend path: use FeatheredCompositePS with hardware alpha blending + bool useFeathered = featherPixels > 0.0f && upscaling.vrFeatheredCompositePS && upscaling.vrFeatheredCompositeBlendState; + if (useFeathered) { + // Re-acquire submitRTV (we released it above, need it for render target binding) + ID3D11RenderTargetView* pasteRTV = nullptr; + context->OMGetRenderTargets(1, &pasteRTV, nullptr); + + if (pasteRTV) { + // Save current pipeline state + ID3D11BlendState* oldBlendState = nullptr; + float oldBlendFactor[4]; + UINT oldSampleMask; + context->OMGetBlendState(&oldBlendState, oldBlendFactor, &oldSampleMask); + + ID3D11VertexShader* oldVS = nullptr; + context->VSGetShader(&oldVS, nullptr, nullptr); + ID3D11PixelShader* oldPS = nullptr; + context->PSGetShader(&oldPS, nullptr, nullptr); + + UINT oldNumVPs = 1; + D3D11_VIEWPORT oldVP; + context->RSGetViewports(&oldNumVPs, &oldVP); + + ID3D11ShaderResourceView* oldPSSRV = nullptr; + context->PSGetShaderResources(0, 1, &oldPSSRV); + ID3D11SamplerState* oldPSSampler = nullptr; + context->PSGetSamplers(0, 1, &oldPSSampler); + ID3D11Buffer* oldPSCB = nullptr; + context->PSGetConstantBuffers(0, 1, &oldPSCB); + + // Ensure CB exists (lazy create, matching Upscaling.cpp pattern) + if 
(!upscaling.vrFeatheredCompositeCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, upscaling.vrFeatheredCompositeCB.put())); + } + + // Set shared state: VS, PS, IA, blend + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context->VSSetShader(upscaling.GetUpscaleVS(), nullptr, 0); + context->PSSetShader(upscaling.vrFeatheredCompositePS.get(), nullptr, 0); + context->RSSetState(upscaling.upscaleRasterizerState.get()); + + float blendFactor[4] = { 0, 0, 0, 0 }; + context->OMSetBlendState(upscaling.vrFeatheredCompositeBlendState.get(), blendFactor, 0xFFFFFFFF); + + // Bind g_postPPCopy SRV as crop source at t0 + ID3D11ShaderResourceView* srvs[1] = { g_postPPCopySRV.get() }; + context->PSSetShaderResources(0, 1, srvs); + + // Bind render target + ID3D11RenderTargetView* rtvs[1] = { pasteRTV }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + // Create/use linear sampler (use Upscaling's if available) + if (!upscaling.vrLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + globals::d3d::device->CreateSamplerState(&sampDesc, upscaling.vrLinearSampler.put()); + } + ID3D11SamplerState* samplers[1] = { upscaling.vrLinearSampler.get() }; + context->PSSetSamplers(0, 1, samplers); + + for (uint32_t i = 0; i < 2; i++) { + uint32_t eyeOffset = i * eyeW; + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t offsetCenterX = (uint32_t)std::clamp((int32_t)baseCenterX + eyeNasalShift, 0, (int32_t)(eyeW - centerW)); + + // Set viewport to this eye region within the SBS submit texture + D3D11_VIEWPORT vp = {}; + vp.TopLeftX = (float)eyeOffset; + vp.TopLeftY = 0.0f; + vp.Width = (float)eyeW; + vp.Height = (float)eyeH; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + + // Update constant buffer with crop rect in SCREEN-SPACE pixel coordinates. + // SV_Position in the pixel shader is in screen space (not viewport-relative): + // for eye 0, x ranges [0, eyeW); for eye 1, x ranges [eyeW, 2*eyeW). + // CropOrigin must therefore include the eye offset so distance calculations + // in FeatheredCompositePS work correctly for both eyes. + // SrcUVOrigin/Scale remap crop-local [0,1] UV to the correct eye region + // within the full SBS g_postPPCopy texture. + uint32_t fullW = eyeW * 2; + uint32_t fullH = eyeH; + float srcUVOriginX = (float)(eyeOffset + offsetCenterX) / (float)fullW; + float srcUVOriginY = (float)centerY / (float)fullH; + float srcUVScaleX = (float)centerW / (float)fullW; + float srcUVScaleY = (float)centerH / (float)fullH; + + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(upscaling.vrFeatheredCompositeCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + float originX, originY; + float sizeX, sizeY; + float featherWidth; + float pad0; + float srcUVOriginX, srcUVOriginY; + float srcUVScaleX, srcUVScaleY; + float pad1[2]; + } cbData = { + (float)(eyeOffset + offsetCenterX), (float)centerY, + (float)centerW, (float)centerH, + featherPixels, 0.0f, + srcUVOriginX, srcUVOriginY, + srcUVScaleX, srcUVScaleY, + {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(upscaling.vrFeatheredCompositeCB.get(), 0); + + ID3D11Buffer* cbs[1] = { upscaling.vrFeatheredCompositeCB.get() }; + context->PSSetConstantBuffers(0, 1, cbs); + + context->Draw(3, 0); + } + + if (shouldLog) + 
logger::info("[TAAReorder] BSImagespaceShaderHook: feathered composite {}x{} at ({},{}) feather={:.1f}px nasalShift={} per-eye onto submit", + centerW, centerH, baseCenterX, centerY, featherPixels, nasalShift); + + // Restore pipeline state + context->OMSetBlendState(oldBlendState, oldBlendFactor, oldSampleMask); + context->RSSetViewports(1, &oldVP); + context->VSSetShader(oldVS, nullptr, 0); + context->PSSetShader(oldPS, nullptr, 0); + context->PSSetShaderResources(0, 1, &oldPSSRV); + context->PSSetSamplers(0, 1, &oldPSSampler); + context->PSSetConstantBuffers(0, 1, &oldPSCB); + + if (oldBlendState) + oldBlendState->Release(); + if (oldVS) + oldVS->Release(); + if (oldPS) + oldPS->Release(); + if (oldPSSRV) + oldPSSRV->Release(); + if (oldPSSampler) + oldPSSampler->Release(); + if (oldPSCB) + oldPSCB->Release(); + + pasteRTV->Release(); + } else { + useFeathered = false; // fall through to hard copy + if (shouldLog) + logger::warn("[TAAReorder] BSImagespaceShaderHook: feathered path - could not re-acquire submitRTV, falling back to hard copy"); + } + } + if (!useFeathered) { + // Hard edge path: CopySubresourceRegion (feather disabled or resources not ready) + for (uint32_t i = 0; i < 2; i++) { + uint32_t eyeOffset = i * eyeW; + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t offsetCenterX = (uint32_t)std::clamp((int32_t)baseCenterX + eyeNasalShift, 0, (int32_t)(eyeW - centerW)); + + D3D11_BOX srcBox = { + eyeOffset + offsetCenterX, centerY, 0, + eyeOffset + offsetCenterX + centerW, centerY + centerH, 1 + }; + context->CopySubresourceRegion(submitTex, 0, + eyeOffset + offsetCenterX, centerY, 0, + g_postPPCopy.get(), 0, &srcBox); + } + + if (shouldLog) + logger::info("[TAAReorder] BSImagespaceShaderHook: hard-copy pasted DLSS crop {}x{} at ({},{}) nasalShift={} per-eye onto submit", + centerW, centerH, baseCenterX, centerY, nasalShift); + } + + g_dlssPasteComplete = true; + } else if (shouldLog) { + logger::info("[TAAReorder] BSImagespaceShaderHook: skip paste (dlssReady={} submitTex={} postPPCopy={})", + g_dlssReady, (void*)submitTex, (void*)g_postPPCopy.get()); + } + + if (submitTex) + submitTex->Release(); + } + + // ─── Depth/stencil registration hook: diagnostic logging ─── + // Tracks dimensions per slot and logs whenever they change. + // data[0]=width, data[1]=height based on initial analysis. + void DepthStencilRegHook::thunk(void* manager, uint32_t slot, void* desc) + { + if (desc && slot < 32) { + auto* data = reinterpret_cast(desc); + static uint32_t lastWidth[32] = {}; + static uint32_t lastHeight[32] = {}; + static uint32_t callCount[32] = {}; + + callCount[slot]++; + bool dimsChanged = (data[0] != lastWidth[slot] || data[1] != lastHeight[slot]); + if (dimsChanged) { + logger::info("[TAAReorder] DepthStencilReg: slot={} {}x{} → {}x{} (call #{}) data[2..7]= {} {} {} {} {} {}", + slot, lastWidth[slot], lastHeight[slot], data[0], data[1], callCount[slot], + data[2], data[3], data[4], data[5], data[6], data[7]); + lastWidth[slot] = data[0]; + lastHeight[slot] = data[1]; + } + } + + func(manager, slot, desc); + } + + // ─── Hidden area mesh render hook: pass-through ─── + // HAM renders normally. 
Previous "frozen frame" artifacts at the HAM boundary + // were caused by the depth upscaler's conservative blending (GatherRed + lerp) + // leaking depth=0 mask values into valid depth. Fixed in DepthUpscalePS.hlsl + // by switching to pure point sampling. + // HiddenAreaMeshHook removed — the passthrough hook was breaking HAM + // by corrupting the original function via Detours on an unverified RVA. + + // ─── BSOpenVR::Submit hook: diagnostic logging ─── + void SubmitHook::thunk(void* thisPtr, void* textureHandle) + { + if (g_diagCounter == 0 && textureHandle) { + auto tex2d = static_cast(textureHandle); + D3D11_TEXTURE2D_DESC desc = {}; + tex2d->GetDesc(&desc); + auto base = REL::Module::get().base(); + auto retAddr = reinterpret_cast(_ReturnAddress()); + logger::info("[TAAReorder] Submit: tex=0x{:X} {}x{} fmt={} dlssPasted={} callerRVA=0x{:X}", + (uintptr_t)textureHandle, desc.Width, desc.Height, (uint32_t)desc.Format, + g_dlssPasteComplete, retAddr - base); + } + + func(thisPtr, textureHandle); + } + + // ─── Post-processing conductor call hook: pass-through (tracking only) ─── + // Inner conductor call at 0x1325086 inside BSImagespaceShader::Render. + // Only tracks g_insideConductor state. DLSS logic is in BSImagespaceShaderHook. + void ConductorCallHook::thunk(void* a1, void* a2, void* a3, void* a4) + { + g_insideConductor = true; + func(a1, a2, a3, a4); + g_insideConductor = false; + } + + void InitEarly() + { + auto base = REL::Module::get().base(); + + // ─── Hook: DepthStencilRegistration (RVA 0x00DC79D0) ─── + // Must be installed before renderer initialization (which registers depth/stencil targets). + // Called from Upscaling::Load(), before D3D device creation. 
+ DepthStencilRegHook::func = reinterpret_cast(base + 0x00DC79D0); + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&DepthStencilRegHook::func), reinterpret_cast(DepthStencilRegHook::thunk)); + DetourTransactionCommit(); + + logger::info("[TAAReorder] InitEarly: DepthStencil registration hooked at RVA 0x00DC79D0"); + } + + void Init() + { + auto base = REL::Module::get().base(); + + // ─── Core pointers ─── + g_pRendererSingleton = reinterpret_cast(base + 0x034234C0); + + // ─── Hook: ForceTAASetter (RVA 0x005C8EE0) ─── + ForceTAASetter::func = base + 0x005C8EE0; + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&ForceTAASetter::func), reinterpret_cast(ForceTAASetter::thunk)); + DetourTransactionCommit(); + + // ─── Hook: TAAStateMachine (RVA 0x005C8F10) ─── + TAAStateMachine::func = base + 0x005C8F10; + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&TAAStateMachine::func), reinterpret_cast(TAAStateMachine::thunk)); + DetourTransactionCommit(); + + // ─── Hook: ExecutePass (RVA 0x012D2540) ─── + ExecutePassHook::func = reinterpret_cast(base + 0x012D2540); + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&ExecutePassHook::func), reinterpret_cast(ExecutePassHook::thunk)); + DetourTransactionCommit(); + + // ─── Hook: BSOpenVR::Submit (RVA 0x00C53920) ─── + SubmitHook::func = reinterpret_cast(base + 0x00C53920); + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&SubmitHook::func), reinterpret_cast(SubmitHook::thunk)); + DetourTransactionCommit(); + + // ─── Hook: BSImagespaceShader via write_thunk_call at RVA 0x132C827 ─── + // Wraps BSImagespaceShader::Render from the Orchestrator level. + // func() encompasses conductor (Phase 2A) + Phase 5 (TAA+DRS) + Submit. + // After func(): DLSS eval + paste. 
Matches PureDark's BSImagespaceShader_Hook_VR. + stl::write_thunk_call(base + 0x132C827); + + // ─── Hook: Inner conductor call via write_thunk_call at RVA 0x1325086 ─── + // Pass-through, only tracks g_insideConductor state. + stl::write_thunk_call(base + 0x1325086); + + g_initialized = true; + + logger::info("[TAAReorder] Initialized — base=0x{:X}", base); + logger::info("[TAAReorder] Post-pipeline DLSS mode (periphery TAA)"); + logger::info("[TAAReorder] BSImagespaceShader hooked via write_thunk_call at RVA 0x132C827 (DLSS eval + paste)"); + logger::info("[TAAReorder] Inner conductor hooked via write_thunk_call at RVA 0x1325086 (tracking only)"); + logger::info("[TAAReorder] BSOpenVR::Submit hooked at RVA 0x00C53920"); + } +} diff --git a/src/Features/TAAReorder.h b/src/Features/TAAReorder.h new file mode 100644 index 0000000000..9720cd2e5b --- /dev/null +++ b/src/Features/TAAReorder.h @@ -0,0 +1,174 @@ +#pragma once + +// TAA Reordering for VR DLSS Viewport Scaling (Post-Conductor DLSS) +// +// PureDark's approach: DLSS is evaluated AFTER BSImagespaceShader::Render +// completes (which includes the conductor + Phase 5 TAA + DRS). +// +// Flow: +// 1. BSImagespaceShaderHook wraps the call at 0x132C827: +// func() runs → conductor executes Phase 2A → Phase 5 (TAA + DRS) +// a. ExecutePassHook captures Phase 2A output to g_postPPCopy +// 2. After func() returns in BSImagespaceShaderHook: +// a. Gets submit texture from bound RT (now has TAA-upscaled content) +// b. Evaluates DLSS on g_postPPCopy (post-PP intermediate) +// c. Pastes DLSS center from g_postPPCopy onto submit texture +// 3. Engine continues: Orchestrator → Scaleform Display (UI) → Submit +// 4. Lock DRS + UpdateCameraData (in Main_PostProcessing::thunk after func()) +// +// Both DLSS and TAA get Phase 2A's PP applied: +// - TAA: naturally (Phase 2A runs before Phase 5 in conductor) +// - DLSS: processes the Phase 2A output copy (g_postPPCopy) +// +// All RVAs are VR-specific (SkyrimVR.exe). 
+ +#include +#include +#include +#include + +struct Upscaling; + +namespace TAAReorder +{ + // ─── Function pointer types ─── + using ExecutePass_t = void (*)(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag); + using BSOpenVRSubmit_t = void (*)(void* thisPtr, void* textureHandle); + + // ─── Resolved global data pointers ─── + inline uintptr_t* g_pRendererSingleton = nullptr; + inline bool g_initialized = false; + + // ─── Diagnostics (rate-limited logging) ─── + inline int g_diagCounter = 0; + inline constexpr int DIAG_INTERVAL = 300; + + // ─── Per-frame sequence counter (for verifying call ordering) ─── + inline int g_frameSeqCounter = 0; + + // ─── ExecutePass hook (conductor interposition) ─── + // RVA: 0x012D2540 — called by the conductor for each render pass. + // Copies Phase 2A output RT to g_postPPCopy for DLSS to process. + struct ExecutePassHook + { + static void thunk(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag); + static inline ExecutePass_t func = nullptr; + }; + + // ─── BSImagespaceShader hook (DLSS eval + paste after pipeline completes) ─── + // RVA: 0x132C827 — write_thunk_call wrapping BSImagespaceShader::Render. + // This is the OUTER call that encompasses the conductor + Phase 5 (TAA+DRS). + // After func() returns: submit texture has TAA-upscaled content. + // We evaluate DLSS on g_postPPCopy and paste the center onto submit texture. + // (Matches PureDark's BSImagespaceShader_Hook_VR) + struct BSImagespaceShaderHook + { + static void thunk(void* a_this, uint64_t a_param); + static inline REL::Relocation func; + }; + + // ─── Post-processing conductor call hook (pass-through, tracking only) ─── + // RVA: 0x1325086 — inner conductor call inside BSImagespaceShader::Render. + // Only used for g_insideConductor tracking. 
+ struct ConductorCallHook + { + static void thunk(void* a1, void* a2, void* a3, void* a4); + static inline REL::Relocation func; + }; + + // ─── Post-PP copy (Phase 2A output, DLSS color source) ─── + // After Phase 2A completes, ExecutePassHook copies the bound RT here. + // BSImagespaceShaderHook passes this to Upscale() as colorSourceOverride. + // After DLSS, FinalizePerEyeOutputs writes DLSS center back into this texture. + inline winrt::com_ptr g_postPPCopy; + inline winrt::com_ptr g_postPPCopySRV; + inline bool g_postPPReady = false; + + // ─── DLSS evaluation complete flag ─── + // Set after BSImagespaceShaderHook evaluates DLSS on g_postPPCopy. + // Used to gate the DLSS center paste step. + inline bool g_dlssReady = false; + + // ─── DLSS paste complete flag ─── + // Set after BSImagespaceShaderHook pastes DLSS center onto submit texture; read by SubmitHook's diagnostic log. + inline bool g_dlssPasteComplete = false; + + // ─── Phase 5 tracking (set by ExecutePassHook once pass index 35 has executed) ─── + inline bool g_phase5Complete = false; + + // ─── Conductor state tracking (g_insideConductor is toggled by ConductorCallHook around func()) ─── + inline bool g_insideConductor = false; + inline int g_bsHookCallCount = 0; + + // ─── RGB-only blend state (may be useful for future feathering) ─── + inline winrt::com_ptr g_rgbOnlyBlendState; + + // ─── Stencil state for HAM-aware compositing ─── + // DepthEnable=false, StencilEnable=true, StencilFunc=EQUAL, StencilRef=0. + // Only writes to pixels where stencil==0 (visible, non-HAM pixels). + // Matches PureDark's approach in Evaluate()/RenderTexture(). + inline winrt::com_ptr g_hamStencilState; + + // ─── Cached UAV for submit texture (ClearHMDMask + ForceAlpha on submit after DLSS paste) ─── + inline winrt::com_ptr g_submitTexUAV; + inline ID3D11Texture2D* g_submitTexUAVOwner = nullptr; // track which texture the UAV belongs to + + // ─── ForceAlpha compute shader (sets alpha=1.0 to fix Scaleform UI rendering) ─── + inline winrt::com_ptr g_forceAlphaCS; + + // ─── Setter hook: Setter A (Force TAA) ─── + // RVA: 0x005C8EE0 — unconditional TAA enable. 
+ // Pass-through (we want TAA to run natively). + struct ForceTAASetter + { + static void thunk(); + static inline REL::Relocation func; + }; + + // ─── Setter hook: Setter B (TAA State Machine) ─── + // RVA: 0x005C8F10 — conditional TAA enable/disable. + // Pass-through (we want TAA to run natively). + struct TAAStateMachine + { + static void thunk(); + static inline REL::Relocation func; + }; + + // ─── Depth/stencil registration hook ─── + // RVA: 0x00DC79D0 — registers depth/stencil targets in the RT manager (+0x1388). + // Separate path from color RTs (registered via sub_417980 at +0x1350). + // Diagnostic only: the thunk logs per-slot dimension changes (slots below 32) and forwards + // the descriptor to the original function unmodified; it does not scale any dimensions. + using RegisterDepthStencil_t = void (*)(void* manager, uint32_t slot, void* desc); + struct DepthStencilRegHook + { + static void thunk(void* manager, uint32_t slot, void* desc); + static inline RegisterDepthStencil_t func = nullptr; + }; + + // ─── BSOpenVR::Submit hook (VR frame submission interception) ─── + // RVA: 0x00C53920 — BSOpenVR::Submit, vtable[3]. + // Diagnostic logging only. + struct SubmitHook + { + static void thunk(void* thisPtr, void* textureHandle); + static inline BSOpenVRSubmit_t func = nullptr; + }; + + // Check if TAA reordering should be active based on current settings + bool ShouldReorderTAA(); + + // Ensure g_postPPCopy matches the source texture dimensions/format + void EnsurePostPPCopy(ID3D11Texture2D* sourceTex); + + // Helper: draw fullscreen format-converting copy (Load-based, 1:1 pixel copy). + void DrawFullscreenCopy(ID3D11ShaderResourceView* srcSRV, ID3D11RenderTargetView* dstRTV, + float vpX, float vpY, float vpW, float vpH); + + // Install hooks that must be in place before renderer initialization (depth/stencil reg). + // Call from Upscaling::Load() (VR only). + void InitEarly(); + + // Initialize all pointers and install hooks. Call once from PostPostLoad (VR only). 
+ void Init(); +} diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index 604f49e986..fbc97118e6 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -1,8 +1,10 @@ #include "Upscaling.h" #include "Deferred.h" +#include "Features/VRStereoOptimizations.h" #include "Hooks.h" #include "State.h" +#include "TAAReorder.h" #include "Upscaling/DX12SwapChain.h" #include "Upscaling/FidelityFX.h" #include "Upscaling/Streamline.h" @@ -25,7 +27,11 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( sharpnessFSR, sharpnessDLSS, presetDLSS, - useGatherWideKernel); + useGatherWideKernel, + vrDlssViewportScale, + vrPeripheryTAA, + vrDlssCropOffsetX, + vrDlssFeatherWidth); decltype(&D3D11CreateDeviceAndSwapChain) ptrD3D11CreateDeviceAndSwapChainUpscaling; @@ -61,7 +67,9 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( upscaling.CheckBackendFeatures(pAdapter); // Use better swap effect to prevent tearing and improve performance - pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + // But don't change it for VR as it can affect frame pacing with the VR compositor + if (!globals::game::isVR) + pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; bool shouldProxy = !globals::game::isVR; if (shouldProxy) @@ -141,8 +149,14 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( ppImmediateContext); if (upscaling.IsBackendInitialized()) { - upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); - upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + // Skip Streamline interface wrapping for VR — slUpgradeInterface wraps the D3D + // device and swap chain with Streamline proxy objects, which disrupts VR compositor + // frame pacing (causes judder/stuttering). DLSS still functions without wrapped + // interfaces; only frame generation requires them (and that's already VR-gated above). 
+ if (!globals::game::isVR) { + upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); + upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + } upscaling.SetBackendD3DDevice(*ppDevice); upscaling.PostBackendDevice(); } @@ -238,6 +252,41 @@ void Upscaling::DrawSettings() ImGui::Text("Set to 'Default' for automatic selection based on your Upscale Preset and hardware."); ImGui::Text("Changing this setting requires a restart to take effect."); } + + if (globals::game::isVR) { + if (ImGui::TreeNodeEx("VR Viewport Scaling", ImGuiTreeNodeFlags_DefaultOpen)) { + ImGui::SliderFloat("DLSS Viewport Scale", &settings.vrDlssViewportScale, 0.5f, 1.0f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Controls the fraction of each eye's view that DLSS processes."); + ImGui::Text("Lower values = better performance, less visible area upscaled by DLSS."); + ImGui::Text("The VR lens hides the periphery, so 0.7-0.85 is recommended."); + ImGui::Text("The periphery outside the DLSS region is filled with a bilinear upscale."); + } + + if (settings.vrDlssViewportScale < 1.0f) { + bool peripheryTAA = settings.vrPeripheryTAA != 0; + if (ImGui::Checkbox("Periphery TAA", &peripheryTAA)) + settings.vrPeripheryTAA = peripheryTAA ? 
1 : 0; + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Applies temporal anti-aliasing to the bilinear-upscaled periphery."); + ImGui::Text("Reduces shimmer and improves peripheral quality."); + ImGui::Text("The DLSS center region passes through unchanged."); + } + + ImGui::SliderFloat("Nasal Crop Offset", &settings.vrDlssCropOffsetX, 0.0f, 0.3f, "%.3f"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Shifts the DLSS crop region toward the nose."); + ImGui::Text("Higher values = more of the nasal view processed by DLSS."); + ImGui::Text("0.0 = centered, 0.1-0.2 recommended."); + } + + // Feathered edge disabled pending fix — hidden from UI + // ImGui::SliderFloat("Crop Edge Feather", &settings.vrDlssFeatherWidth, 0.0f, 0.1f, "%.3f"); + } + + ImGui::TreePop(); + } + } } if (globals::game::isVR) { @@ -460,6 +509,9 @@ void Upscaling::LoadSettings(json& o_json) logger::warn("[Upscaling] Loaded useGatherWideKernel {} out of range, clamping to 1", settings.useGatherWideKernel); settings.useGatherWideKernel = 1; } + settings.vrDlssViewportScale = std::clamp(settings.vrDlssViewportScale, 0.5f, 1.0f); + settings.vrDlssCropOffsetX = std::clamp(settings.vrDlssCropOffsetX, 0.0f, 0.3f); + settings.vrDlssFeatherWidth = std::clamp(settings.vrDlssFeatherWidth, 0.0f, 0.1f); auto iniSettingCollection = globals::game::iniPrefSettingCollection; if (iniSettingCollection) { auto setting = iniSettingCollection->GetSetting("bUseTAA:Display"); @@ -487,6 +539,10 @@ void Upscaling::DataLoaded() void Upscaling::Load() { *(uintptr_t*)&ptrD3D11CreateDeviceAndSwapChainUpscaling = SKSE::PatchIAT(hk_D3D11CreateDeviceAndSwapChainUpscaling, "d3d11.dll", "D3D11CreateDeviceAndSwapChain"); + + // Install depth/stencil registration hook early (before renderer creates targets) + if (globals::game::isVR) + TAAReorder::InitEarly(); } struct BSImageSpace_Init_FXAA @@ -530,6 +586,10 @@ void Upscaling::PostPostLoad() stl::detour_thunk(REL::RelocationID(98974, 105626)); 
logger::info("[Upscaling] Installed hooks"); + + // Install TAA reordering hooks for VR periphery TAA + if (globals::game::isVR) + TAAReorder::Init(); } Upscaling::UpscaleMethod Upscaling::GetUpscaleMethod() const @@ -639,6 +699,13 @@ void Upscaling::DestroyUpscalingTextureResources(UpscaleMethod a_upscalemethod) } } + // VR periphery TAA textures - only needed for DLSS with viewport scaling + if (a_upscalemethod != UpscaleMethod::kDLSS) { + vrPreTAACopy = nullptr; + for (int i = 0; i < 2; i++) + vrTAAdPerEye[i].reset(); + } + // Motion vector copy texture is only needed for DLSS - destroy when switching away from DLSS if (a_upscalemethod != UpscaleMethod::kDLSS) { if (motionVectorCopyTexture) { @@ -692,6 +759,8 @@ void Upscaling::CheckResources(UpscaleMethod a_upscalemethod) vrIntermediateMotionVectors[i].reset(); vrIntermediateReactiveMask[i].reset(); vrIntermediateTransparencyMask[i].reset(); + vrFinalOutput[i].reset(); + vrCropColorIn[i].reset(); } } } @@ -744,6 +813,8 @@ ID3D11PixelShader* Upscaling::GetDepthRefractionUpscalePS() if (!depthRefractionUpscalePS) { logger::debug("Compiling DepthRefractionUpscalePS.hlsl"); std::vector> defines = { { "PSHADER", "" } }; + if (globals::game::isVR) + defines.push_back({ "VR", "" }); depthRefractionUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DepthRefractionUpscalePS.hlsl", defines, "ps_5_0")); } @@ -771,6 +842,37 @@ ID3D11VertexShader* Upscaling::GetUpscaleVS() return upscaleVS.get(); } +ID3D11PixelShader* Upscaling::GetDlssCompositePS() +{ + if (!vrDlssCompositePS) { + logger::debug("Compiling DLSSCompositePS.hlsl"); + vrDlssCompositePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0")); + } + + return vrDlssCompositePS.get(); +} + +ID3D11PixelShader* Upscaling::GetDlssUpscalePS() +{ + if (!vrDlssUpscalePS) { + logger::debug("Compiling DLSSCompositePS.hlsl (BILINEAR_UPSCALE)"); + 
vrDlssUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader( + L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", + { { "PSHADER", "" }, { "BILINEAR_UPSCALE", "" } }, "ps_5_0")); + } + + if (!vrDlssUpscaleCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = sizeof(DlssCompositeCB); + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrDlssUpscaleCB.put())); + } + + return vrDlssUpscalePS.get(); +} + eastl::unique_ptr Upscaling::CreateTextureFromSource(ID3D11Resource* src, uint32_t width, uint32_t height, bool copyBindFlags, bool createSRV, bool createUAV, const char* name) { @@ -820,7 +922,7 @@ void Upscaling::CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight std::string suffix = (i == 0) ? "Left" : "Right"; vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, inWidth, inHeight, false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); - vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, false, ("Upscale_ColorOut_" + suffix).c_str()); + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); // Depth: R32_TYPELESS base (matches kMAIN), with R32_FLOAT SRV for ClearHMDMaskCS. // CopySubresourceRegion requires matching typeless formats; SRV reinterprets as R32_FLOAT. 
@@ -873,48 +975,228 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; - bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; - if (!needsRecreate) { - needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || - vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); - } - if (needsRecreate) { - logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", - eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); - CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, - colorSrc, mvecSrc, reactiveSrc, transparencySrc); - } + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + + if (viewportScaling) { + // Viewport scaling: physically crop all DLSS inputs to eliminate non-zero subrect offsets. + // vrIntermediateColorIn stays at FULL render-res (for ClearHMDMask + FillPeriphery). + // All other DLSS inputs (depth, mvec, masks) are CROP-sized. + // vrCropColorIn is CROP-sized (DLSS color input, extracted from masked full color). + // This ensures DLSS sees all inputs at {0,0} with no subrect base offsets, + // which is critical for correct temporal reprojection during camera motion. 
+ uint32_t cropWidthIn = (uint32_t)(eyeWidthIn * vpScale); + uint32_t cropHeightIn = (uint32_t)(eyeHeightIn * vpScale); + uint32_t cropWidthOut = (uint32_t)(eyeWidthOut * vpScale); + uint32_t cropHeightOut = (uint32_t)(eyeHeightOut * vpScale); + + bool needsRecreate = !vrIntermediateColorIn[0] || !vrCropColorIn[0] || !vrIntermediateDepth[0] || + !vrIntermediateColorOut[0] || !vrFinalOutput[0]; + if (!needsRecreate) { + // Check format too — periphery TAA feeds R8G8B8A8 post-PP intermediate, + // while normal DLSS feeds R11G11B10 kMAIN. Must recreate on format change. + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrCropColorIn[0]->desc.Width != cropWidthIn || + vrCropColorIn[0]->desc.Height != cropHeightIn || + vrIntermediateDepth[0]->desc.Width != cropWidthIn || + vrIntermediateDepth[0]->desc.Height != cropHeightIn || + vrIntermediateColorOut[0]->desc.Width != cropWidthOut || + vrIntermediateColorOut[0]->desc.Height != cropHeightOut || + vrFinalOutput[0]->desc.Width != eyeWidthOut || + vrFinalOutput[0]->desc.Height != eyeHeightOut); + } - // Extract both eyes' inputs from combined stereo buffers - for (uint32_t i = 0; i < 2; ++i) { - uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; - D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR viewport-scaled intermediates: full {}x{}, crop in {}x{}, crop out {}x{}", + eyeWidthIn, eyeHeightIn, cropWidthIn, cropHeightIn, cropWidthOut, cropHeightOut); + + for (int i = 0; i < 2; i++) { + std::string suffix = (i == 0) ? 
"Left" : "Right"; + + // Full-size color for ClearHMDMask + FillPeriphery + vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, eyeWidthIn, eyeHeightIn, + false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); + + // Crop-sized DLSS color input (needs UAV for ClearHMDMask) + vrCropColorIn[i] = CreateTextureFromSource(colorSrc, cropWidthIn, cropHeightIn, + false, true, true, ("Upscale_CropColorIn_" + suffix).c_str()); + + // Crop-sized DLSS output + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, cropWidthOut, cropHeightOut, + false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); + + // Crop-sized depth (R32_TYPELESS with R32_FLOAT SRV) + { + D3D11_TEXTURE2D_DESC depthDesc = {}; + depthDesc.Width = cropWidthIn; + depthDesc.Height = cropHeightIn; + depthDesc.MipLevels = 1; + depthDesc.ArraySize = 1; + depthDesc.Format = DXGI_FORMAT_R32_TYPELESS; + depthDesc.SampleDesc.Count = 1; + depthDesc.Usage = D3D11_USAGE_DEFAULT; + depthDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + vrIntermediateDepth[i] = eastl::make_unique(depthDesc); + Util::SetResourceName(vrIntermediateDepth[i]->resource.get(), ("Upscale_Depth_" + suffix).c_str()); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + vrIntermediateDepth[i]->CreateSRV(srvDesc); + } - context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); - } + // 
Crop-sized motion vectors, reactive mask, transparency mask + vrIntermediateMotionVectors[i] = CreateTextureFromSource(mvecSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_MVec_" + suffix).c_str()); + vrIntermediateReactiveMask[i] = CreateTextureFromSource(reactiveSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Reactive_" + suffix).c_str()); + vrIntermediateTransparencyMask[i] = CreateTextureFromSource(transparencySrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Transparency_" + suffix).c_str()); + + // Full display-res composition target (needs RTV for PS feathered composite) + vrFinalOutput[i] = CreateTextureFromSource(colorSrc, eyeWidthOut, eyeHeightOut, + false, true, true, ("Upscale_FinalOutput_" + suffix).c_str()); + // Add render target bind flag and create RTV for pixel shader composite + { + D3D11_TEXTURE2D_DESC finalDesc; + vrFinalOutput[i]->resource->GetDesc(&finalDesc); + if (!(finalDesc.BindFlags & D3D11_BIND_RENDER_TARGET)) { + // Recreate with render target support + finalDesc.BindFlags |= D3D11_BIND_RENDER_TARGET; + vrFinalOutput[i] = eastl::make_unique(finalDesc); + Util::SetResourceName(vrFinalOutput[i]->resource.get(), ("Upscale_FinalOutput_" + suffix).c_str()); + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = finalDesc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = 1; + vrFinalOutput[i]->CreateSRV(srvDesc); + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = finalDesc.Format; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + vrFinalOutput[i]->CreateUAV(uavDesc); + } + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = finalDesc.Format; + rtvDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + rtvDesc.Texture2D.MipSlice = 0; + vrFinalOutput[i]->CreateRTV(rtvDesc); + } + } + } - // Zero color where depth == 0 (HMD hidden 
area) in each per-eye buffer. - // Depth is read from the combined stereo SRV at the per-eye offset; color is written - // to the isolated per-eye UAV (ColorOffsetX = 0). - auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + // Copy full eye to full-size vrIntermediateColorIn (raw render-res, no HMD mask yet) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + } + + // Nasal offset: shift crop toward nose + // Eye 0 (left): nose is right -> shift right (+) + // Eye 1 (right): nose is left -> shift left (-) + float nasalShiftFrac = settings.vrDlssCropOffsetX; + uint32_t baseCropOffsetX = (eyeWidthIn - cropWidthIn) / 2; + uint32_t cropOffsetY = (eyeHeightIn - cropHeightIn) / 2; + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + + // Extract DLSS crop from raw buffer (before TAA or HMD mask), then mask the crop directly. + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + + // Per-eye nasal offset in render resolution space + int32_t nasalShift = (int32_t)(nasalShiftFrac * eyeWidthIn); + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t cropOffsetX = (uint32_t)std::clamp((int32_t)baseCropOffsetX + eyeNasalShift, 0, (int32_t)(eyeWidthIn - cropWidthIn)); + + // Crop color from raw (unmasked, non-TAA'd) full-size buffer + D3D11_BOX cropBox = { cropOffsetX, cropOffsetY, 0, cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrCropColorIn[i]->resource.get(), 0, 0, 0, 0, + vrIntermediateColorIn[i]->resource.get(), 0, &cropBox); + + // ClearHMDMask directly on the crop (depth offset accounts for eye + crop position in stereo buffer) + ClearHMDMask(vrCropColorIn[i]->uav.get(), depthTexture.depthSRV, + cropWidthIn, cropHeightIn, offsetXIn + cropOffsetX, 0, cropOffsetY); + + // Crop depth/mvec/reactive/transparency directly from stereo buffers + D3D11_BOX stereoCropBox = { offsetXIn + cropOffsetX, cropOffsetY, 0, + offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, + depthSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, + mvecSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, + reactiveSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, + transparencySrc, 0, &stereoCropBox); + } - for (uint32_t i = 0; i < 2; ++i) { - uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; - ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, - eyeWidthIn, eyeHeightIn, depthOffset, 0); + // ClearHMDMask on full-size buffer (for FillPeriphery) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? 
eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } + } else { + // Non-viewport-scaling path: all textures at full per-eye dimensions + bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; + if (!needsRecreate) { + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || + vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); + } + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", + eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, + colorSrc, mvecSrc, reactiveSrc, transparencySrc); + } + + // Release viewport-scaling-specific textures + for (int i = 0; i < 2; i++) { + vrCropColorIn[i].reset(); + vrFinalOutput[i].reset(); + vrTAAdPerEye[i].reset(); + } + vrPreTAACopy = nullptr; + + // Copy full eye to per-eye intermediates + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? 
eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); + } + + // Zero color where depth == 0 (HMD hidden area) in each per-eye buffer + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } } if (state->frameAnnotations) state->EndPerfEvent(); } -void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) +void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst, bool eye0Only) { if (!globals::game::isVR) return; @@ -929,11 +1211,258 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); uint32_t eyeHeightOut = (uint32_t)screenSize.y; - // Write upscaled outputs back - for (uint32_t i = 0; i < 2; ++i) { + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + + uint32_t eyeCount = eye0Only ? 1 : 2; + for (uint32_t i = 0; i < eyeCount; ++i) { uint32_t offsetXOut = (i == 1) ? 
eyeWidthOut : 0; - D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; - context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + + if (viewportScaling && vrFinalOutput[i]) { + // Paste crop-sized DLSS output into center of full-size composition target + uint32_t dlssWidthOut = vrIntermediateColorOut[i]->desc.Width; + uint32_t dlssHeightOut = vrIntermediateColorOut[i]->desc.Height; + + // Calculate paste position with nasal offset (in display resolution space) + int32_t nasalShift = (int32_t)(settings.vrDlssCropOffsetX * eyeWidthOut); + int32_t eyeNasalShift = (i == 0) ? nasalShift : -nasalShift; + uint32_t pasteX = (uint32_t)std::clamp((int32_t)((eyeWidthOut - dlssWidthOut) / 2) + eyeNasalShift, 0, (int32_t)(eyeWidthOut - dlssWidthOut)); + uint32_t pasteY = (eyeHeightOut - dlssHeightOut) / 2; + + float featherPixels = settings.vrDlssFeatherWidth * eyeWidthOut; + + static uint32_t featherLogCount = 0; + if (featherLogCount < 3) { + logger::info("[Upscaling] FinalizePerEyeOutputs eye{}: featherPixels={:.1f} (width={:.4f} * eyeW={}), dlss={}x{}, paste=({},{})", + i, featherPixels, settings.vrDlssFeatherWidth, eyeWidthOut, dlssWidthOut, dlssHeightOut, pasteX, pasteY); + } + + if (featherPixels > 0.0f && vrFinalOutput[i]->rtv) { + // Pixel shader feathered composite (preserves periphery TAA via hardware alpha blend) + // Based on PureDark's technique from Skyrim-Upscaler VR (MIT license) + + // Lazy-compile PS and create blend state + if (!vrFeatheredCompositePS) { + vrFeatheredCompositePS.attach(reinterpret_cast(Util::CompileShader( + L"Data/Shaders/Upscaling/FeatheredCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0"))); + if (vrFeatheredCompositePS) + logger::info("[Upscaling] FeatheredCompositePS compiled successfully"); + else + logger::error("[Upscaling] FeatheredCompositePS FAILED to compile!"); + + // Create SrcAlpha/InvSrcAlpha blend state + D3D11_BLEND_DESC blendDesc = {}; + 
blendDesc.RenderTarget[0].BlendEnable = TRUE; + blendDesc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; + blendDesc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; + blendDesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; + blendDesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; + blendDesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; + blendDesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; + blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + DX::ThrowIfFailed(globals::d3d::device->CreateBlendState(&blendDesc, vrFeatheredCompositeBlendState.put())); + + // Create constant buffer (48 bytes: float2 origin, float2 size, float feather, pad, float2 srcUVOrigin, float2 srcUVScale) + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrFeatheredCompositeCB.put())); + } + + if (vrFeatheredCompositePS) { + // Save current OM state + ID3D11BlendState* oldBlendState = nullptr; + float oldBlendFactor[4]; + UINT oldSampleMask; + context->OMGetBlendState(&oldBlendState, oldBlendFactor, &oldSampleMask); + + ID3D11RenderTargetView* oldRTV = nullptr; + ID3D11DepthStencilView* oldDSV = nullptr; + context->OMGetRenderTargets(1, &oldRTV, &oldDSV); + + // Update constant buffer + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrFeatheredCompositeCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + float originX, originY; + float sizeX, sizeY; + float featherWidth; + float pad0; + float srcUVOriginX, srcUVOriginY; + float srcUVScaleX, srcUVScaleY; + float pad1[2]; + } cbData = { + (float)pasteX, (float)pasteY, + (float)dlssWidthOut, (float)dlssHeightOut, + featherPixels, 0.0f, + 0.0f, 0.0f, // SrcUVOrigin: (0,0) for per-eye textures (identity) + 1.0f, 1.0f, // SrcUVScale: (1,1) for per-eye textures 
(identity) + {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(vrFeatheredCompositeCB.get(), 0); + + // Set viewport to match the per-eye final output + D3D11_VIEWPORT vp = {}; + vp.Width = (float)eyeWidthOut; + vp.Height = (float)eyeHeightOut; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + + // Set blend state for feathered alpha compositing + float blendFactor[4] = { 0, 0, 0, 0 }; + context->OMSetBlendState(vrFeatheredCompositeBlendState.get(), blendFactor, 0xFFFFFFFF); + + // Set render target to vrFinalOutput (contains TAA'd periphery) + ID3D11RenderTargetView* rtvs[1] = { vrFinalOutput[i]->rtv.get() }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + // Set shaders + context->VSSetShader(GetUpscaleVS(), nullptr, 0); + context->PSSetShader(vrFeatheredCompositePS.get(), nullptr, 0); + + // Set input assembler for bufferless fullscreen triangle + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + // Bind rasterizer state (no culling) + context->RSSetState(upscaleRasterizerState.get()); + + // Bind crop texture SRV and sampler + ID3D11ShaderResourceView* srvs[1] = { vrIntermediateColorOut[i]->srv.get() }; + context->PSSetShaderResources(0, 1, srvs); + + if (!vrLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&sampDesc, vrLinearSampler.put())); + } + ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; + context->PSSetSamplers(0, 1, samplers); + + // Bind constant buffer + ID3D11Buffer* cbs[1] = { vrFeatheredCompositeCB.get() }; + 
context->PSSetConstantBuffers(0, 1, cbs); + + // Draw fullscreen triangle + context->Draw(3, 0); + + if (featherLogCount < 3) { + logger::info("[Upscaling] FeatheredCompositePS drawn: eye={}, crop=({},{}) {}x{}, feather={:.1f}", + i, pasteX, pasteY, dlssWidthOut, dlssHeightOut, featherPixels); + featherLogCount++; + } + + // Cleanup PS state + ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; + context->PSSetShaderResources(0, 1, nullSRV); + context->PSSetShader(nullptr, nullptr, 0); + context->VSSetShader(nullptr, nullptr, 0); + + // Restore OM state + context->OMSetBlendState(oldBlendState, oldBlendFactor, oldSampleMask); + if (oldBlendState) + oldBlendState->Release(); + context->OMSetRenderTargets(1, &oldRTV, oldDSV); + if (oldRTV) + oldRTV->Release(); + if (oldDSV) + oldDSV->Release(); + } else { + // PS compilation failed; fall back to hard paste + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + } + } else if (featherPixels > 0.0f) { + // Fallback: RTV not available, use legacy CS path + if (!vrFeatheredCompositeCS) { + vrFeatheredCompositeCS.attach((ID3D11ComputeShader*)Util::CompileShader( + L"Data/Shaders/Upscaling/FeatheredCompositeCS.hlsl", {}, "cs_5_0")); + if (vrFeatheredCompositeCS) + logger::info("[Upscaling] FeatheredCompositeCS compiled (fallback)"); + else + logger::error("[Upscaling] FeatheredCompositeCS FAILED to compile!"); + if (!vrFeatheredCompositeCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrFeatheredCompositeCB.put())); + } + } + + if (vrFeatheredCompositeCS) { + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrFeatheredCompositeCB.get(), 0, 
D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + uint32_t cx, cy, cw, ch; + float feather; + float pad[3]; + } cbData = { + pasteX, pasteY, dlssWidthOut, dlssHeightOut, featherPixels, {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(vrFeatheredCompositeCB.get(), 0); + + ID3D11Buffer* cbs[1] = { vrFeatheredCompositeCB.get() }; + context->CSSetConstantBuffers(0, 1, cbs); + ID3D11ShaderResourceView* srvs[1] = { vrIntermediateColorOut[i]->srv.get() }; + context->CSSetShaderResources(0, 1, srvs); + ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[i]->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + context->CSSetShader(vrFeatheredCompositeCS.get(), nullptr, 0); + context->Dispatch((dlssWidthOut + 7) / 8, (dlssHeightOut + 7) / 8, 1); + + if (featherLogCount < 3) { + logger::info("[Upscaling] FeatheredCompositeCS dispatched (fallback): groups=({},{})", + (dlssWidthOut + 7) / 8, (dlssHeightOut + 7) / 8); + featherLogCount++; + } + + ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; + context->CSSetShaderResources(0, 1, nullSRV); + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetShader(nullptr, nullptr, 0); + } else { + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + } + } else { + // Hard paste (no feathering) + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + if (featherLogCount < 3) { + logger::info("[Upscaling] FinalizePerEyeOutputs eye{}: using HARD PASTE (no feathering)", i); + featherLogCount++; + } + } + + // Copy composition target to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, 
eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrFinalOutput[i]->resource.get(), 0, &outBox); + } else { + // Direct copy DLSS output to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + } } if (state->frameAnnotations) @@ -941,7 +1470,11 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) } void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX) + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY, + uint32_t depthWidth, uint32_t depthHeight, + uint32_t colorWidth, uint32_t colorHeight, + ID3D11ShaderResourceView* fallbackSRV, uint32_t fallbackOffsetX) { if (!globals::game::isVR) return; @@ -952,7 +1485,7 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe vrClearHMDMaskCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/ClearHMDMaskCS.hlsl", {}, "cs_5_0")); D3D11_BUFFER_DESC cbDesc = {}; - cbDesc.ByteWidth = 16; // 4 uints + cbDesc.ByteWidth = 32; // 8 uints (offsets + optional scaling dimensions) cbDesc.Usage = D3D11_USAGE_DYNAMIC; cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; @@ -965,8 +1498,9 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->CSSetShader(vrClearHMDMaskCS.get(), nullptr, 0); - ID3D11ShaderResourceView* srvs[1] = { depthSRV }; - context->CSSetShaderResources(0, 1, srvs); + // t0 = depth, t1 = fallback (nullptr → unbound → reads return (0,0,0,0) → black) + ID3D11ShaderResourceView* srvs[2] = { depthSRV, fallbackSRV }; + context->CSSetShaderResources(0, 2, srvs); ID3D11UnorderedAccessView* uavs[1] = { colorUAV }; 
context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); @@ -974,9 +1508,10 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe D3D11_MAPPED_SUBRESOURCE mapped{}; context->Map(vrClearHMDMaskCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - uint32_t offsets[4] = { depthOffsetX, colorOffsetX, 0, 0 }; + uint32_t cbData[8] = { depthOffsetX, colorOffsetX, depthOffsetY, fallbackOffsetX, + depthWidth, depthHeight, colorWidth, colorHeight }; - memcpy(mapped.pData, offsets, sizeof(offsets)); + memcpy(mapped.pData, cbData, sizeof(cbData)); context->Unmap(vrClearHMDMaskCB.get(), 0); ID3D11Buffer* cbs[1] = { vrClearHMDMaskCB.get() }; @@ -984,13 +1519,81 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->Dispatch(dispatchX, dispatchY, 1); + // Unbind + ID3D11ShaderResourceView* nullSRVs[2] = { nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + ID3D11Buffer* nullCB[1] = { nullptr }; + context->CSSetShaderResources(0, 2, nullSRVs); + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetShader(nullptr, nullptr, 0); + } +} + +void Upscaling::FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV) +{ + if (!globals::game::isVR || !vrFinalOutput[eyeIndex]) + return; + if (!overrideSRV && !vrIntermediateColorIn[eyeIndex]) + return; + + auto context = globals::d3d::context; + + if (!vrPeripheryFillCS) { + vrPeripheryFillCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/VRPeripheryFillCS.hlsl", {}, "cs_5_0")); + + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 16; // 4 uints + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, 
vrPeripheryFillCB.put())); + + D3D11_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&samplerDesc, vrLinearSampler.put())); + } + + if (vrPeripheryFillCS) { + auto dispatchX = (dstWidth + 7) / 8; + auto dispatchY = (dstHeight + 7) / 8; + + context->CSSetShader(vrPeripheryFillCS.get(), nullptr, 0); + + // Read from overrideSRV (e.g. TAA texture) or default render-res per-eye input. + ID3D11ShaderResourceView* srvs[1] = { overrideSRV ? overrideSRV : vrIntermediateColorIn[eyeIndex]->srv.get() }; + context->CSSetShaderResources(0, 1, srvs); + + ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[eyeIndex]->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; + context->CSSetSamplers(0, 1, samplers); + + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrPeripheryFillCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + uint32_t cbData[4] = { srcWidth, srcHeight, dstWidth, dstHeight }; + memcpy(mapped.pData, cbData, sizeof(cbData)); + context->Unmap(vrPeripheryFillCB.get(), 0); + + ID3D11Buffer* cbs[1] = { vrPeripheryFillCB.get() }; + context->CSSetConstantBuffers(0, 1, cbs); + + context->Dispatch(dispatchX, dispatchY, 1); + // Unbind ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; ID3D11Buffer* nullCB[1] = { nullptr }; + ID3D11SamplerState* nullSampler[1] = { nullptr }; context->CSSetShaderResources(0, 1, nullSRV); context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetSamplers(0, 1, nullSampler); context->CSSetShader(nullptr, nullptr, 0); } } @@ -1101,6 +1704,10 @@ void 
Upscaling::ConfigureUpscaling(RE::BSGraphics::State* a_viewport) // Disable dynamic resolution unless the game explicitly enables it if (!globals::game::isVR) runtimeData.dynamicResolutionLock = 1; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // No longer need to force-disable culling when upscaling is active. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs. } void Upscaling::SetupResources() @@ -1145,6 +1752,7 @@ void Upscaling::SetupResources() depthStencilDesc.BackFace.StencilDepthFailOp = depthStencilDesc.FrontFace.StencilDepthFailOp; depthStencilDesc.BackFace.StencilPassOp = depthStencilDesc.FrontFace.StencilPassOp; depthStencilDesc.BackFace.StencilFunc = depthStencilDesc.FrontFace.StencilFunc; + } else { depthStencilDesc.StencilEnable = false; // Disable stencil testing } @@ -1198,6 +1806,16 @@ void Upscaling::ClearShaderCache() depthRefractionUpscalePS = nullptr; // com_ptr automatically releases underwaterMaskUpscalePS = nullptr; // com_ptr automatically releases upscaleVS = nullptr; // com_ptr automatically releases + vrClearHMDMaskCS = nullptr; + vrPeripheryFillCS = nullptr; + vrPeripheryFillCB = nullptr; + vrFeatheredCompositeCS = nullptr; + vrFeatheredCompositeCB = nullptr; + vrFeatheredCompositePS = nullptr; + vrFeatheredCompositeBlendState = nullptr; + vrDlssCompositePS = nullptr; + vrDlssUpscalePS = nullptr; + vrDlssUpscaleCB = nullptr; } void Upscaling::CopySharedD3D12Resources() @@ -1408,6 +2026,17 @@ bool Upscaling::IsUpscalingActive() const return resolutionScale.x < .99f; } +std::vector Upscaling::GetActiveConstraints() const +{ + std::vector constraints; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs, + // so we no longer need to constrain depth buffer culling when upscaling is active. 
+ + return constraints; +} + /** * @brief Retrieves the current frame time for frame generation. * @@ -1519,7 +2148,7 @@ Upscaling::BlurResources Upscaling::GetBlurResources() const return {}; } -void Upscaling::Upscale() +void Upscaling::Upscale(ID3D11Texture2D* colorSourceOverride) { auto upscaleMethod = GetUpscaleMethod(); @@ -1580,8 +2209,11 @@ void Upscaling::Upscale() { state->BeginPerfEvent("Upscaling"); + // Use color source override if provided (e.g., post-PP intermediate for periphery TAA) + ID3D11Resource* colorSrc = colorSourceOverride ? static_cast(colorSourceOverride) : static_cast(main.texture); + if (upscaleMethod == UpscaleMethod::kDLSS) { - streamline.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); + streamline.Upscale(colorSrc, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); } else if (upscaleMethod == UpscaleMethod::kFSR) { fidelityFX.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVector.texture, settings.sharpnessFSR); } @@ -1719,9 +2351,10 @@ void Upscaling::UpscaleDepth() // Skip alias copies to reduce unnecessary copy churn. copyIfNonAliased(depthCopy.texture, depth.texture); - // Clear stencil to be 0xFF + // Clear stencil to 0x00 for VR — the VR shader path discards pixels with + // stencil > 0x00, so 0x00 means "all pixels valid" (process entire display-res). 
if (globals::game::isVR) { - context->ClearDepthStencilView(depthCopy.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0xFF); + context->ClearDepthStencilView(depthCopy.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0x00); } // Set depth stencil state to write 0x00 @@ -1821,20 +2454,91 @@ void Upscaling::Main_PostProcessing::thunk(RE::ImageSpaceManager* a_this, uint32 if (upscaling.d3d12SwapChainActive && upscaling.settings.frameGenerationMode) upscaling.CopySharedD3D12Resources(); - if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) - upscaling.PerformUpscaling(); + // Increment diagnostic counter (rate-limits TAAReorder logging) + if (TAAReorder::g_initialized) { + TAAReorder::g_diagCounter = (TAAReorder::g_diagCounter + 1) % TAAReorder::DIAG_INTERVAL; + if (TAAReorder::g_diagCounter == 0) { + TAAReorder::g_frameSeqCounter = 0; + logger::info("[SEQ] Main_PostProcessing START seq={}", TAAReorder::g_frameSeqCounter++); + } + } - if (upscaleMethod == UpscaleMethod::kDLSS) - upscaling.ApplySharpening(); + bool peripheryTAA = TAAReorder::ShouldReorderTAA(); + + if (peripheryTAA) { + // ─── Periphery TAA with post-conductor DLSS (PureDark's approach) ─── + // func() with TAA enabled → conductor runs all passes unimpeded: + // Phase 2A: ExecutePassHook captures post-PP intermediate to g_postPPCopy + // Phase 5: TAA + DRS → submit texture + // After conductor: ConductorCallHook evaluates DLSS on g_postPPCopy, + // then pastes DLSS center onto submit texture + + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + + // Reset per-frame flags + TAAReorder::g_postPPReady = false; + TAAReorder::g_dlssReady = false; + TAAReorder::g_dlssPasteComplete = false; + TAAReorder::g_phase5Complete = false; + TAAReorder::g_bsHookCallCount = 0; + + if (TAAReorder::g_diagCounter == 0) + logger::info("[TAAReorder] peripheryTAA: running func() with TAA enabled..."); + + // Clear stencil marks 
left by VRStereoOptimizations to prevent TAA interference + if (globals::features::vrStereoOptimizations.loaded) { + auto renderer = globals::game::renderer; + auto& depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (depth.views[0]) + globals::d3d::context->ClearDepthStencilView(depth.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0); + } - auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); - GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + // Set TAA high-frequency response for periphery quality + auto fTAAHighFreq = RE::GetINISetting("fTAAHighFreq:Display"); + float savedHF = fTAAHighFreq ? fTAAHighFreq->data.f : 0.0f; + if (fTAAHighFreq) + fTAAHighFreq->data.f = 1.0f; + + // func() with TAA ENABLED — DLSS eval + paste in ConductorCallHook (post-conductor) + BSImagespaceShaderISTemporalAA->taaEnabled = true; + func(a_this, a3, a_target, a_4, a_5); + + // Restore original TAA HF value + if (fTAAHighFreq) + fTAAHighFreq->data.f = savedHF; - BSImagespaceShaderISTemporalAA->taaEnabled = upscaleMethod == UpscaleMethod::kTAA; + // Lock DRS + update camera (after conductor completes) + auto& runtimeData = globals::game::graphicsState->GetRuntimeData(); + runtimeData.dynamicResolutionLock = 1; + UpdateCameraData(); + + // Disable TAA for remainder of frame + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } else { + // ─── Normal flow (no periphery TAA) ─── + if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) + upscaling.PerformUpscaling(); + + if (upscaleMethod == UpscaleMethod::kDLSS) + upscaling.ApplySharpening(); + + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); - func(a_this, a3, a_target, a_4, a_5); + BSImagespaceShaderISTemporalAA->taaEnabled = (upscaleMethod == UpscaleMethod::kTAA); + + if (TAAReorder::g_diagCounter == 0 && TAAReorder::g_initialized) + 
logger::info("[DIAG] Normal DLSS flow: taaEnabled={}, running func()...", BSImagespaceShaderISTemporalAA->taaEnabled); + + func(a_this, a3, a_target, a_4, a_5); + + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } - BSImagespaceShaderISTemporalAA->taaEnabled = false; + // VR CAS sharpening (after TAA) + if (REL::Module::IsVR() && globals::features::vrStereoOptimizations.loaded) + globals::features::vrStereoOptimizations.ApplyCAS(a_target); } void Upscaling::SetScissorRect::thunk(RE::BSGraphics::Renderer* This, int a_left, int a_top, int a_right, int a_bottom) diff --git a/src/Features/Upscaling.h b/src/Features/Upscaling.h index 6cecf6cbaf..a57bb28b6f 100644 --- a/src/Features/Upscaling.h +++ b/src/Features/Upscaling.h @@ -57,8 +57,12 @@ struct Upscaling : Feature uint streamlineLogLevel = 0; // 0=Off, 1=Default, 2=Verbose float sharpnessFSR = 0.0f; float sharpnessDLSS = 0.0f; - uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M - uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M + uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + float vrDlssViewportScale = 1.0f; // 0.5 to 1.0, fraction of each eye that DLSS processes (VR only) + uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) + float vrDlssCropOffsetX = 0.0f; // 0.0-0.3, nasal offset fraction for DLSS crop position + float vrDlssFeatherWidth = 0.0f; // 0.0-0.1, feather width fraction at DLSS crop boundary (disabled pending fix) }; Settings settings; @@ -110,6 +114,7 @@ struct Upscaling : Feature virtual void Load() override; virtual void PostPostLoad() override; virtual void SetupResources() override; + virtual std::vector GetActiveConstraints() const override; UpscaleMethod GetUpscaleMethod() const; @@ -138,7 +143,11 @@ struct Upscaling : Feature winrt::com_ptr vrClearHMDMaskCB; // Helper to dispatch mask clearing for a single eye region void 
ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX); + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY = 0, + uint32_t depthWidth = 0, uint32_t depthHeight = 0, + uint32_t colorWidth = 0, uint32_t colorHeight = 0, + ID3D11ShaderResourceView* fallbackSRV = nullptr, uint32_t fallbackOffsetX = 0); // Shared VR Per-Eye Intermediate Buffers // Owned here so both Streamline (DLSS) and FidelityFX (FSR) can use them. @@ -148,6 +157,43 @@ struct Upscaling : Feature eastl::unique_ptr vrIntermediateMotionVectors[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateReactiveMask[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateTransparencyMask[2]; // per-eye render resolution + eastl::unique_ptr vrFinalOutput[2]; // per-eye display-res composition target (VR viewport scaling) + eastl::unique_ptr vrCropColorIn[2]; // crop-sized DLSS color input (VR viewport scaling only) + + // Periphery TAA (conductor approach) — used by two-call func() flow + winrt::com_ptr vrPreTAACopy; // full stereo kMAIN copy (Phase 1 PP, pre-TAA) + eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) + + // Periphery fill compute shader (bilinear upscale render-res → display-res for VR viewport scaling) + winrt::com_ptr vrPeripheryFillCS; + winrt::com_ptr vrPeripheryFillCB; + winrt::com_ptr vrLinearSampler; + + // Feathered composite compute shader (legacy, kept as fallback) + winrt::com_ptr vrFeatheredCompositeCS; + winrt::com_ptr vrFeatheredCompositeCB; + + // Feathered composite pixel shader approach (replaces CS to preserve periphery TAA) + // Based on PureDark's technique from Skyrim-Upscaler VR (MIT license) + winrt::com_ptr vrFeatheredCompositePS; + winrt::com_ptr vrFeatheredCompositeBlendState; + + // DLSS composite pixel shaders (format-converting 
fullscreen copy for TAAReorder) + winrt::com_ptr vrDlssCompositePS; // point-sample (same-res format conversion) + winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) + winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params + ID3D11PixelShader* GetDlssCompositePS(); + ID3D11PixelShader* GetDlssUpscalePS(); + + struct DlssCompositeCB + { + float2 DynResScale; // renderRes / displayRes per-eye + float2 EyeOffset; // (i * eyeWidth, 0) + float2 SrcTexSize; // full texture dimensions + float2 pad; + }; + void FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV = nullptr); // Helper to create/resize per-eye buffers matching source formats void CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight, uint32_t outWidth, uint32_t outHeight, @@ -160,11 +206,11 @@ struct Upscaling : Feature // Shared Pipeline Steps void PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* depthSrc, ID3D11Resource* mvecSrc, ID3D11Resource* reactiveSrc, ID3D11Resource* transparencySrc); - void FinalizePerEyeOutputs(ID3D11Resource* colorDst); + void FinalizePerEyeOutputs(ID3D11Resource* colorDst, bool eye0Only = false); void ConfigureTAA(); void ConfigureUpscaling(RE::BSGraphics::State* a_state); - void Upscale(); + void Upscale(ID3D11Texture2D* colorSourceOverride = nullptr); // D3D11 textures Texture2D* reactiveMaskTexture = nullptr; diff --git a/src/Features/Upscaling/Streamline.cpp b/src/Features/Upscaling/Streamline.cpp index 71eb3a3542..832f5714ae 100644 --- a/src/Features/Upscaling/Streamline.cpp +++ b/src/Features/Upscaling/Streamline.cpp @@ -7,6 +7,7 @@ #include "../../Hooks.h" #include "../../State.h" #include "../../Util.h" +#include "../TAAReorder.h" #include "../Upscaling.h" #include "DX12SwapChain.h" @@ -237,6 +238,20 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye 
slConstants.cameraMotionIncluded = sl::Boolean::eTrue; slConstants.cameraPinholeOffset = { 0.f, 0.f }; + + // VR nasal offset: when the crop is shifted, tell DLSS the optical center is offset + if (globals::game::isVR) { + float nasalFrac = globals::features::upscaling.settings.vrDlssCropOffsetX; + float vpScale = globals::features::upscaling.settings.vrDlssViewportScale; + if (nasalFrac > 0.0f && vpScale < 1.0f) { + // Pinhole offset in NDC: how far the crop center is from the eye's optical axis + // Eye 0: shifted right (+X), Eye 1: shifted left (-X) + float shiftNDC = nasalFrac / vpScale; // normalized to crop width + float sign = (eyeIndex == 0) ? 1.0f : -1.0f; + slConstants.cameraPinholeOffset = { sign * shiftNDC, 0.f }; + } + } + slConstants.cameraRight = { viewMatrix._11, viewMatrix._12, viewMatrix._13 }; slConstants.cameraUp = { viewMatrix._21, viewMatrix._22, viewMatrix._23 }; slConstants.cameraFwd = { viewMatrix._31, viewMatrix._32, viewMatrix._33 }; @@ -245,6 +260,28 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.depthInverted = sl::Boolean::eFalse; if (globals::game::isVR) { + // When VR viewport scaling is active, DLSS processes a centered sub-region of each eye. + // The projection matrix must be adjusted to describe only the crop's FOV, not the full eye. + // Without this, DLSS's temporal reprojection maps pixels to wrong world positions, + // destroying temporal accumulation (causing aliasing and instability). + // Scaling rows 0 and 1 of the projection by 1/vpScale narrows the clip-space X/Y + // to match the crop region. clipToPrevClip must also be conjugated (see below). 
+ float vpScale = globals::features::upscaling.settings.vrDlssViewportScale; + if (vpScale < 1.0f) { + float invScale = 1.0f / vpScale; + // Row 0 → clip.x, Row 1 → clip.y (Streamline row-major, P * pos convention) + slConstants.cameraViewToClip[0].x *= invScale; + slConstants.cameraViewToClip[0].y *= invScale; + slConstants.cameraViewToClip[0].z *= invScale; + slConstants.cameraViewToClip[0].w *= invScale; + slConstants.cameraViewToClip[1].x *= invScale; + slConstants.cameraViewToClip[1].y *= invScale; + slConstants.cameraViewToClip[1].z *= invScale; + slConstants.cameraViewToClip[1].w *= invScale; + // Narrow the reported FOV to match the crop + slConstants.cameraFOV = 2.0f * atanf(vpScale * tanf(slConstants.cameraFOV * 0.5f)); + } + // VR: compute clipToCameraView / clipToPrevClip / prevClipToClip from Skyrim's per-eye matrices. // recalculateCameraMatrices() uses a single static prev-frame slot -- unusable for two viewports. sl::matrixFullInvert(slConstants.clipToCameraView, slConstants.cameraViewToClip); @@ -258,7 +295,62 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye sl::float4x4 invCurrViewProj; sl::matrixFullInvert(invCurrViewProj, currViewProjSL); sl::matrixMul(slConstants.clipToPrevClip, invCurrViewProj, prevViewProjSL); + + // When viewport scaling is active, cameraViewToClip is adjusted (narrower FOV), + // changing the clip space. clipToPrevClip (computed from unadjusted VP) maps between + // unadjusted clip spaces. We must conjugate it to map between adjusted clip spaces: + // CTP_adj = inv(S) * CTP * S + // where S = diag(invScale, invScale, 1, 1), inv(S) = diag(vpScale, vpScale, 1, 1). 
+ // + // Derivation (row-vector convention: clip = view * P): + // clip_adj = clip_unadj * S (scaling rows 0,1 of P scales clip x,y by invScale) + // clip_prev_adj = clip_prev_unadj * S + // clip_prev_unadj = clip_curr_unadj * CTP + // clip_prev_adj = (clip_curr_adj * inv(S)) * CTP * S = clip_curr_adj * (inv(S) * CTP * S) + // + // Element-wise: CTP_adj[i][j] = inv(S)[i] * CTP[i][j] * S[j] + // Rows 0,1, cols 0,1: vpScale * invScale = 1 (unchanged) + // Rows 0,1, cols 2,3: vpScale * 1 = vpScale + // Rows 2,3, cols 0,1: 1 * invScale = invScale + // Rows 2,3, cols 2,3: unchanged + // + // This ensures clipToPrevClip agrees with per-pixel motion vectors. + // Without correct conjugation, DLSS sees disagreement between the camera-predicted + // motion and per-pixel motion vectors, causing it to reject temporal accumulation + // during camera motion. (When still, CTP ≈ I, and inv(S)*I*S = I → no mismatch.) + if (vpScale < 1.0f) { + float invScale = 1.0f / vpScale; + // Rows 0,1 cols 2,3: multiply by vpScale (from left-multiply by inv(S)) + slConstants.clipToPrevClip[0].z *= vpScale; + slConstants.clipToPrevClip[0].w *= vpScale; + slConstants.clipToPrevClip[1].z *= vpScale; + slConstants.clipToPrevClip[1].w *= vpScale; + // Rows 2,3 cols 0,1: multiply by invScale (from right-multiply by S) + slConstants.clipToPrevClip[2].x *= invScale; + slConstants.clipToPrevClip[2].y *= invScale; + slConstants.clipToPrevClip[3].x *= invScale; + slConstants.clipToPrevClip[3].y *= invScale; + } + sl::matrixFullInvert(slConstants.prevClipToClip, slConstants.clipToPrevClip); + + // Per-eye diagnostic logging for temporal quality investigation + { + static uint32_t ctpDiagCounter = 0; + bool ctpDiag = (ctpDiagCounter++ % 300 == 0) || (TAAReorder::g_diagCounter == 0 && vpScale < 1.0f); + if (ctpDiag) { + auto& ctp = slConstants.clipToPrevClip; + logger::info("[DLSS-CTP] Eye {} clipToPrevClip diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, ctp[0].x, ctp[1].y, ctp[2].z, ctp[3].w); + 
logger::info("[DLSS-CTP] Eye {} prevVP diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, prevViewProjSL[0].x, prevViewProjSL[1].y, prevViewProjSL[2].z, prevViewProjSL[3].w); + logger::info("[DLSS-CTP] Eye {} currVP diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, currViewProjSL[0].x, currViewProjSL[1].y, currViewProjSL[2].z, currViewProjSL[3].w); + logger::info("[DLSS-CTP] Eye {} cameraPos=({:.2f},{:.2f},{:.2f}) fov={:.4f} mvecScale=({:.4f},{:.4f})", + eyeIndex, slConstants.cameraPos.x, slConstants.cameraPos.y, slConstants.cameraPos.z, + slConstants.cameraFOV, slConstants.mvecScale.x, slConstants.mvecScale.y); + } + } } else { recalculateCameraMatrices(slConstants); } @@ -268,7 +360,26 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.jitterOffset = { -jitter.x, -jitter.y }; slConstants.reset = sl::Boolean::eFalse; - slConstants.mvecScale = { 1.0f, 1.0f }; + // mvecScale normalizes motion vectors to [-1,1] range. The Streamline DLSS plugin + // then multiplies by the input render dimensions to get pixel displacement: + // MV_Scale = mvecScale * renderWidth + // The game's motion vectors are in [-1,1] normalized to the FULL per-eye dimensions. + // Without viewport scaling, renderWidth = eyeWidthIn → MV_Scale = eyeWidthIn → correct. + // With viewport scaling, renderWidth = cropWidthIn = eyeWidthIn * vpScale, so DLSS + // underestimates motion by vpScale. Compensate by scaling mvecScale by 1/vpScale. 
+ if (globals::game::isVR && globals::features::upscaling.settings.vrDlssViewportScale < 1.0f) { + float invScale = 1.0f / globals::features::upscaling.settings.vrDlssViewportScale; + slConstants.mvecScale = { invScale, invScale }; + } else { + slConstants.mvecScale = { 1.0f, 1.0f }; + } + // Log mvecScale after assignment (was previously logged before assignment, showing uninitialized values) + if (globals::game::isVR && TAAReorder::g_diagCounter == 0 && globals::features::upscaling.settings.vrDlssViewportScale < 1.0f) { + logger::info("[TAAReorder] Eye {} mvecScale=({:.4f},{:.4f}) jitter=({:.4f},{:.4f})", + eyeIndex, slConstants.mvecScale.x, slConstants.mvecScale.y, + slConstants.jitterOffset.x, slConstants.jitterOffset.y); + } + slConstants.motionVectors3D = sl::Boolean::eFalse; slConstants.motionVectorsInvalidValue = FLT_MIN; slConstants.orthographicProjection = sl::Boolean::eFalse; @@ -277,6 +388,13 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye if (SL_FAILED(res, slSetConstants(slConstants, *frameToken, p_viewport))) { logger::error("[Streamline] Could not set constants for eye {}", eyeIndex); + } else { + static uint32_t constDiagCounter = 0; + if (constDiagCounter++ % 300 == 0) { + logger::info("[Streamline] slSetConstants OK eye={} jitter=({:.4f},{:.4f}) fov={:.4f}", + eyeIndex, slConstants.jitterOffset.x, slConstants.jitterOffset.y, + slConstants.cameraFOV); + } } } @@ -304,7 +422,7 @@ bool Streamline::IsRTXAndBelow40Series(IDXGIAdapter* a_adapter) return false; } -void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) +void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width, uint32_t height) { sl::DLSSOptions dlssOptions{}; @@ -328,10 +446,8 @@ void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) break; } - auto state = globals::state; - dlssOptions.outputWidth = width; - dlssOptions.outputHeight = (uint)state->screenSize.y; + 
dlssOptions.outputHeight = height; // Detect HDR from kMAIN format at runtime -- VR kMAIN may be 8-bit while SE is FP16 { @@ -394,7 +510,7 @@ void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, ID3D11Resource* colorIn, ID3D11Resource* colorOut, ID3D11Resource* depth, ID3D11Resource* mvec, ID3D11Resource* reactiveMask, ID3D11Resource* transparencyMask, - const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth) + const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth, uint32_t outputHeight) { auto context = globals::d3d::context; @@ -406,7 +522,7 @@ void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, sl::Resource transparencyMaskRes = { sl::ResourceType::eTex2d, transparencyMask, 0 }; CheckFrameConstants(vp, eyeIndex); - SetDLSSOptions(vp, outputWidth); + SetDLSSOptions(vp, outputWidth, outputHeight); sl::ResourceTag tags[] = { { &colorInRes, sl::kBufferTypeScalingInputColor, sl::ResourceLifecycle::eOnlyValidNow, &extentIn }, @@ -438,17 +554,24 @@ void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, if (state->frameAnnotations) state->EndPerfEvent(); - if (evalResult != sl::Result::eOk) { - static bool evalErrorLogged[2] = { false, false }; + // Rate-limited diagnostic logging for DLSS evaluation results + { + static uint32_t evalDiagCounter[2] = { 0, 0 }; uint32_t logIdx = globals::game::isVR ? eyeIndex : 0; - if (!evalErrorLogged[logIdx]) { - evalErrorLogged[logIdx] = true; - logger::error("[Streamline] slEvaluateFeature failed{} result={}", globals::game::isVR ? 
std::format(" for eye {}", eyeIndex) : "", (int)evalResult); + bool diagLog = (evalDiagCounter[logIdx]++ % 300 == 0); + + if (evalResult != sl::Result::eOk) { + if (diagLog) { + logger::error("[Streamline] slEvaluateFeature FAILED eye={} result={} (frame {})", + eyeIndex, (int)evalResult, evalDiagCounter[logIdx]); + } + } else if (diagLog) { + logger::info("[Streamline] slEvaluateFeature OK eye={} (frame {})", eyeIndex, evalDiagCounter[logIdx]); } } } -void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors) +void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors, bool eye0Only) { auto state = globals::state; @@ -458,9 +581,15 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r auto screenSize = state->screenSize; auto renderSize = Util::ConvertToDynamic(screenSize); - // VR: Combined-buffer mode with extent offsets causes temporal ghosting on the right eye - // because DLSS's internal history buffers use extent offsets as indices. - // Per-eye isolation with extents at {0,0} is required. + // VR: Per-eye isolation is required. Each eye uses a separate per-eye texture + // with its own viewport handle, avoiding cross-eye history contamination. + // When viewport scaling is active (vrDlssViewportScale < 1.0): + // - All DLSS inputs are physically cropped to the center sub-region at {0,0}. + // This eliminates non-zero subrect base offsets which break temporal reprojection. + // - Camera matrices are adjusted in CheckFrameConstants to match the crop's FOV. + // - FillPeriphery bilinear-upscales the full render-res input to vrFinalOutput, + // then FinalizePerEyeOutputs pastes the DLSS crop output into the center. + // When viewport scaling is off (scale == 1.0), all textures are full-size at {0,0}. 
if (globals::game::isVR) { auto& upscaling = globals::features::upscaling; uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); @@ -468,21 +597,50 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; + float vpScale = upscaling.settings.vrDlssViewportScale; + bool viewportScaling = vpScale < 1.0f; + + uint32_t dlssWidthIn = viewportScaling ? (uint32_t)(eyeWidthIn * vpScale) : eyeWidthIn; + uint32_t dlssHeightIn = viewportScaling ? (uint32_t)(eyeHeightIn * vpScale) : eyeHeightIn; + uint32_t dlssWidthOut = viewportScaling ? (uint32_t)(eyeWidthOut * vpScale) : eyeWidthOut; + uint32_t dlssHeightOut = viewportScaling ? (uint32_t)(eyeHeightOut * vpScale) : eyeHeightOut; + upscaling.PreparePerEyeInputs(a_upscalingTexture, depthTexture.texture, a_motionVectors, a_reactiveMask, a_transparencyCompositionMask); - for (uint32_t i = 0; i < 2; ++i) { + // Periphery TAA diagnostic + if (TAAReorder::g_diagCounter == 0 && viewportScaling && upscaling.settings.vrPeripheryTAA) { + logger::info("[TAAReorder] Periphery TAA: vrTAAdPerEye[0]={}, g_initialized={} (TAA injected at display RT level)", + (void*)upscaling.vrTAAdPerEye[0].get(), TAAReorder::g_initialized); + } + + uint32_t eyeCount = eye0Only ? 1 : 2; + for (uint32_t i = 0; i < eyeCount; ++i) { sl::ViewportHandle vp = (i == 1) ? viewportRight : viewport; - sl::Extent extentIn{ 0, 0, eyeWidthIn, eyeHeightIn }; - sl::Extent extentOut{ 0, 0, eyeWidthOut, eyeHeightOut }; + + if (viewportScaling) { + // Pre-fill composition target with bilinear upscale of full render-res eye. + // DLSS output is pasted on top in FinalizePerEyeOutputs. + upscaling.FillPeriphery(i, eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + } + + // All extents are {0,0} - inputs are physically crop-sized (or full-sized when not scaling). + // No non-zero subrect base offsets, which is critical for DLSS temporal reprojection. 
+ sl::Extent extentIn = { 0, 0, dlssWidthIn, dlssHeightIn }; + sl::Extent extentOut = { 0, 0, dlssWidthOut, dlssHeightOut }; + + // When viewport scaling, use crop-sized vrCropColorIn; otherwise use full vrIntermediateColorIn + ID3D11Resource* colorInput = viewportScaling ? + upscaling.vrCropColorIn[i]->resource.get() : + upscaling.vrIntermediateColorIn[i]->resource.get(); EvaluateDLSS(vp, i, - upscaling.vrIntermediateColorIn[i]->resource.get(), upscaling.vrIntermediateColorOut[i]->resource.get(), + colorInput, upscaling.vrIntermediateColorOut[i]->resource.get(), upscaling.vrIntermediateDepth[i]->resource.get(), upscaling.vrIntermediateMotionVectors[i]->resource.get(), upscaling.vrIntermediateReactiveMask[i]->resource.get(), upscaling.vrIntermediateTransparencyMask[i]->resource.get(), - extentIn, extentOut, eyeWidthOut); + extentIn, extentOut, dlssWidthOut, dlssHeightOut); } - upscaling.FinalizePerEyeOutputs(a_upscalingTexture); + upscaling.FinalizePerEyeOutputs(a_upscalingTexture, eye0Only); } else { // Non-VR: Simple full-texture upscale sl::Extent extentIn{ 0, 0, (uint)renderSize.x, (uint)renderSize.y }; @@ -491,7 +649,7 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r EvaluateDLSS(viewport, 0, a_upscalingTexture, a_upscalingTexture, depthTexture.texture, a_motionVectors, a_reactiveMask, a_transparencyCompositionMask, - extentIn, extentOut, (uint)screenSize.x); + extentIn, extentOut, (uint)screenSize.x, (uint)screenSize.y); } } /** diff --git a/src/Features/Upscaling/Streamline.h b/src/Features/Upscaling/Streamline.h index 0f771fb9a0..348f0b6849 100644 --- a/src/Features/Upscaling/Streamline.h +++ b/src/Features/Upscaling/Streamline.h @@ -70,7 +70,7 @@ class Streamline void EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, ID3D11Resource* colorIn, ID3D11Resource* colorOut, ID3D11Resource* depth, ID3D11Resource* mvec, ID3D11Resource* reactiveMask, ID3D11Resource* transparencyMask, - const sl::Extent& extentIn, const 
sl::Extent& extentOut, uint32_t outputWidth); + const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth, uint32_t outputHeight); // Cached DLL version info for Streamline plugin directory static std::vector> dllVersions; @@ -85,9 +85,9 @@ class Streamline bool IsRTXAndBelow40Series(IDXGIAdapter* a_adapter); - void SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width); + void SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width, uint32_t height); - void Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors); + void Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors, bool eye0Only = false); void DestroyDLSSResources(); }; diff --git a/src/Features/VR.cpp b/src/Features/VR.cpp index e6ed6af7bb..a35c20d377 100644 --- a/src/Features/VR.cpp +++ b/src/Features/VR.cpp @@ -88,6 +88,12 @@ void VR::SetupResources() if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\VR\\StereoBlendCS.hlsl", edgeDetectionDefines, "cs_5_0"))) stereoBlendDebugEdgeDetectionCS.attach(rawPtr); + // Overwrite mode: direct replacement instead of blend (for stencil culling) + auto overwriteDefines = defines; + overwriteDefines.push_back({ "STEREO_OVERWRITE", "" }); + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\VR\\StereoBlendCS.hlsl", overwriteDefines, "cs_5_0"))) + stereoBlendOverwriteCS.attach(rawPtr); + auto renderer = globals::game::renderer; auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; D3D11_TEXTURE2D_DESC mainDesc; diff --git a/src/Features/VR.h b/src/Features/VR.h index e8372cbb6f..07e3a1609e 100644 --- a/src/Features/VR.h +++ b/src/Features/VR.h @@ -260,7 +260,7 @@ struct VR : OverlayFeature StereoBlendDepthSigma = std::clamp(StereoBlendDepthSigma, 0.001f, 0.1f); 
StereoBlendMaxFactor = std::clamp(StereoBlendMaxFactor, 0.0f, 0.5f); StereoBlendColorThreshold = std::clamp(StereoBlendColorThreshold, 0.0f, 0.2f); - StereoBlendDebugMode = std::clamp(StereoBlendDebugMode, 0, 3); + StereoBlendDebugMode = std::clamp(StereoBlendDebugMode, 0, 5); } }; @@ -358,8 +358,10 @@ struct VR : OverlayFeature winrt::com_ptr stereoBlendDebugBackCheckCS; winrt::com_ptr stereoBlendDebugBlendWeightCS; winrt::com_ptr stereoBlendDebugEdgeDetectionCS; + winrt::com_ptr stereoBlendOverwriteCS; eastl::unique_ptr stereoBlendCopyTex; eastl::unique_ptr stereoBlendCB; + winrt::com_ptr stereoBlendLinearSampler; struct alignas(16) StereoBlendCB { @@ -368,7 +370,11 @@ struct VR : OverlayFeature float DepthSigma; float MaxBlendFactor; float ColorDiffThreshold; - float pad; + float DebugEdgeTint; + uint32_t DebugMode; + float FullBlendDistance; + float POMDepthScale; + float _pad; }; // Engine hook integration points diff --git a/src/Features/VR/SettingsUI.cpp b/src/Features/VR/SettingsUI.cpp index c4ea6a562f..3e15342846 100644 --- a/src/Features/VR/SettingsUI.cpp +++ b/src/Features/VR/SettingsUI.cpp @@ -323,7 +323,7 @@ namespace ImGui::Separator(); - const char* debugModes[] = { "Off", "Back-Check", "Blend Weight", "Edge Detection" }; + const char* debugModes[] = { "Off", "Back-Check", "Blend Weight", "Edge Detection", "Overwrite", "Overwrite Eye1" }; ImGui::Combo("Debug View", &settings.StereoBlendDebugMode, debugModes, IM_ARRAYSIZE(debugModes)); if (auto _tt = Util::HoverTooltipWrapper()) { ImGui::Text( diff --git a/src/Features/VR/StereoBlend.cpp b/src/Features/VR/StereoBlend.cpp index 1fa5d22240..61ce5763f7 100644 --- a/src/Features/VR/StereoBlend.cpp +++ b/src/Features/VR/StereoBlend.cpp @@ -1,9 +1,12 @@ #include "Features/VR.h" +#include "Deferred.h" #include "Features/DynamicCubemaps.h" #include "Features/ScreenSpaceGI.h" #include "Features/ScreenSpaceShadows.h" +#include "Features/VRStereoOptimizations.h" #include "State.h" +#include "Utils/D3D.h" void 
VR::ClearShaderCache() { @@ -11,6 +14,7 @@ void VR::ClearShaderCache() stereoBlendDebugBackCheckCS = nullptr; stereoBlendDebugBlendWeightCS = nullptr; stereoBlendDebugEdgeDetectionCS = nullptr; + stereoBlendOverwriteCS = nullptr; } bool VR::AnyScreenSpaceEffectLoaded() @@ -22,10 +26,20 @@ bool VR::AnyScreenSpaceEffectLoaded() void VR::DrawStereoBlend() { - if (!REL::Module::IsVR() || !settings.EnableStereoBlend || !stereoBlendCS || !stereoBlendCopyTex || !stereoBlendCB) + bool vrStereoOptActive = globals::features::vrStereoOptimizations.loaded && + globals::features::vrStereoOptimizations.settings.stereoMode != VRStereoOptimizations::StereoMode::Off && + stereoBlendOverwriteCS; + + if (!REL::Module::IsVR() || !stereoBlendCopyTex || !stereoBlendCB) + return; + + if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugSkipMerge) + return; + + if (!vrStereoOptActive && (!settings.EnableStereoBlend || !stereoBlendCS)) return; - if (!AnyScreenSpaceEffectLoaded() && !globals::state->IsDeveloperMode()) + if (!vrStereoOptActive && !AnyScreenSpaceEffectLoaded() && !globals::state->IsDeveloperMode()) return; ZoneScoped; @@ -40,7 +54,6 @@ void VR::DrawStereoBlend() auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; auto* depthSRV = Util::GetCurrentSceneDepthSRV(); - // Copy main color to read-only texture to avoid read/write race between eyes context->CopyResource(stereoBlendCopyTex->resource.get(), main.texture); auto dispatchCount = Util::GetScreenDispatchCount(true); @@ -55,37 +68,118 @@ void VR::DrawStereoBlend() cbData.MaxBlendFactor = settings.StereoBlendMaxFactor; cbData.ColorDiffThreshold = settings.StereoBlendColorThreshold; + // Pass debug edge tint from VRStereoOptimizations settings + if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugVisualization) + cbData.DebugEdgeTint = 0.3f; + else + cbData.DebugEdgeTint = 0.0f; + + // Debug mode: 0=normal, 1=depth map diagnostic, 2=full blend 
depth visualizer + if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugDepthMap) + cbData.DebugMode = 1u; + else if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugFullBlendDepth) + cbData.DebugMode = 2u; + else if (vrStereoOptActive && globals::features::vrStereoOptimizations.settings.debugPOMDepth) + cbData.DebugMode = 3u; + else + cbData.DebugMode = 0u; + + cbData.FullBlendDistance = vrStereoOptActive ? globals::features::vrStereoOptimizations.settings.fullBlendDistance : 0.0f; + cbData.POMDepthScale = vrStereoOptActive ? globals::features::vrStereoOptimizations.settings.pomDepthScale : 1.0f; + stereoBlendCB->Update(cbData); auto cbPtr = stereoBlendCB->CB(); - ID3D11ShaderResourceView* srvs[2]{ stereoBlendCopyTex->srv.get(), depthSRV }; - ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + auto& motionVectors = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMOTION_VECTOR]; + + bool isOverwriteMode = vrStereoOptActive; ID3D11ComputeShader* activeCS = stereoBlendCS.get(); - if (settings.StereoBlendDebugMode == 1 && stereoBlendDebugBackCheckCS) - activeCS = stereoBlendDebugBackCheckCS.get(); - else if (settings.StereoBlendDebugMode == 2 && stereoBlendDebugBlendWeightCS) - activeCS = stereoBlendDebugBlendWeightCS.get(); - else if (settings.StereoBlendDebugMode == 3 && stereoBlendDebugEdgeDetectionCS) - activeCS = stereoBlendDebugEdgeDetectionCS.get(); + if (vrStereoOptActive) { + activeCS = stereoBlendOverwriteCS.get(); + } else { + int effectiveMode = settings.StereoBlendDebugMode; + if (effectiveMode == 1 && stereoBlendDebugBackCheckCS) + activeCS = stereoBlendDebugBackCheckCS.get(); + else if (effectiveMode == 2 && stereoBlendDebugBlendWeightCS) + activeCS = stereoBlendDebugBlendWeightCS.get(); + else if (effectiveMode == 3 && stereoBlendDebugEdgeDetectionCS) + activeCS = stereoBlendDebugEdgeDetectionCS.get(); + } + + // Save and unbind DSV to avoid SRV/DSV conflict on depth buffer in overwrite 
mode + ID3D11RenderTargetView* savedRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {}; + ID3D11DepthStencilView* savedDSV = nullptr; + if (isOverwriteMode) { + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, &savedDSV); + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, nullptr); + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + } + ID3D11ShaderResourceView* srvs[2]{ stereoBlendCopyTex->srv.get(), depthSRV }; context->CSSetConstantBuffers(1, 1, &cbPtr); context->CSSetShaderResources(0, 2, srvs); - context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - context->CSSetShader(activeCS, nullptr, 0); + if (isOverwriteMode) { + ID3D11ShaderResourceView* modeSRV = globals::features::vrStereoOptimizations.GetModeTextureSRV(); + if (modeSRV) + context->CSSetShaderResources(2, 1, &modeSRV); + + // Bind REFLECTANCE SRV for POM depth offset (stored in .w by Lighting pass) + auto& reflectanceRT = renderer->GetRuntimeData().renderTargets[REFLECTANCE]; + context->CSSetShaderResources(3, 1, &reflectanceRT.SRV); + + ID3D11UnorderedAccessView* uavs[2]{ main.UAV, motionVectors.UAV }; + context->CSSetUnorderedAccessViews(0, 2, uavs, nullptr); + } else { + ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + } + + // Bind linear sampler for hardware bilinear color sampling in overwrite mode + if (isOverwriteMode) { + if (!stereoBlendLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + globals::d3d::device->CreateSamplerState(&sampDesc, stereoBlendLinearSampler.put()); + } + ID3D11SamplerState* samplers[] = { stereoBlendLinearSampler.get() }; + context->CSSetSamplers(0, 1, samplers); + } + + context->CSSetShader(activeCS, nullptr, 0); 
context->Dispatch(dispatchCount.x, dispatchCount.y, 1); // Cleanup - srvs[0] = nullptr; - srvs[1] = nullptr; - uavs[0] = nullptr; - cbPtr = nullptr; - context->CSSetShaderResources(0, 2, srvs); - context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - context->CSSetConstantBuffers(1, 1, &cbPtr); + ID3D11ShaderResourceView* nullSRVs[4] = {}; + context->CSSetShaderResources(0, isOverwriteMode ? 4 : 2, nullSRVs); + ID3D11UnorderedAccessView* nullUAVs[2] = {}; + context->CSSetUnorderedAccessViews(0, isOverwriteMode ? 2 : 1, nullUAVs, nullptr); + ID3D11Buffer* nullCB = nullptr; + context->CSSetConstantBuffers(1, 1, &nullCB); + if (isOverwriteMode) { + ID3D11SamplerState* nullSampler[] = { nullptr }; + context->CSSetSamplers(0, 1, nullSampler); + } context->CSSetShader(nullptr, nullptr, 0); + // Restore DSV after CS dispatch in overwrite mode + if (isOverwriteMode && savedDSV) { + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, nullptr); + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, savedDSV); + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + savedDSV->Release(); + } + if (globals::state->frameAnnotations) globals::state->EndPerfEvent(); } diff --git a/src/Features/VRStereoOptimizations.cpp b/src/Features/VRStereoOptimizations.cpp new file mode 100644 index 0000000000..4b9fb4d481 --- /dev/null +++ b/src/Features/VRStereoOptimizations.cpp @@ -0,0 +1,828 @@ +#include "VRStereoOptimizations.h" + +#include "ExtendedMaterials.h" +#include "Globals.h" +#include "State.h" +#include "Utils/D3D.h" +#include "Utils/Game.h" + +#include + +// JSON enum serialization for StereoMode +NLOHMANN_JSON_SERIALIZE_ENUM(VRStereoOptimizations::StereoMode, { + { VRStereoOptimizations::StereoMode::Off, "Off" }, + { VRStereoOptimizations::StereoMode::Enable, "Enable" }, + }) + +//============================================================================= +// SETTINGS MANAGEMENT 
+//============================================================================= + +void VRStereoOptimizations::SaveSettings(json& o_json) +{ + o_json["StereoMode"] = settings.stereoMode; + o_json["DisocclusionDepthThreshold"] = settings.disocclusionDepthThreshold; + o_json["FullBlendDistance"] = settings.fullBlendDistance; + o_json["QualityJitterOffset"] = settings.qualityJitterOffset; + o_json["FoveatedRegionRadius"] = settings.foveatedRegionRadius; + o_json["FoveatedRegionCenterX"] = settings.foveatedRegionCenterX; + o_json["FoveatedRegionCenterY"] = settings.foveatedRegionCenterY; + o_json["UseEyeTracking"] = settings.useEyeTracking; + o_json["DebugVisualization"] = settings.debugVisualization; + o_json["DebugSkipMerge"] = settings.debugSkipMerge; + o_json["DebugForceAllStencil"] = settings.debugForceAllStencil; + o_json["DebugForceAllReprojectCS"] = settings.debugForceAllReprojectCS; + o_json["DebugDepthMap"] = settings.debugDepthMap; + o_json["MipBiasMode"] = settings.mipBiasMode; + o_json["MipLodBias"] = settings.mipLodBias; + o_json["MipBiasNearDist"] = settings.mipBiasNearDist; + o_json["MipBiasFarDist"] = settings.mipBiasFarDist; + o_json["CASStrength"] = settings.casStrength; + o_json["AlphaTestThreshold"] = settings.alphaTestThreshold; +} + +void VRStereoOptimizations::LoadSettings(json& o_json) +{ + if (o_json.contains("StereoMode")) + settings.stereoMode = o_json["StereoMode"].get(); + if (o_json.contains("DisocclusionDepthThreshold")) + settings.disocclusionDepthThreshold = o_json["DisocclusionDepthThreshold"].get(); + if (o_json.contains("QualityJitterOffset")) + settings.qualityJitterOffset = o_json["QualityJitterOffset"].get(); + if (o_json.contains("FoveatedRegionRadius")) + settings.foveatedRegionRadius = o_json["FoveatedRegionRadius"].get(); + if (o_json.contains("FoveatedRegionCenterX")) + settings.foveatedRegionCenterX = o_json["FoveatedRegionCenterX"].get(); + if (o_json.contains("FoveatedRegionCenterY")) + settings.foveatedRegionCenterY = 
o_json["FoveatedRegionCenterY"].get(); + if (o_json.contains("UseEyeTracking")) + settings.useEyeTracking = o_json["UseEyeTracking"].get(); + if (o_json.contains("DebugVisualization")) + settings.debugVisualization = o_json["DebugVisualization"].get(); + if (o_json.contains("DebugSkipMerge")) + settings.debugSkipMerge = o_json["DebugSkipMerge"].get(); + if (o_json.contains("DebugForceAllStencil")) + settings.debugForceAllStencil = o_json["DebugForceAllStencil"].get(); + if (o_json.contains("DebugForceAllReprojectCS")) + settings.debugForceAllReprojectCS = o_json["DebugForceAllReprojectCS"].get(); + if (o_json.contains("DebugDepthMap")) + settings.debugDepthMap = o_json["DebugDepthMap"].get(); + if (o_json.contains("FullBlendDistance")) + settings.fullBlendDistance = o_json["FullBlendDistance"].get(); + if (o_json.contains("MipBiasMode")) + settings.mipBiasMode = o_json["MipBiasMode"].get(); + // Backwards compat: old bool EnableMipBias -> mode 2 (Distant Trees) + else if (o_json.contains("EnableMipBias") && o_json["EnableMipBias"].get()) + settings.mipBiasMode = 2; + if (o_json.contains("MipLodBias")) + settings.mipLodBias = o_json["MipLodBias"].get(); + if (o_json.contains("MipBiasNearDist")) + settings.mipBiasNearDist = o_json["MipBiasNearDist"].get(); + if (o_json.contains("MipBiasFarDist")) + settings.mipBiasFarDist = o_json["MipBiasFarDist"].get(); + // CAS disabled for now — ignore saved value + // if (o_json.contains("CASStrength")) + // settings.casStrength = o_json["CASStrength"].get(); + settings.casStrength = 0.0f; + if (o_json.contains("AlphaTestThreshold")) + settings.alphaTestThreshold = o_json["AlphaTestThreshold"].get(); +} + +void VRStereoOptimizations::RestoreDefaultSettings() +{ + settings = {}; +} + +//============================================================================= +// RESOURCE SETUP +//============================================================================= + +void VRStereoOptimizations::SetupResources() +{ + if 
(!REL::Module::IsVR()) + return; + + auto device = globals::d3d::device; + auto renderer = globals::game::renderer; + + // Constant buffers + paramsCB = eastl::make_unique(ConstantBufferDesc()); + + // Get main RT dimensions for per-eye calculations + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + D3D11_TEXTURE2D_DESC mainDesc; + main.texture->GetDesc(&mainDesc); + + // Per-pixel mode texture (R8_UINT, full SBS resolution = both eyes) + { + D3D11_TEXTURE2D_DESC modeDesc{}; + modeDesc.Width = mainDesc.Width; + modeDesc.Height = mainDesc.Height; + modeDesc.MipLevels = 1; + modeDesc.ArraySize = 1; + modeDesc.Format = DXGI_FORMAT_R8_UINT; + modeDesc.SampleDesc.Count = 1; + modeDesc.SampleDesc.Quality = 0; + modeDesc.Usage = D3D11_USAGE_DEFAULT; + modeDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + modeDesc.CPUAccessFlags = 0; + modeDesc.MiscFlags = 0; + + texPerPixelMode = eastl::make_unique(modeDesc); + texPerPixelMode->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = DXGI_FORMAT_R8_UINT, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + texPerPixelMode->CreateUAV(D3D11_UNORDERED_ACCESS_VIEW_DESC{ + .Format = DXGI_FORMAT_R8_UINT, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } }); + } + + // Depth-stencil state for stencil write pass: + // Depth test OFF (not rendering geometry), stencil ALWAYS + REPLACE with ref=1 + { + D3D11_DEPTH_STENCIL_DESC dssDesc{}; + dssDesc.DepthEnable = FALSE; + dssDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; + dssDesc.StencilEnable = TRUE; + dssDesc.StencilReadMask = 0xFF; + dssDesc.StencilWriteMask = 0xFF; + dssDesc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dssDesc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + dssDesc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE; + dssDesc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + dssDesc.BackFace = 
dssDesc.FrontFace; + + DX::ThrowIfFailed(device->CreateDepthStencilState(&dssDesc, stencilWriteDSS.put())); + } + + // Rasterizer state for stencil write: no culling, no depth clip + { + D3D11_RASTERIZER_DESC rsDesc{}; + rsDesc.FillMode = D3D11_FILL_SOLID; + rsDesc.CullMode = D3D11_CULL_NONE; + rsDesc.DepthClipEnable = FALSE; + + DX::ThrowIfFailed(device->CreateRasterizerState(&rsDesc, stencilWriteRS.put())); + } + + // Read-only depth DSV for stencil write pass: allows simultaneous depth SRV binding. + // We write stencil but never write depth, so D3D11_DSV_READ_ONLY_DEPTH is safe. + { + auto& depthData = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (depthData.views[0] && depthData.texture) { + D3D11_DEPTH_STENCIL_VIEW_DESC dsvDesc{}; + depthData.views[0]->GetDesc(&dsvDesc); + dsvDesc.Flags = D3D11_DSV_READ_ONLY_DEPTH; + + DX::ThrowIfFailed(device->CreateDepthStencilView(depthData.texture, &dsvDesc, stencilWriteReadOnlyDSV.put())); + } else { + logger::warn("[VRStereoOptimizations] Could not create read-only DSV: depth stencil data not available"); + } + } + + // CAS sharpness parameter buffer (structured buffer SRV to avoid cbuffer conflicts) + { + D3D11_BUFFER_DESC bufDesc{}; + bufDesc.ByteWidth = sizeof(float); + bufDesc.Usage = D3D11_USAGE_DYNAMIC; + bufDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + bufDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + bufDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + bufDesc.StructureByteStride = sizeof(float); + + float initSharpness = settings.casStrength; + D3D11_SUBRESOURCE_DATA initData{}; + initData.pSysMem = &initSharpness; + + DX::ThrowIfFailed(device->CreateBuffer(&bufDesc, &initData, casParamsBuf.put())); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc{}; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Buffer.NumElements = 1; + 
DX::ThrowIfFailed(device->CreateShaderResourceView(casParamsBuf.get(), &srvDesc, casParamsSRV.put())); + } + + // CAS output texture (same format as main RT, with UAV capability) + { + D3D11_TEXTURE2D_DESC casDesc{}; + casDesc.Width = mainDesc.Width; + casDesc.Height = mainDesc.Height; + casDesc.MipLevels = 1; + casDesc.ArraySize = 1; + casDesc.Format = mainDesc.Format; + casDesc.SampleDesc.Count = 1; + casDesc.SampleDesc.Quality = 0; + casDesc.Usage = D3D11_USAGE_DEFAULT; + casDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + casDesc.CPUAccessFlags = 0; + casDesc.MiscFlags = 0; + + casTex = eastl::make_unique(casDesc); + casTex->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = mainDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + casTex->CreateUAV(D3D11_UNORDERED_ACCESS_VIEW_DESC{ + .Format = mainDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } }); + } + + CompileShaders(); + + logger::info("[VRStereoOptimizations] Resources created: mode tex {}x{} (full SBS)", mainDesc.Width, mainDesc.Height); +} + +void VRStereoOptimizations::CompileShaders() +{ + std::vector> csDefines = { + { "VR", nullptr }, + { "FRAMEBUFFER", nullptr } + }; + + std::vector> vspsDefines = { + { "VR", nullptr } + }; + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilCS.hlsl", csDefines, "cs_5_0")) + stencilCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilCS"); + + { + auto debugDefines = csDefines; + debugDefines.push_back({ "DEBUG_DEPTH_MAP", nullptr }); + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilCS.hlsl", debugDefines, "cs_5_0")) + stencilDebugDepthMapCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilCS (DEBUG_DEPTH_MAP)"); + } + + if (auto* ptr = 
Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilWriteVS.hlsl", vspsDefines, "vs_5_0")) + stencilWriteVS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilWriteVS"); + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilWritePS.hlsl", vspsDefines, "ps_5_0")) + stencilWritePS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilWritePS"); + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\ReprojectionCS.hlsl", csDefines, "cs_5_0")) + reprojectionCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile ReprojectionCS"); + + { + std::vector> casDefines = {}; + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VR\\CASCS.hlsl", casDefines, "cs_5_0")) + casCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile CASCS"); + } +} + +void VRStereoOptimizations::ClearShaderCache() +{ + stencilCS = nullptr; + stencilDebugDepthMapCS = nullptr; + stencilWriteVS = nullptr; + stencilWritePS = nullptr; + reprojectionCS = nullptr; + casCS = nullptr; + dssCache.clear(); +} + +void VRStereoOptimizations::Reset() +{ + stencilActive = false; + stencilSwapCount = 0; +} + +//============================================================================= +// IMGUI SETTINGS +//============================================================================= + +void VRStereoOptimizations::DrawSettings() +{ + const char* modeNames[] = { "Off", "Enable" }; + int currentMode = static_cast(settings.stereoMode); + if (ImGui::Combo("Feature Enable", ¤tMode, modeNames, IM_ARRAYSIZE(modeNames))) + settings.stereoMode = static_cast(currentMode); + + // MIP LOD Bias section (always shown, independent of stereo mode) + ImGui::Separator(); + const char* mipBiasModes[] = { "Off", "All Textures", "Distant Trees" }; + ImGui::Combo("MIP LOD 
Bias", &settings.mipBiasMode, mipBiasModes, 3); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Off: No MIP bias\nAll Textures: Depth-gated sharpening for all textures\nDistant Trees: Depth-gated sharpening for foliage only"); + + if (settings.mipBiasMode > 0) { + ImGui::SliderFloat("MIP Bias Strength", &settings.mipLodBias, -3.0f, 0.0f, "%.2f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Negative = sharper. -0.5 subtle, -1.0 moderate, -2.0 aggressive."); + ImGui::SliderFloat("MIP Near Distance", &settings.mipBiasNearDist, 0.0f, 10000.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Game units. No MIP bias closer than this distance."); + ImGui::SliderFloat("MIP Far Distance", &settings.mipBiasFarDist, 0.0f, 20000.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Game units. Full MIP bias beyond this distance.\nSmooth ramp between near and far."); + } + ImGui::Separator(); + + // CAS slider hidden for now — forced to 0 + // ImGui::SliderFloat("CAS Sharpening", &settings.casStrength, 0.0f, 1.0f, "%.2f"); + // if (ImGui::IsItemHovered()) + // ImGui::SetTooltip("Contrast Adaptive Sharpening (intended for use with TAA).\n0 = disabled, higher = sharper."); + // ImGui::Separator(); + + if (settings.stereoMode == StereoMode::Off) + return; + + ImGui::SliderFloat("Disocclusion Depth Threshold", &settings.disocclusionDepthThreshold, 0.001f, 0.1f, "%.4f"); + + if (globals::state->IsDeveloperMode()) { + if (ImGui::TreeNode("Debug")) { + ImGui::SliderFloat("Full Blend Distance", &settings.fullBlendDistance, 0.0f, 10000.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Geometry closer than this distance (game units) is fully shaded in both eyes and bilaterally blended for 2x supersampling. 
0 = disabled."); + + ImGui::SliderFloat("POM Depth Scale", &settings.pomDepthScale, 0.0f, 500.0f, "%.1f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Scale factor for POM depth correction in stereo reprojection.\n1.0 = physical scale. Increase for more visible POM stereo depth."); + ImGui::Checkbox("Skip Pixel Reprojection", &settings.debugSkipMerge); + ImGui::Checkbox("Full Blend Depth View", &settings.debugFullBlendDepth); + ImGui::Checkbox("Debug POM Depth", &settings.debugPOMDepth); + if (settings.debugFullBlendDepth) + ImGui::TextColored(ImVec4(0, 1, 1, 1), " Cyan = full blend zone (closer = stronger tint)"); + ImGui::Text("Stencil swaps this frame: %u", stencilSwapCount); + ImGui::TreePop(); + } + } +} + +//============================================================================= +// CONSTANT BUFFER UPDATE +//============================================================================= + +void VRStereoOptimizations::UpdateConstantBuffer() +{ + float2 resolution = Util::ConvertToDynamic(globals::state->screenSize); + + VRStereoOptParams params{}; + params.FrameDim[0] = resolution.x; + params.FrameDim[1] = resolution.y; + params.RcpFrameDim[0] = 1.0f / resolution.x; + params.RcpFrameDim[1] = 1.0f / resolution.y; + params.StereoModeValue = static_cast(settings.stereoMode); + params.DisocclusionThreshold = settings.disocclusionDepthThreshold; + params.EdgeDepthThreshold = settings.edgeDepthThreshold; + params.EdgeWidth = static_cast(settings.edgeWidth); + params.QualityJitter[0] = settings.qualityJitterOffset; + params.QualityJitter[1] = settings.qualityJitterOffset; + params.FoveatedRadius = settings.foveatedRegionRadius; + params.FoveatedCenter[0] = settings.foveatedRegionCenterX; + params.FoveatedCenter[1] = settings.foveatedRegionCenterY; + params.MinEdgeDistance = settings.minEdgeDistance; + params.FullBlendDistance = settings.fullBlendDistance; + + paramsCB->Update(params); +} + 
+//============================================================================= +// PHASE 1: STENCIL CLASSIFICATION + WRITE +//============================================================================= + +void VRStereoOptimizations::DispatchStencil() +{ + if (!REL::Module::IsVR()) + return; + if (settings.stereoMode == StereoMode::Off) + return; + if (!stencilCS || !stencilWriteVS || !stencilWritePS || !texPerPixelMode || !paramsCB) + return; + + ZoneScoped; + TracyD3D11Zone(globals::state->tracyCtx, "VR Stereo Opt - Stencil"); + + if (globals::state->frameAnnotations) + globals::state->BeginPerfEvent("VR Stereo Opt - Stencil"); + + auto context = globals::d3d::context; + + UpdateConstantBuffer(); + auto cbPtr = paramsCB->CB(); + // Use live depth buffer (kMAIN) instead of kPOST_ZPREPASS_COPY — at StartDeferred time, + // kPOST_ZPREPASS_COPY is stale (previous frame). kMAIN has fresh z-prepass depth so + // StencilCS can correctly detect sky-vs-geometry edges in the current frame. + auto renderer = globals::game::renderer; + auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + if (!depthSRV) { + logger::warn("[VRStereoOptimizations] DispatchStencil: depthSRV is null, skipping"); + return; + } + + // Dispatch classification CS over Eye 1 region + // Input: t0 = depth, b1 = params CB + // Output: u0 = per-pixel mode texture + { + ID3D11ShaderResourceView* srvs[1]{ depthSRV }; + ID3D11UnorderedAccessView* uavs[1]{ texPerPixelMode->uav.get() }; + + context->CSSetConstantBuffers(1, 1, &cbPtr); + context->CSSetShaderResources(0, 1, srvs); + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + auto* activeStencilCS = (settings.debugDepthMap && stencilDebugDepthMapCS) ? 
stencilDebugDepthMapCS.get() : stencilCS.get(); + context->CSSetShader(activeStencilCS, nullptr, 0); + + uint32_t fullWidth = texPerPixelMode->desc.Width; + uint32_t fullHeight = texPerPixelMode->desc.Height; + context->Dispatch((fullWidth + 7) / 8, (fullHeight + 7) / 8, 1); + + // Cleanup CS bindings + ID3D11ShaderResourceView* nullSRV = nullptr; + ID3D11UnorderedAccessView* nullUAV = nullptr; + ID3D11Buffer* nullCB = nullptr; + context->CSSetShaderResources(0, 1, &nullSRV); + context->CSSetUnorderedAccessViews(0, 1, &nullUAV, nullptr); + context->CSSetConstantBuffers(1, 1, &nullCB); + context->CSSetShader(nullptr, nullptr, 0); + } + + // Transfer classification to hardware stencil buffer + ExecuteStencilWritePass(); + + stencilActive = true; + stencilSwapCount = 0; + + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); +} + +void VRStereoOptimizations::ExecuteStencilWritePass() +{ + auto context = globals::d3d::context; + auto renderer = globals::game::renderer; + + // ===== SAVE FULL D3D11 PIPELINE STATE ===== + + ID3D11RenderTargetView* savedRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {}; + ID3D11DepthStencilView* savedDSV = nullptr; + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, &savedDSV); + + ID3D11DepthStencilState* savedDSS = nullptr; + UINT savedStencilRef = 0; + context->OMGetDepthStencilState(&savedDSS, &savedStencilRef); + + ID3D11BlendState* savedBlendState = nullptr; + FLOAT savedBlendFactor[4] = {}; + UINT savedSampleMask = 0; + context->OMGetBlendState(&savedBlendState, savedBlendFactor, &savedSampleMask); + + ID3D11RasterizerState* savedRS = nullptr; + context->RSGetState(&savedRS); + + D3D11_VIEWPORT savedViewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {}; + UINT numViewports = D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + context->RSGetViewports(&numViewports, savedViewports); + + ID3D11VertexShader* savedVS = nullptr; + context->VSGetShader(&savedVS, 
nullptr, nullptr); + + ID3D11PixelShader* savedPS = nullptr; + context->PSGetShader(&savedPS, nullptr, nullptr); + + ID3D11GeometryShader* savedGS = nullptr; + context->GSGetShader(&savedGS, nullptr, nullptr); + + ID3D11InputLayout* savedInputLayout = nullptr; + context->IAGetInputLayout(&savedInputLayout); + + D3D11_PRIMITIVE_TOPOLOGY savedTopology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + context->IAGetPrimitiveTopology(&savedTopology); + + ID3D11ShaderResourceView* savedPSSRVs[2] = {}; + context->PSGetShaderResources(0, 2, savedPSSRVs); + + ID3D11Buffer* savedPSCB = nullptr; + context->PSGetConstantBuffers(1, 1, &savedPSCB); + + // ===== SET UP STENCIL WRITE PASS ===== + + // Use our custom read-only-depth DSV to allow simultaneous depth SRV binding (t1). + // D3D11_DSV_READ_ONLY_DEPTH permits depth SRV + stencil write simultaneously. + // Using views[0] would cause D3D11 to silently NULL the depth SRV. + // depthData.readOnlyViews[0] has BOTH read-only flags and doesn't allow stencil writes. 
+ context->OMSetRenderTargets(0, nullptr, stencilWriteReadOnlyDSV.get()); + context->OMSetDepthStencilState(stencilWriteDSS.get(), 1); + context->RSSetState(stencilWriteRS.get()); + + // Eye 1 viewport (right half of SBS buffer) + { + D3D11_TEXTURE2D_DESC mainDesc; + renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN].texture->GetDesc(&mainDesc); + + D3D11_VIEWPORT vp{}; + vp.TopLeftX = static_cast(mainDesc.Width / 2); + vp.TopLeftY = 0.0f; + vp.Width = static_cast(mainDesc.Width / 2); + vp.Height = static_cast(mainDesc.Height); + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + } + + // Bind shaders and mode texture + context->VSSetShader(stencilWriteVS.get(), nullptr, 0); + context->PSSetShader(stencilWritePS.get(), nullptr, 0); + context->GSSetShader(nullptr, nullptr, 0); + + ID3D11ShaderResourceView* modeSRV = texPerPixelMode->srv.get(); + context->PSSetShaderResources(0, 1, &modeSRV); + + auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + context->PSSetShaderResources(1, 1, &depthSRV); + + // Bind params CB to pixel shader (CS and PS have separate CB bindings) + auto cbPtr = paramsCB->CB(); + context->PSSetConstantBuffers(1, 1, &cbPtr); + + // Fullscreen triangle: no VB/IB, procedurally generated in VS + context->IASetInputLayout(nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + context->Draw(3, 0); + + // ===== RESTORE FULL D3D11 PIPELINE STATE ===== + + ID3D11ShaderResourceView* nullSRVs[2] = {}; + context->PSSetShaderResources(0, 2, nullSRVs); + + context->PSSetConstantBuffers(1, 1, &savedPSCB); + + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, savedDSV); + context->OMSetDepthStencilState(savedDSS, savedStencilRef); + context->OMSetBlendState(savedBlendState, savedBlendFactor, savedSampleMask); + context->RSSetState(savedRS); + context->RSSetViewports(numViewports, 
savedViewports); + context->VSSetShader(savedVS, nullptr, 0); + context->PSSetShader(savedPS, nullptr, 0); + context->GSSetShader(savedGS, nullptr, 0); + context->IASetInputLayout(savedInputLayout); + context->IASetPrimitiveTopology(savedTopology); + context->PSSetShaderResources(0, 2, savedPSSRVs); + + // Release COM references acquired by Get* calls + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + if (savedDSV) + savedDSV->Release(); + if (savedDSS) + savedDSS->Release(); + if (savedBlendState) + savedBlendState->Release(); + if (savedRS) + savedRS->Release(); + if (savedVS) + savedVS->Release(); + if (savedPS) + savedPS->Release(); + if (savedGS) + savedGS->Release(); + if (savedInputLayout) + savedInputLayout->Release(); + if (savedPSSRVs[0]) + savedPSSRVs[0]->Release(); + if (savedPSSRVs[1]) + savedPSSRVs[1]->Release(); + if (savedPSCB) + savedPSCB->Release(); +} + +void VRStereoOptimizations::PerformLateStencilWrite() +{ + // Placeholder for future multi-pass stencil strategies +} + +//============================================================================= +// DSS CACHE: CLONE + STENCIL NOT_EQUAL ENFORCEMENT +//============================================================================= + +ID3D11DepthStencilState* VRStereoOptimizations::GetOrCreateModifiedDSS(ID3D11DepthStencilState* originalDSS) +{ + if (!originalDSS || !stencilActive) + return originalDSS; + + stencilSwapCount++; + + auto it = dssCache.find(originalDSS); + if (it != dssCache.end()) + return it->second.get(); + + // Clone original desc and add read-only stencil NOT_EQUAL test + D3D11_DEPTH_STENCIL_DESC desc{}; + originalDSS->GetDesc(&desc); + + desc.StencilEnable = TRUE; + desc.StencilReadMask = 0xFF; + desc.StencilWriteMask = 0x00; // Read-only: game rendering must not modify our marks + + // NOT_EQUAL with ref=1: skip pixels where stencil == 1 (MODE_MAIN) + desc.FrontFace.StencilFunc = D3D11_COMPARISON_NOT_EQUAL; + desc.FrontFace.StencilFailOp = 
D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + desc.BackFace = desc.FrontFace; + + winrt::com_ptr modifiedDSS; + HRESULT hr = globals::d3d::device->CreateDepthStencilState(&desc, modifiedDSS.put()); + if (FAILED(hr)) { + logger::warn("[VRStereoOptimizations] Failed to create modified DSS (HRESULT: {:#x})", static_cast(hr)); + return originalDSS; + } + + auto* result = modifiedDSS.get(); + dssCache[originalDSS] = std::move(modifiedDSS); + + return result; +} + +//============================================================================= +// PHASE 3: REPROJECTION COMPUTE SHADER +//============================================================================= + +void VRStereoOptimizations::DispatchReprojection() +{ + if (!REL::Module::IsVR()) + return; + if (settings.stereoMode == StereoMode::Off) + return; + if (!reprojectionCS || !texPerPixelMode || !paramsCB) { + DeactivateStencil(); + return; + } + if (settings.debugSkipMerge) { + DeactivateStencil(); + return; + } + + ZoneScoped; + TracyD3D11Zone(globals::state->tracyCtx, "VR Stereo Opt - Reprojection"); + + if (globals::state->frameAnnotations) + globals::state->BeginPerfEvent("VR Stereo Opt - Reprojection"); + + auto context = globals::d3d::context; + auto renderer = globals::game::renderer; + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + + UpdateConstantBuffer(); + auto cbPtr = paramsCB->CB(); + auto* depthSRV = Util::GetCurrentSceneDepthSRV(); + + // Bind: t0 = depth, t1 = mode texture, u0 = main UAV, b1 = params + ID3D11ShaderResourceView* srvs[2]{ + depthSRV, + texPerPixelMode->srv.get() + }; + ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + + context->CSSetConstantBuffers(1, 1, &cbPtr); + context->CSSetShaderResources(0, 2, srvs); + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + context->CSSetShader(reprojectionCS.get(), nullptr, 0); + + // Dispatch over full 
SBS texture + uint32_t fullWidth = texPerPixelMode->desc.Width; + uint32_t fullHeight = texPerPixelMode->desc.Height; + context->Dispatch((fullWidth + 7) / 8, (fullHeight + 7) / 8, 1); + + // Cleanup + ID3D11ShaderResourceView* nullSRVs[2] = {}; + ID3D11UnorderedAccessView* nullUAV = nullptr; + ID3D11Buffer* nullCB = nullptr; + context->CSSetShaderResources(0, 2, nullSRVs); + context->CSSetUnorderedAccessViews(0, 1, &nullUAV, nullptr); + context->CSSetConstantBuffers(1, 1, &nullCB); + context->CSSetShader(nullptr, nullptr, 0); + + // Stencil culling is done for this frame + logger::trace("[VRStereoOptimizations] Frame: stencilSwapCount={}", stencilSwapCount); + stencilActive = false; + + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); +} + +void VRStereoOptimizations::DeactivateStencil() +{ + if (!stencilActive) + return; + logger::trace("[VRStereoOptimizations] Frame: stencilSwapCount={}", stencilSwapCount); + stencilActive = false; +} + +//============================================================================= +// CAS (CONTRAST ADAPTIVE SHARPENING) - POST-TAA +//============================================================================= + +void VRStereoOptimizations::ApplyCAS(RE::RENDER_TARGET a_target) +{ + logger::trace("[VRStereoOptimizations] CAS: entered (strength={}, casCS={}, casTex={}, casParamsBuf={})", + settings.casStrength, (void*)casCS.get(), (void*)casTex.get(), (void*)casParamsBuf.get()); + + if (settings.casStrength <= 0.0f || !casCS || !casTex || !casParamsBuf) + return; + + if (!REL::Module::IsVR()) + return; + + auto renderer = globals::game::renderer; + auto context = globals::d3d::context; + + // Get the render target that post-processing just wrote to + auto& target = renderer->GetRuntimeData().renderTargets[a_target]; + if (!target.texture || !target.SRV) { + logger::trace("[VRStereoOptimizations] CAS: target RT has no texture/SRV, skipping"); + return; + } + + D3D11_TEXTURE2D_DESC targetDesc; + 
target.texture->GetDesc(&targetDesc); + logger::trace("[VRStereoOptimizations] CAS: dispatching on RT {} ({}x{}, strength={})", (int)a_target, targetDesc.Width, targetDesc.Height, settings.casStrength); + + // Check for dimension/format mismatch with intermediate texture + D3D11_TEXTURE2D_DESC casTexDesc; + static_cast(casTex->resource.get())->GetDesc(&casTexDesc); + if (casTexDesc.Width != targetDesc.Width || casTexDesc.Height != targetDesc.Height || casTexDesc.Format != targetDesc.Format) { + logger::info("[VRStereoOptimizations] CAS: recreating casTex to match target ({}x{} fmt={} -> {}x{} fmt={})", + casTexDesc.Width, casTexDesc.Height, (int)casTexDesc.Format, + targetDesc.Width, targetDesc.Height, (int)targetDesc.Format); + + D3D11_TEXTURE2D_DESC newDesc{}; + newDesc.Width = targetDesc.Width; + newDesc.Height = targetDesc.Height; + newDesc.MipLevels = 1; + newDesc.ArraySize = 1; + newDesc.Format = targetDesc.Format; + newDesc.SampleDesc.Count = 1; + newDesc.SampleDesc.Quality = 0; + newDesc.Usage = D3D11_USAGE_DEFAULT; + newDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + newDesc.CPUAccessFlags = 0; + newDesc.MiscFlags = 0; + + casTex = eastl::make_unique(newDesc); + casTex->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = targetDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + casTex->CreateUAV(D3D11_UNORDERED_ACCESS_VIEW_DESC{ + .Format = targetDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } }); + } + + // Update sharpness parameter via Map/Unmap + { + D3D11_MAPPED_SUBRESOURCE mapped; + if (SUCCEEDED(context->Map(casParamsBuf.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped))) { + *static_cast(mapped.pData) = settings.casStrength; + context->Unmap(casParamsBuf.get(), 0); + } + } + + // Unbind the RT so we can read from it + context->OMSetRenderTargets(0, nullptr, nullptr); + + // Dispatch CAS: read from 
target SRV, write to casTex UAV + { + ID3D11ShaderResourceView* views[2] = { target.SRV, casParamsSRV.get() }; + context->CSSetShaderResources(0, 2, views); + + ID3D11UnorderedAccessView* uavs[1] = { casTex->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + context->CSSetShader(casCS.get(), nullptr, 0); + + context->Dispatch((targetDesc.Width + 7) / 8, (targetDesc.Height + 7) / 8, 1); + } + + // Cleanup CS state + ID3D11ShaderResourceView* nullSRV[2] = { nullptr, nullptr }; + context->CSSetShaderResources(0, 2, nullSRV); + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetShader(nullptr, nullptr, 0); + + // Copy sharpened result back to the render target + context->CopyResource(target.texture, casTex->resource.get()); + + globals::game::stateUpdateFlags->set(RE::BSGraphics::ShaderFlags::DIRTY_RENDERTARGET); +} diff --git a/src/Features/VRStereoOptimizations.h b/src/Features/VRStereoOptimizations.h new file mode 100644 index 0000000000..5f14963854 --- /dev/null +++ b/src/Features/VRStereoOptimizations.h @@ -0,0 +1,232 @@ +#pragma once + +#include "Feature.h" + +#include +#include +#include + +/** + * @brief VR Stereo Rendering Optimizations feature. + * + * Uses hardware stencil culling to skip Eye 1 pixel shading for pixels that can be + * reprojected from Eye 0 via lateral stereo reprojection, then runs a compute shader + * to fill those pixels. This avoids redundant pixel shading in overlapping stereo regions. + * + * Pipeline: + * 1. DispatchStencil() - CS classifies per-pixel reprojection viability into a mode texture, + * then a fullscreen VS/PS pass writes that classification into the stencil buffer. + * 2. (Game renders Eye 1) - Hardware stencil test skips shading for marked pixels. + * 3. DispatchReprojection() - CS reprojects Eye 0 color into the skipped Eye 1 pixels. 
+ */ +struct VRStereoOptimizations : public Feature +{ + //============================================================================= + // ENUMS + //============================================================================= + + /// Operating mode for stereo reprojection + enum class StereoMode : uint32_t + { + Off = 0, ///< Feature disabled + Enable = 1 ///< Stereo reprojection enabled + }; + + /// Per-pixel classification written by StencilCS + enum PixelMode : uint8_t + { + MODE_DISOCCLUDED = 0, ///< Fully shaded, no reprojection, no blend + MODE_EDGE = 1, ///< Fully shaded + bilateral blend with other eye + MODE_MAIN = 2, ///< Eye 0: no reproject (Perf) / bilateral (Quality). Eye 1: overwrite (Perf) / bilateral (Quality) + MODE_EDGE_NEIGHBOUR = 3, ///< Outer band: background pixels near edge, blended in post-process + }; + + //============================================================================= + // FEATURE BASE CLASS OVERRIDES + //============================================================================= + + virtual inline std::string GetName() override { return "VR Stereo Optimizations"; } + virtual inline std::string GetShortName() override { return "VRStereoOptimizations"; } + virtual inline std::string_view GetShaderDefineName() override { return "VR_STEREO_OPT"; } + virtual inline std::string_view GetCategory() const override { return "Display"; } + virtual inline bool HasShaderDefine(RE::BSShader::Type t) override { return t == RE::BSShader::Type::Utility; } + virtual inline bool SupportsVR() override { return true; } + + virtual void SetupResources() override; + virtual void Reset() override; + virtual void DrawSettings() override; + virtual void SaveSettings(json& o_json) override; + virtual void LoadSettings(json& o_json) override; + virtual void RestoreDefaultSettings() override; + virtual void ClearShaderCache() override; + + virtual std::pair> GetFeatureSummary() override + { + return { + "Stereo rendering optimizations for VR that 
skip redundant pixel shading via stencil culling and lateral reprojection.", + { "Hardware stencil culling of Eye 1 pixels reprojectable from Eye 0", + "Compute shader lateral reprojection to fill culled pixels", + "Performance, Quality, and Foveated modes", + "Debug visualization overlays" } + }; + } + + //============================================================================= + // SETTINGS + //============================================================================= + + struct Settings + { + StereoMode stereoMode = StereoMode::Enable; + float disocclusionDepthThreshold = 0.01f; + float edgeDepthThreshold = 0.05f; + int edgeWidth = 3; ///< Half-width of edge band in pixels (total band = 2 * edgeWidth) + float minEdgeDistance = 5000.0f; ///< Minimum linearized depth for edge AA (game units) + float fullBlendDistance = 0.0f; ///< Linearized depth below which both eyes are fully shaded + blended (game units) + float pomDepthScale = 22.5f; ///< Scale factor for POM depth correction in stereo reprojection + bool debugFullBlendDepth = false; ///< Show full blend depth zone as cyan overlay + float qualityJitterOffset = 0.125f; + float foveatedRegionRadius = 0.3f; + float foveatedRegionCenterX = 0.5f; + float foveatedRegionCenterY = 0.5f; + bool useEyeTracking = false; + + int reprojectionMode = 5; // 0=Blend, 4=Overwrite, 5=Overwrite Eye1 Only + + // Debug controls + bool debugVisualization = false; + bool debugSkipMerge = false; + bool debugForceAllStencil = false; + bool debugForceAllReprojectCS = false; + bool debugDepthMap = false; + bool debugPOMDepth = false; ///< Show POM depth data (Reflectance.w) as heatmap overlay + + // MIP LOD Bias (negative = sharper textures) + // 0 = Off, 1 = All textures (global), 2 = Distant trees only (depth-gated TREE_ANIM) + int mipBiasMode = 0; + float mipLodBias = -2.0f; + float mipBiasNearDist = 2000.0f; ///< Game units: no bias closer than this + float mipBiasFarDist = 6000.0f; ///< Game units: full bias beyond this + + 
// CAS (Contrast Adaptive Sharpening) - post-TAA + float casStrength = 0.0f; ///< 0.0 = disabled, 0.0-1.0 = subtle to strong (hidden for now) + float alphaTestThreshold = 0.001f; ///< Alpha floor for TREE_ANIM zombie texel removal + } settings; + + //============================================================================= + // GPU CONSTANT BUFFER (must match HLSL cbuffer layout exactly) + //============================================================================= + + struct alignas(16) VRStereoOptParams + { + float FrameDim[2]; // Full stereo buffer dimensions + float RcpFrameDim[2]; // 1.0 / FrameDim + + uint32_t StereoModeValue; // Cast of StereoMode enum (the enum currently declares only 0 = Off, 1 = Enable) + float DisocclusionThreshold; + float EdgeDepthThreshold; + uint32_t EdgeWidth; + + float QualityJitter[2]; // Sub-pixel jitter offset (Quality mode) + float FoveatedRadius; + float pad2; + + float FoveatedCenter[2]; // Foveal region center UV + float MinEdgeDistance; + float FullBlendDistance; // Linearized depth for full blend zone + }; + static_assert(sizeof(VRStereoOptParams) % 16 == 0, "VRStereoOptParams must be 16-byte aligned for HLSL cbuffer."); + + //============================================================================= + // PUBLIC API + //============================================================================= + + /** + * @brief Classify Eye 1 pixels and write stencil marks. + * + * Dispatches the stencil classification CS, then performs a fullscreen triangle pass + * to write the classification into the hardware stencil buffer. + * Called from Deferred::StartDeferred() after OverrideBlendStates(). + */ + void DispatchStencil(); + + /** + * @brief Reproject Eye 0 color into stencil-culled Eye 1 pixels. + * + * Binds scene depth (t0) and the per-pixel mode texture (t1) as SRVs and the main render + * target as a UAV (u0), then dispatches a CS over the full SBS texture to fill skipped pixels + * using lateral reprojection from Eye 0. No copy of the main render target is made here. + * Clears the stencil-active flag once the dispatch has been issued; it is also cleared when the + * pass is skipped because resources are missing or debugSkipMerge is set. + * Called from Deferred::DeferredPasses() after DeferredCompositeCS. 
+ */ + void DispatchReprojection(); + + /** + * @brief Creates or retrieves a modified DSS with stencil NOT_EQUAL test. + * + * Clones the given DSS with read-only stencil (WriteMask=0x00, Func=NOT_EQUAL) + * so that pixels marked by our stencil write pass are skipped during normal rendering. + * The stencil reference value (1) is not part of the state object; the caller must pass it + * to OMSetDepthStencilState alongside the returned state. + * The clone forces StencilEnable on and replaces the front/back face stencil func and ops, + * so any stencil configuration carried by the original state is overwritten. + * Cached per unique input DSS pointer. + * NOTE(review): the cache key is the raw pointer and no reference appears to be held on the + * original state - confirm an address cannot be released and reused while an entry is cached. + * + * @param originalDSS The original depth-stencil state to modify. + * @return Modified DSS with stencil test; the original pointer unchanged if it is null, + * if stencil culling is not active, or if state creation fails. + */ + ID3D11DepthStencilState* GetOrCreateModifiedDSS(ID3D11DepthStencilState* originalDSS); + + /// Whether the stencil pass is currently active this frame + bool IsStencilActive() const { return stencilActive; } + + /// Deactivate stencil culling (called from Deferred after geometry rendering completes) + void DeactivateStencil(); + + /// Apply CAS sharpening in place to the render target identified by a_target (called after TAA) + void ApplyCAS(RE::RENDER_TARGET a_target); + + /// Get mode texture SRV for external consumers (e.g., DeferredCompositeCS Eye 1 skip) + ID3D11ShaderResourceView* GetModeTextureSRV() const { return texPerPixelMode ? 
texPerPixelMode->srv.get() : nullptr; } + +private: + //============================================================================= + // INTERNAL METHODS + //============================================================================= + + /// Fullscreen triangle pass: reads mode texture, writes stencil ref=1 for MODE_MAIN pixels + void ExecuteStencilWritePass(); + + /// Late stencil write callback (placeholder for future multi-pass strategies) + void PerformLateStencilWrite(); + + /// Compiles all shaders used by this feature + void CompileShaders(); + + /// Updates the constant buffer with current settings and frame dimensions + void UpdateConstantBuffer(); + + //============================================================================= + // GPU RESOURCES + //============================================================================= + + eastl::unique_ptr paramsCB; + eastl::unique_ptr texPerPixelMode; ///< R8_UINT classification texture (full SBS resolution) + eastl::unique_ptr reprojectionCopyTex; ///< Copy of main RT for reprojection read + + winrt::com_ptr stencilWriteDSS; + winrt::com_ptr stencilWriteRS; + winrt::com_ptr stencilWriteReadOnlyDSV; ///< Read-only-depth DSV for stencil write pass (allows simultaneous depth SRV) + + winrt::com_ptr stencilCS; + winrt::com_ptr stencilDebugDepthMapCS; + winrt::com_ptr stencilWriteVS; + winrt::com_ptr stencilWritePS; + winrt::com_ptr reprojectionCS; + + // CAS sharpening resources + winrt::com_ptr casCS; + eastl::unique_ptr casTex; ///< UAV-capable texture for CAS output + winrt::com_ptr casParamsBuf; ///< Structured buffer for CAS sharpness param + winrt::com_ptr casParamsSRV; ///< SRV for CAS sharpness param + + /// Cache of original DSS -> modified DSS with stencil NOT_EQUAL enforcement + std::unordered_map> dssCache; + + bool stencilActive = false; + uint32_t stencilSwapCount = 0; +}; diff --git a/src/Globals.cpp b/src/Globals.cpp index e90c3bf4ce..f4c245e3a4 100644 --- a/src/Globals.cpp +++ 
b/src/Globals.cpp @@ -29,6 +29,7 @@ #include "Features/UnifiedWater.h" #include "Features/Upscaling.h" #include "Features/VR.h" +#include "Features/VRStereoOptimizations.h" #include "Features/VolumetricLighting.h" #include "Features/VolumetricShadows.h" #include "Features/WaterEffects.h" @@ -84,6 +85,7 @@ namespace globals RenderDoc renderDoc{}; WeatherEditor weatherEditor{}; ExponentialHeightFog exponentialHeightFog{}; + VRStereoOptimizations vrStereoOptimizations{}; namespace llf { @@ -266,13 +268,79 @@ namespace globals { static void thunk(ID3D11DeviceContext* This, ID3D11Resource* pResource, UINT Subresource) { - if (*globals::game::perFrame.get() == pResource && globals::game::mappedFrameBuffer) + if (*globals::game::perFrame.get() == pResource && globals::game::mappedFrameBuffer) { CacheFramebuffer(); + } func(This, pResource, Subresource); } static inline REL::Relocation func; }; + /** + * @brief Hooked OMSetDepthStencilState — replaces DSS with stencil-enforcing version when VR stereo opt is active. + * + * vtable index 36 for ID3D11DeviceContext::OMSetDepthStencilState. + * When VRStereoOptimizations has written stencil marks, this hook transparently swaps + * the game's DSS for a modified version that adds a stencil NOT_EQUAL test, causing + * marked Eye 1 pixels to be skipped during normal rendering. 
+ */ + struct ID3D11DeviceContext_OMSetDepthStencilState + { + static void thunk(ID3D11DeviceContext* This, ID3D11DepthStencilState* pDepthStencilState, UINT StencilRef) + { + if (globals::game::isVR && pDepthStencilState) { + auto& stereoOpt = globals::features::vrStereoOptimizations; + if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { + pDepthStencilState = stereoOpt.GetOrCreateModifiedDSS(pDepthStencilState); + StencilRef = 1; // Must match the ref written by our stencil pass + } + } + func(This, pDepthStencilState, StencilRef); + } + static inline REL::Relocation func; + }; + + /** + * @brief Hooked ClearDepthStencilView — blocks stencil clears when VR stereo opt stencil is active. + * + * vtable index 53 for ID3D11DeviceContext::ClearDepthStencilView. + * Prevents the game from clearing our stencil marks between the stencil write and + * the reprojection pass by stripping the D3D11_CLEAR_STENCIL flag. + */ + struct ID3D11DeviceContext_ClearDepthStencilView + { + static void thunk(ID3D11DeviceContext* This, ID3D11DepthStencilView* pDepthStencilView, UINT ClearFlags, FLOAT Depth, UINT8 Stencil) + { + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vrStereoOptimizations; + if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { + // Only protect the main scene DSV — allow other DSVs to clear normally + auto renderer = globals::game::renderer; + auto& mainDepth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (mainDepth.views[0]) { + // Compare the DSV being cleared against the main scene DSV + ID3D11Resource* clearRes = nullptr; + ID3D11Resource* mainRes = nullptr; + pDepthStencilView->GetResource(&clearRes); + mainDepth.views[0]->GetResource(&mainRes); + bool isMainDSV = (clearRes == mainRes); + if (clearRes) + clearRes->Release(); + if (mainRes) + mainRes->Release(); + if (isMainDSV) { + ClearFlags &= ~D3D11_CLEAR_STENCIL; + if (ClearFlags == 0) + return; + } + } + } + } + func(This, 
pDepthStencilView, ClearFlags, Depth, Stencil); + } + static inline REL::Relocation func; + }; + /** * @brief Installs hooks on the Map and Unmap methods of the provided D3D11 device context. * @@ -282,5 +350,11 @@ namespace globals { stl::detour_vfunc<14, ID3D11DeviceContext_Map>(a_context); stl::detour_vfunc<15, ID3D11DeviceContext_Unmap>(a_context); + + // VR stereo optimization hooks: intercept DSS and stencil clear + if (globals::game::isVR) { + stl::detour_vfunc<36, ID3D11DeviceContext_OMSetDepthStencilState>(a_context); + stl::detour_vfunc<53, ID3D11DeviceContext_ClearDepthStencilView>(a_context); + } } } diff --git a/src/Globals.h b/src/Globals.h index fa96446891..9318c6b8d3 100644 --- a/src/Globals.h +++ b/src/Globals.h @@ -34,6 +34,7 @@ struct ExtendedTranslucency; struct Upscaling; struct WeatherEditor; struct ExponentialHeightFog; +struct VRStereoOptimizations; class State; class Deferred; @@ -91,6 +92,7 @@ namespace globals extern RenderDoc renderDoc; extern WeatherEditor weatherEditor; extern ExponentialHeightFog exponentialHeightFog; + extern VRStereoOptimizations vrStereoOptimizations; namespace llf { diff --git a/src/State.cpp b/src/State.cpp index 9794ce9441..ae66ae1e43 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -11,6 +11,7 @@ #include "Features/TerrainBlending.h" #include "Features/TerrainHelper.h" #include "Features/Upscaling.h" +#include "Features/VRStereoOptimizations.h" #include "Features/VolumetricShadows.h" #include "Features/WeatherEditor.h" #include "Menu.h" @@ -850,6 +851,22 @@ void State::UpdateSharedData([[maybe_unused]] bool a_inWorld, [[maybe_unused]] b data.MipBias = 0; } + // VR MIP bias modes: 1 = All (global), 2 = Distant Trees (per-pixel in TREE_ANIM) + data.VRMipBias = 0; + data.VRMipBiasNearDist = 2000.0f; + data.VRMipBiasFarDist = 6000.0f; + data.VRMipBiasMode = 0; + if (globals::game::isVR) { + auto& s = globals::features::vrStereoOptimizations.settings; + if (s.mipBiasMode == 1 || s.mipBiasMode == 2) { + 
data.VRMipBias = s.mipLodBias; + data.VRMipBiasNearDist = s.mipBiasNearDist; + data.VRMipBiasFarDist = s.mipBiasFarDist; + data.VRMipBiasMode = static_cast(s.mipBiasMode); + } + data.VRAlphaTestThreshold = s.alphaTestThreshold; + } + // DALC to SH const auto& m = dalcTransform.rotate; const auto& t = dalcTransform.translate; diff --git a/src/State.h b/src/State.h index 5682562681..0542a7e1a6 100644 --- a/src/State.h +++ b/src/State.h @@ -210,7 +210,12 @@ class State uint InMapMenu; uint HideSky; float MipBias; - float pad0; + float VRMipBias; + float VRMipBiasNearDist; + float VRMipBiasFarDist; + uint VRMipBiasMode; // 0=Off, 1=All Textures, 2=Distant Trees only + float VRAlphaTestThreshold; // Alpha test threshold for VR TREE_ANIM (0 = use vanilla) + float4 pad0; // HLSL: float2 + implicit 8-byte gap before float4 AmbientSHR float4 AmbientSHR; float4 AmbientSHG; float4 AmbientSHB;