diff --git a/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli index 4152bdb2d5..b8a4e3ccea 100644 --- a/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli +++ b/features/Extended Materials/Shaders/ExtendedMaterials/ExtendedMaterials.hlsli @@ -325,6 +325,7 @@ namespace ExtendedMaterials float2 GetParallaxCoords(float distance, float2 coords, float mipLevel, float3 viewDir, float3x3 tbn, float noise, Texture2D tex, SamplerState texSampler, uint channel, DisplacementParams params, out float pixelOffset) #endif { + pixelOffset = 0.5; float3 viewDirTS = normalize(mul(tbn, viewDir)); #if defined(LANDSCAPE) viewDirTS.xy /= viewDirTS.z * 0.7 + 0.3 + params[0].FlattenAmount; // Fix for objects at extreme viewing angles @@ -496,7 +497,7 @@ namespace ExtendedMaterials #endif nearBlendToFar *= nearBlendToFar; float offset = (1.0 - parallaxAmount) * -maxHeight + minHeight; - pixelOffset = lerp(parallaxAmount * scale, 0, nearBlendToFar); + pixelOffset = saturate(lerp(parallaxAmount, 0.5, nearBlendToFar)); return lerp(viewDirTS.xy * offset + coords.xy, coords, nearBlendToFar); } @@ -509,7 +510,7 @@ namespace ExtendedMaterials weights[5] = input.LandBlendWeights2.y; #endif - pixelOffset = 0; + pixelOffset = 0.5; return coords; } diff --git a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl index df107d9175..301b51651e 100644 --- a/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl +++ b/features/Upscaling/Shaders/Upscaling/ClearHMDMaskCS.hlsl @@ -4,20 +4,53 @@ // depth == 0.0 is the unrendered/hidden area value (Skyrim reversed-Z: far plane = 0). // DepthIn is the combined stereo depth buffer; DepthOffsetX selects the eye's half. // ColorInOut is the isolated per-eye buffer; ColorOffsetX is always 0. 
+// +// When DepthWidth > 0, coordinate scaling is enabled: depth is at render-res while +// color is at display-res. The shader maps display-res color coordinates to render-res +// depth coordinates for the mask lookup. +// +// FallbackIn (t1): when bound, masked pixels read from this texture instead of writing +// black. When unbound, D3D11 returns (0,0,0,0) — same as clearing to black. +// FallbackOffsetX selects the eye's half in the stereo fallback texture. cbuffer ClearHMDMaskCB : register(b0) { - uint DepthOffsetX; // X offset into combined stereo depth (0 = left, eyeWidth = right) - uint ColorOffsetX; // X offset into color target (always 0 for per-eye buffers) - uint pad0; - uint pad1; + uint DepthOffsetX; // X offset into combined stereo depth (0 = left, eyeWidth = right) + uint ColorOffsetX; // X offset into color target (always 0 for per-eye buffers) + uint DepthOffsetY; // Y offset into combined stereo depth (non-zero when viewport scaling crops vertically) + uint FallbackOffsetX; // X offset into FallbackIn for stereo (0 when unused or left eye) + // Optional coordinate scaling (zero = disabled, for backwards compat) + uint DepthWidth; // render-res eye width; if 0, no scaling (1:1 depth/color coords) + uint DepthHeight; // render-res eye height + uint ColorWidth; // display-res eye width; used as a divisor when scaling, so 0 also disables scaling + uint ColorHeight; // display-res eye height; used as a divisor when scaling, so 0 also disables scaling }; Texture2D DepthIn : register(t0); +Texture2D FallbackIn : register(t1); RWTexture2D ColorInOut : register(u0); [numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { - // Read from stereo depth, write to potentially stereo color - if (DepthIn[dispatchID.xy + uint2(DepthOffsetX, 0)] == 0.0) - ColorInOut[dispatchID.xy + uint2(ColorOffsetX, 0)] = float4(0.0, 0.0, 0.0, 0.0); + uint w, h; + ColorInOut.GetDimensions(w, h); + if (dispatchID.x >= w || dispatchID.y >= h) + return; + + uint2 colorPos = dispatchID.xy + uint2(ColorOffsetX, 0); + uint2 depthPos; + + if (DepthWidth > 0 && ColorWidth > 0 && ColorHeight > 0) { + // Scale from 
display-res color coordinates to render-res depth coordinates + depthPos = uint2( + (dispatchID.x * DepthWidth) / ColorWidth, + (dispatchID.y * DepthHeight) / ColorHeight) + + uint2(DepthOffsetX, DepthOffsetY); + } else { + depthPos = dispatchID.xy + uint2(DepthOffsetX, DepthOffsetY); + } + + if (DepthIn[depthPos] == 0.0) + ColorInOut[colorPos] = FallbackIn[dispatchID.xy + uint2(FallbackOffsetX, 0)]; + // When FallbackIn is unbound (existing callers): returns (0,0,0,0) → clears to black + // When FallbackIn is bound (TAA mask restore): returns display RT content } diff --git a/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl new file mode 100644 index 0000000000..8cf6b900d3 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/DLSSCompositePS.hlsl @@ -0,0 +1,48 @@ +// Format-converting fullscreen pixel shader with optional bilinear upscale. +// Used by TAAReorder to composite between textures of different DXGI formats +// (e.g. R8G8B8A8_UNORM conductor RTs <-> R11G11B10_FLOAT kMAIN). +// The GPU's output merger handles format conversion automatically. +// +// BILINEAR_UPSCALE variant: upscales render-res content to display-res by +// mapping output pixel positions through the dynamic resolution scale, +// like PureDark's dynamicResScale in his blend shader. + +#include "Upscaling/UpscaleVS.hlsl" + +#ifdef PSHADER + +Texture2D Source : register(t0); + +# ifdef BILINEAR_UPSCALE + +cbuffer CompositeCB : register(b0) +{ + float2 DynResScale; // renderRes / displayRes (per-eye) + float2 EyeOffset; // (i * eyeWidth, 0) in texels + float2 SrcTexSize; // full texture dimensions in texels + float2 pad; +}; + +SamplerState LinearSampler : register(s0); + +float4 main(VS_OUTPUT input) : SV_Target +{ + // Map display-res pixel position to render-res source position. + // Subtract eye offset, scale to render-res, add eye offset back. 
+ float2 localPos = input.Position.xy - EyeOffset; + float2 srcLocal = localPos * DynResScale; + float2 srcPos = srcLocal + EyeOffset; + float2 srcUV = srcPos / SrcTexSize; + return Source.SampleLevel(LinearSampler, srcUV, 0); +} + +# else + +float4 main(VS_OUTPUT input) : SV_Target +{ + return Source.Load(int3(input.Position.xy, 0)); +} + +# endif // BILINEAR_UPSCALE + +#endif // PSHADER diff --git a/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl new file mode 100644 index 0000000000..e5650af665 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/DepthUpscalePS.hlsl @@ -0,0 +1,70 @@ +/** + * @file DepthUpscalePS.hlsl + * @brief Point-sampled depth buffer upscaling for VR depth-based culling + * + * When upscaling (FSR/DLSS) is active, the depth buffer is rendered at a lower + * resolution than the display. Skyrim VR's depth-based culling (OBBOcclusionTesting) + * reads from the depth buffer to determine object visibility, but with a mismatched + * resolution, objects may be incorrectly culled (appearing to flicker in/out of view). + * + * This shader upscales the low-resolution depth buffer to full resolution using + * pure point sampling. Previous conservative blending (GatherRed + lerp toward + * min depth) caused HAM mask bleed: depth == 0 values from the hidden area mesh + * leaked into valid depth through the 2x2 neighborhood blend, creating artifacts + * at the mask boundary after DRS upscaling. 
+ * + * Based on depth upscaling approach by vrnord + * https://github.com/vrnord/skyrim-community-shaders-VR-DLSS + */ + +#include "Upscaling/UpscaleVS.hlsl" + +#if defined(PSHADER) +# include "Common/FrameBuffer.hlsli" +# include "Common/SharedData.hlsli" + +typedef VS_OUTPUT PS_INPUT; + +struct PS_OUTPUT +{ + float Depth: SV_Depth; +}; + +Texture2D DepthLowRes : register(t0); + +cbuffer DepthUpscaleCB : register(b0) +{ + float2 SourceDim; // Full texture dimensions (texels) + float2 InvSourceDim; // 1.0 / SourceDim + float2 Scale; // resolutionScale (render/display ratio) + float2 Pad; +}; + +/** + * @brief Main pixel shader entry point + * + * Pure point-sampled depth upscaling. Maps display-res pixel position to + * render-res texel and loads directly — no blending, no mask bleed. + */ +PS_OUTPUT main(PS_INPUT input) +{ + PS_OUTPUT psout; + + // Map full-res UV to render-res UV (same transform as the engine's + // GetDynamicResolutionAdjustedScreenPosition). + float2 uv = Scale * input.TexCoord; + + // Per-eye clamping for SBS stereo: prevent sampling across the center seam. + bool isRight = input.TexCoord.x >= 0.5; + float halfScale = 0.5 * Scale.x; + uv.x = clamp(uv.x, isRight ? halfScale : 0.0, isRight ? 
Scale.x : halfScale); + uv.y = clamp(uv.y, 0.0, Scale.y); + + // Nearest texel coordinate — pure point sampling, no blending + int2 texel = int2(floor(uv * SourceDim)); + psout.Depth = DepthLowRes.Load(int3(texel, 0)); + + return psout; +} + +#endif diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl new file mode 100644 index 0000000000..16116fb4e0 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositeCS.hlsl @@ -0,0 +1,41 @@ +cbuffer FeatherCB : register(b0) +{ + uint CropX; // paste position X in output space + uint CropY; // paste position Y in output space + uint CropW; // crop width + uint CropH; // crop height + float FeatherWidth; // feather distance in pixels (inward from crop edge) + float3 pad; +}; + +Texture2D CropTexture : register(t0); // DLSS output (crop-sized, at {0,0}) +RWTexture2D OutputTexture : register(u0); // vrFinalOutput (already filled with periphery) + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { + // dispatchID is in crop-local space (0..CropW-1, 0..CropH-1) + int2 cropLocal = int2(dispatchID.xy); + if (cropLocal.x >= (int)CropW || cropLocal.y >= (int)CropH) + return; + + // Output pixel = crop-local + paste offset + int2 pixel = cropLocal + int2(CropX, CropY); + + // Distance from nearest crop edge (positive = inside) + float distLeft = (float)cropLocal.x; + float distRight = (float)(CropW - 1 - cropLocal.x); + float distTop = (float)cropLocal.y; + float distBottom = (float)(CropH - 1 - cropLocal.y); + float distFromEdge = min(min(distLeft, distRight), min(distTop, distBottom)); + + float4 dlss = CropTexture.Load(int3(cropLocal, 0)); + + if (FeatherWidth <= 0.0 || distFromEdge >= FeatherWidth) { + // Inside crop interior or no feathering: 100% DLSS + OutputTexture[pixel] = dlss; + } else { + // Feather zone: smooth blend from periphery (TAA-stabilized) to DLSS + float blend = smoothstep(0.0, 
FeatherWidth, distFromEdge); + float4 periphery = OutputTexture[pixel]; + OutputTexture[pixel] = lerp(periphery, dlss, blend); + } +} diff --git a/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl new file mode 100644 index 0000000000..fb6ae3f277 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/FeatheredCompositePS.hlsl @@ -0,0 +1,56 @@ +// Feathered DLSS crop composite using hardware alpha blending. +// Based on PureDark's approach from Skyrim-Upscaler VR (MIT license). +// +// The render target already contains TAA'd periphery content. +// We output float4(DLSSColor, featherAlpha) and let the output merger's +// SrcAlpha/InvSrcAlpha blend preserve the periphery in the feather zone +// and outside the crop rect entirely. + +#include "Upscaling/UpscaleVS.hlsl" + +#ifdef PSHADER + +Texture2D CropTexture : register(t0); +SamplerState LinearSampler : register(s0); + +cbuffer FeatheredCompositeCB : register(b0) +{ + float2 CropOrigin; // paste position (x, y) in output-eye pixel coords + float2 CropSize; // crop width, height in pixels + float FeatherWidth; // feather distance in pixels (inward from crop edge) + float _pad0; + float2 SrcUVOrigin; // UV origin in source texture for this crop region + float2 SrcUVScale; // UV scale: maps [0,1] crop-local UV to source texture UV range +}; + +float4 main(VS_OUTPUT input) : SV_Target +{ + float2 pixelPos = input.Position.xy; + + // Distance from each edge of the crop rect (positive = inside) + float distLeft = pixelPos.x - CropOrigin.x; + float distRight = (CropOrigin.x + CropSize.x) - pixelPos.x; + float distTop = pixelPos.y - CropOrigin.y; + float distBottom = (CropOrigin.y + CropSize.y) - pixelPos.y; + + float minDist = min(min(distLeft, distRight), min(distTop, distBottom)); + + // Outside crop rect: fully transparent (hardware blend preserves TAA'd periphery) + if (minDist <= 0.0) + return float4(0, 0, 0, 0); + + // Feather alpha: 
smoothstep ramp from 0 at edge to 1 at FeatherWidth inside + // (matches the smoothstep from the original CS for visual consistency) + float alpha = (FeatherWidth > 0.0) ? smoothstep(0.0, FeatherWidth, minDist) : 1.0; + + // Map pixel position to crop-local UV [0,1], then remap to source texture UV. + // For per-eye textures: SrcUVOrigin=(0,0), SrcUVScale=(1,1) (identity). + // For SBS textures: SrcUVOrigin/Scale select the correct eye's crop region. + float2 cropUV = (pixelPos - CropOrigin) / CropSize; + float2 srcUV = cropUV * SrcUVScale + SrcUVOrigin; + float3 dlssColor = CropTexture.SampleLevel(LinearSampler, srcUV, 0).rgb; + + return float4(dlssColor, alpha); +} + +#endif // PSHADER diff --git a/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl new file mode 100644 index 0000000000..0856c6bd26 --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/ForceAlphaCS.hlsl @@ -0,0 +1,17 @@ +// Forces alpha to 1.0 across the entire texture. +// Used after DLSS center paste onto submit texture to ensure Scaleform UI renders. +// DLSS output may have alpha=0 (from R11G11B10→R8G8B8A8 conversion with no alpha source), +// which can prevent UI compositing in the DLSS center area. + +RWTexture2D ColorInOut : register(u0); + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { + uint w, h; + ColorInOut.GetDimensions(w, h); + if (dispatchID.x >= w || dispatchID.y >= h) + return; + + float4 c = ColorInOut[dispatchID.xy]; + c.a = 1.0; + ColorInOut[dispatchID.xy] = c; +} diff --git a/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl new file mode 100644 index 0000000000..315541e76d --- /dev/null +++ b/features/Upscaling/Shaders/Upscaling/VRPeripheryFillCS.hlsl @@ -0,0 +1,24 @@ +// Bilinear upscale from render-resolution per-eye buffer to display-resolution per-eye buffer. 
+// Used for VR viewport scaling: fills the full eye output with a cheap upscale so the +// periphery (outside the DLSS-processed center) is not black/empty. + +cbuffer PeripheryFillCB : register(b0) +{ + uint SrcWidth; + uint SrcHeight; + uint DstWidth; + uint DstHeight; +}; + +Texture2D SrcTexture : register(t0); +SamplerState LinearSampler : register(s0); +RWTexture2D DstTexture : register(u0); + +[numthreads(8, 8, 1)] void main(uint3 dispatchID : SV_DispatchThreadID) { + if (dispatchID.x >= DstWidth || dispatchID.y >= DstHeight) + return; + + // Normalized UV with half-pixel offset for correct bilinear sampling + float2 uv = (float2(dispatchID.xy) + 0.5) / float2(DstWidth, DstHeight); + DstTexture[dispatchID.xy] = SrcTexture.SampleLevel(LinearSampler, uv, 0); +} diff --git a/package/Shaders/Common/SharedData.hlsli b/package/Shaders/Common/SharedData.hlsli index 4ea0d4d07c..cb5d3c0416 100644 --- a/package/Shaders/Common/SharedData.hlsli +++ b/package/Shaders/Common/SharedData.hlsli @@ -23,7 +23,7 @@ namespace SharedData bool InInterior; // If the area lacks a directional shadow light e.g. the sun or moon bool InMapMenu; // If the world/local map is open (note that the renderer is still deferred here) bool HideSky; // HideSky flag in WorldSpace, e.g. 
Blackreach - float MipBias; // Offset to mip level for TAA sharpness# + float MipBias; // Offset to mip level for TAA sharpness float pad0; float4 AmbientSHR; float4 AmbientSHG; @@ -52,7 +52,7 @@ namespace SharedData bool EnableShadows; bool ExtendShadows; bool EnableParallaxWarpingFix; - float1 pad0; + bool pad0; }; struct CubemapCreatorSettings diff --git a/package/Shaders/Common/VR.hlsli b/package/Shaders/Common/VR.hlsli index d744022781..a3b3783c71 100644 --- a/package/Shaders/Common/VR.hlsli +++ b/package/Shaders/Common/VR.hlsli @@ -21,6 +21,7 @@ cbuffer VRValues : register(b13) float2 EyeOffsetScale : packoffset(c0.z); float4 EyeClipEdge[2] : packoffset(c1); } + #endif namespace Stereo diff --git a/package/Shaders/DeferredCompositeCS.hlsl b/package/Shaders/DeferredCompositeCS.hlsl index f149255718..cfaefc714b 100644 --- a/package/Shaders/DeferredCompositeCS.hlsl +++ b/package/Shaders/DeferredCompositeCS.hlsl @@ -19,6 +19,10 @@ RWTexture2D NormalTAAMaskSpecularMaskRW : register(u1); RWTexture2D MotionVectorsRW : register(u2); Texture2D DepthTexture : register(t4); +#if defined(VR_STEREO_OPT) +Texture2D StereoOptModeTexture : register(t16); +#endif + #if defined(DYNAMIC_CUBEMAPS) Texture2D ReflectanceTexture : register(t5); TextureCube EnvTexture : register(t6); @@ -92,6 +96,16 @@ void SampleSSGISpecular(uint2 pixCoord, sh2 lobe, inout float ao, out float3 il, uv *= FrameBuffer::DynamicResolutionParams2.xy; // adjust for dynamic res uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + +#if defined(VR_STEREO_OPT) + if (eyeIndex == 1) { + uint mode = StereoOptModeTexture[uint2(dispatchID.xy)] & 0x0F; + if (mode == 2 || mode == 1) { // MODE_MAIN or MODE_EDGE — stencil-culled, reprojected by StereoBlend + return; + } + } +#endif + uv = Stereo::ConvertFromStereoUV(uv, eyeIndex); float3 normalGlossiness = NormalRoughnessTexture[dispatchID.xy]; diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index aad23d96c7..ba0f19f3b9 100644 --- 
a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -3166,7 +3166,15 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) } # endif - psout.Reflectance = float4(indirectLobeWeights.specular, psout.Diffuse.w); +# if defined(VR) && (defined(EMAT) || defined(TRUE_PBR)) && (defined(PARALLAX) || defined(LANDSCAPE) || defined(TRUE_PBR)) + // VR: store POM parallax amount for stereo reprojection depth correction. + // Read by StereoBlendCS to adjust Eye 1 (right eye) reprojection depth + // at POM-displaced surfaces. Not consumed on flat (SE/AE). + psout.Reflectance = float4(indirectLobeWeights.specular, + (pixelOffset > 0.0) ? saturate(pixelOffset) : 0.0); +# else + psout.Reflectance = float4(indirectLobeWeights.specular, 0.0); +# endif psout.NormalGlossiness = float4(GBuffer::EncodeNormal(screenSpaceNormal), saturate(1.0 - material.Roughness), psout.Diffuse.w); # if defined(SNOW) diff --git a/package/Shaders/RunGrass.hlsl b/package/Shaders/RunGrass.hlsl index f05c3d0edd..820503ab93 100644 --- a/package/Shaders/RunGrass.hlsl +++ b/package/Shaders/RunGrass.hlsl @@ -850,7 +850,6 @@ PS_OUTPUT main(PS_INPUT input) # if defined(RENDER_DEPTH) float diffuseAlpha = input.VertexColor.w * baseColor.w; - if ((diffuseAlpha - AlphaTestRefRS) < 0) { discard; } diff --git a/package/Shaders/VR/StereoBlendCS.hlsl b/package/Shaders/VR/StereoBlendCS.hlsl index 7322e9e513..bf5a082685 100644 --- a/package/Shaders/VR/StereoBlendCS.hlsl +++ b/package/Shaders/VR/StereoBlendCS.hlsl @@ -11,6 +11,7 @@ #include "Common/Color.hlsli" #include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" #include "Common/VR.hlsli" Texture2D ColorTexture : register(t0); @@ -18,6 +19,30 @@ Texture2D DepthTexture : register(t1); RWTexture2D OutputRW : register(u0); +#ifdef STEREO_OVERWRITE +RWTexture2D MotionRW : register(u1); +Texture2D ModeTexture : register(t2); +Texture2D ReflectanceTexture : register(t3); // .w = POM pixelOffset from Lighting pass +SamplerState 
LinearSampler : register(s0); + +# include "VRStereoOptimizations/modes.hlsli" + +// Hardware bilinear color sample at a reprojected UV (stereoUV is sampled as-is; no pixel-to-UV conversion happens here). +// stereoUV is clamped to [0.5, frameDim - 0.5] texels, expressed in full-texture UV, so taps stay inside the frameDim region. +// NOTE(review): assumes stereoUV is already full-texture UV; confirm for the case where frameDim is smaller than the texture. +// Motion vectors stay as integer Load() — filtering them breaks DLSS. +float4 SampleReprojectedColor(float2 stereoUV, float2 frameDim) +{ + uint texW, texH; + ColorTexture.GetDimensions(texW, texH); + float2 texSize = float2(texW, texH); + float2 minUV = 0.5 / texSize; + float2 maxUV = (frameDim - 0.5) / texSize; + stereoUV = clamp(stereoUV, minUV, maxUV); + return ColorTexture.SampleLevel(LinearSampler, stereoUV, 0); +} +#endif + cbuffer StereoBlendCB : register(b1) { float2 FrameDim; @@ -25,11 +50,16 @@ cbuffer StereoBlendCB : register(b1) float DepthSigma; float MaxBlendFactor; float ColorDiffThreshold; - float pad; + float DebugEdgeTint; + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer, 3 = POM depth heatmap + float FullBlendDistance; + float POMDepthScale; + float _pad; }; -static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend -static const int kEdgeMargin = 2; // Neighbor offset (pixels) for destination edge + mask boundary check +static const float kEdgeDepthThreshold = 0.05; // NDC depth difference above which a pixel is considered a depth discontinuity and excluded from stereo blend +static const int kEdgeMargin = 2; // Neighbor offset (pixels) for destination edge + mask boundary check +static const float kDepthAgreementThreshold = 0.015; // Relative depth difference threshold for overwrite mode disocclusion rejection. NOTE(review): not referenced in the code visible in this diff (the full-blend path uses a literal 0.02) — confirm it is still needed // Samples four depth neighbors in a cross pattern (±offset pixels) around center, // clamped to eyeIndex's half of the packed stereo buffer to avoid seam contamination. 
@@ -46,6 +76,192 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (any(dtid >= uint2(FrameDim))) return; +#ifdef STEREO_OVERWRITE + // ========================================================================= + // Mode-driven stereo merge: reads per-pixel classification from StencilCS + // and applies appropriate action per mode and eye. + // Mode texture is full SBS resolution — ModeTexture[dtid] maps directly. + // ========================================================================= + + float2 uv = (dtid + 0.5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + float centerDepth = DepthTexture[dtid]; + + // Pixels with depth >= 1.0 are always skipped. NOTE(review): originally labelled "HMD mask (reversed-Z)", but the debug mode 2 check below also rejects depth < 1e-5 — confirm which value the hidden-area mask uses and whether depth ~0 pixels should be skipped here too + if (centerDepth >= 1.0) + return; + + uint pixelMode = ModeTexture[dtid]; + + // Debug mode 1: depth map diagnostic — show mode texture as solid colors (every pixel that passed the depth >= 1.0 skip above; unlisted modes are left untouched) + if (DebugMode == 1) { + float4 c = ColorTexture[dtid]; + if (pixelMode == MODE_EDGE) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), 0.5), c.a); + else if (pixelMode == MODE_EDGE_NEIGHBOUR) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0, 1), 0.5), c.a); + else if (pixelMode == MODE_DISOCCLUDED) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 0.5, 1), 0.3), c.a); + else if (pixelMode == MODE_FULL_BLEND) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0.5, 0), 0.5), c.a); + return; + } + + // Debug mode 2: full blend depth visualizer — cyan tint based on proximity to FullBlendDistance (the >= 1.0 half of the depth test below is already covered by the early return above) + if (DebugMode == 2) { + if (centerDepth < 1e-5 || centerDepth >= 1.0) + return; + float linDepth = SharedData::GetScreenDepth(centerDepth); + if (linDepth < FullBlendDistance) { + float4 c = ColorTexture[dtid]; + float proximity = saturate(1.0 - linDepth / max(FullBlendDistance, 1.0)); + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 1), proximity * 0.4), c.a); + } + return; + } + + // Debug mode 3: POM depth data visualizer — show Reflectance.w as color + if (DebugMode == 3) { + float pomVal = 
ReflectanceTexture[dtid].w; + float4 c = ColorTexture[dtid]; + if (pomVal > 1e-2) { + // POM pixel: red-to-green gradient based on parallaxAmount + // Red = peak (high pomVal, closer to camera), Green = valley (low pomVal, farther), Yellow = geometry plane + float3 pomColor = float3(pomVal, 1.0 - pomVal, 0); + OutputRW[dtid] = float4(lerp(c.rgb, pomColor, 0.7), c.a); + } + // Non-POM pixels (pomVal ~ 0) left untouched + return; + } + + // MODE_DISOCCLUDED: fully shaded, leave untouched + if (pixelMode == MODE_DISOCCLUDED) + return; + + // MODE_FULL_BLEND: bilateral blend for 2x supersampling + if (pixelMode == MODE_FULL_BLEND) { + float4 center = ColorTexture[dtid]; + + // Check for POM depth offset at this pixel + // pixelOffset = parallaxAmount (0-1) from ExtendedMaterials, 0.5 = geometry plane. + // Values > 0.5 are peaks (closer to camera), < 0.5 are valleys (farther from camera). + // Correction: high pomVal should push depth closer (smaller linear depth), + // so we use (0.5 - pomOffset) to get a negative correction for peaks. + // Non-POM pixels store 0.0, so threshold > 1e-2 distinguishes them. 
+ float reprojDepthFB = centerDepth; + float pomOffsetFB = ReflectanceTexture[dtid].w; + if (pomOffsetFB > 1e-2 && POMDepthScale > 0) { + float linDepthFB = SharedData::GetScreenDepth(centerDepth); + float depthCorrectionFB = (0.5 - pomOffsetFB) * POMDepthScale; + float newLinDepthFB = max(linDepthFB + depthCorrectionFB, 1e-4); + reprojDepthFB = (SharedData::CameraData.x - SharedData::CameraData.w / newLinDepthFB) / SharedData::CameraData.z; + } + + // Reproject to the other eye + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, reprojDepthFB, eyeIndex, FrameDim); + if (!r.valid) { + // Debug tint for failed reprojection + if (DebugEdgeTint > 0) + OutputRW[dtid] = float4(lerp(center.rgb, float3(1, 0.5, 0), DebugEdgeTint), center.a); + return; + } + + // Only blend with pixels that have valid composited data in both eyes + uint otherMode = ModeTexture[r.otherPx]; + if (otherMode != MODE_FULL_BLEND && otherMode != MODE_DISOCCLUDED) + return; + + float4 otherColor = SampleReprojectedColor(r.otherStereoUV, FrameDim); + float otherDepth = DepthTexture[r.otherPx]; + + // Depth-weighted bilateral blend + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float depthAgreement = 1.0 - saturate(abs(centerDepth - otherDepth) / maxDepth / 0.02); + float blendWeight = 0.5 * depthAgreement; + + float4 result = lerp(center, otherColor, blendWeight); + + if (DebugEdgeTint > 0) + result.rgb = lerp(result.rgb, float3(0, 1, 1), DebugEdgeTint); + + OutputRW[dtid] = result; + return; + } + + if (eyeIndex == 0) { + // Eye 0 (left eye): fully shaded for all modes — only apply debug tint to edge pixels + if (DebugEdgeTint > 0 && pixelMode == MODE_EDGE) { + float4 c = ColorTexture[dtid]; + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), DebugEdgeTint), c.a); + } + return; + } + + // Eye 1 (right eye): reproject all non-disoccluded, non-full-blend pixels + // (MAIN, EDGE) from Eye 0 (left eye). 
In VR stereo rendering, Eye 0 is + // fully shaded; Eye 1 pixels marked as reprojectable by StencilCS are + // filled with reprojected color from Eye 0 to save GPU work. + // StencilCS already performed the authoritative disocclusion check with the correct + // depth buffer state — no redundant depth agreement check here. + float reprojDepth = centerDepth; + + // First-pass reprojection to find Eye 0 source pixel + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, reprojDepth, eyeIndex, FrameDim); + if (!r.valid) + return; + + // Save first-pass result as fallback before POM adjustment + Stereo::StereoBilateralResult firstPassR = r; + + // Read POM offset from Eye 0 source's reflectance.w + // pixelOffset = parallaxAmount (0-1) from ExtendedMaterials, 0.5 = geometry plane. + // Values > 0.5 are peaks (closer to camera), < 0.5 are valleys (farther from camera). + // Correction: high pomVal should push depth closer (smaller linear depth), + // so we use (0.5 - pomOffset) to get a negative correction for peaks. + // Non-POM pixels store 0.0, so threshold > 1e-2 distinguishes them. + float pomOffset = ReflectanceTexture[r.otherPx].w; + if (pomOffset > 1e-2) { + // Re-reproject with POM-adjusted depth centered at geometry plane + float linearDepth = SharedData::GetScreenDepth(centerDepth); + float depthCorrection = (0.5 - pomOffset) * POMDepthScale; + float newLinearDepth = max(linearDepth + depthCorrection, 1e-4); + reprojDepth = (SharedData::CameraData.x - SharedData::CameraData.w / newLinearDepth) / SharedData::CameraData.z; + r = Stereo::ReprojectToOtherEye(uv, reprojDepth, eyeIndex, FrameDim); + if (!r.valid) + r = firstPassR; // Fall back to non-POM reprojection + } + + // Skip if the Eye 0 source pixel is sky/unrendered (depth at clear value). + // At DeferredPasses time, sky hasn't rendered yet — source would have clear color. + // Let the sky/water pass fill these pixels later instead. 
+ float sourceDepth = DepthTexture[r.otherPx]; + if (sourceDepth >= 1.0 || sourceDepth < 1e-5) { + // POM adjustment landed on sky — try the original first-pass source + if (r.otherPx.x != firstPassR.otherPx.x || r.otherPx.y != firstPassR.otherPx.y) { + float fallbackDepth = DepthTexture[firstPassR.otherPx]; + if (fallbackDepth < 1.0 && fallbackDepth >= 1e-5) { + r = firstPassR; + } else { + return; + } + } else { + return; + } + } + + OutputRW[dtid] = SampleReprojectedColor(r.otherStereoUV, FrameDim); + MotionRW[dtid] = MotionRW[r.otherPx]; + +#else // Normal bilateral blend path + +# ifdef EYE0_ONLY + // Only process Eye 0 (left half) - Eye 1 left untouched + float2 uvCheck = (dtid + 0.5) * RcpFrameDim; + if (Stereo::GetEyeIndexFromTexCoord(uvCheck) == 1) + return; +# endif + float2 uv = (dtid + 0.5) * RcpFrameDim; uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); @@ -78,10 +294,6 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) if (r.valid) { float otherDepth = DepthTexture[r.otherPx]; - // Destination edge detection: skip if the reprojected pixel is near the HMD - // mask boundary or at a depth discontinuity in the other eye. Due to VR - // parallax the arm silhouette appears at a different screen position per eye, - // so the reprojection can cross a boundary invisible from this eye. float4 dstEdgeDepths = SampleCrossDepths(r.otherPx, kEdgeMargin, 1 - eyeIndex); if (any(dstEdgeDepths < 1e-5) || Stereo::MaxDepthDiff(otherDepth, dstEdgeDepths) > kEdgeDepthThreshold) { debugState = 2; @@ -89,9 +301,6 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) float4 otherColor = ColorTexture[r.otherPx]; Stereo::FinalizeStereoBlend(r, uv, centerDepth, otherDepth, eyeIndex, FrameDim, DepthSigma, MaxBlendFactor); - // Only blend where the two eyes actually disagree (screen-space effect - // inconsistency). Luminance difference below the threshold means both - // eyes computed the same result and blending would only destroy parallax. 
float colorDiff = abs(dot(centerColor.rgb, float3(0.2126, 0.7152, 0.0722)) - dot(otherColor.rgb, float3(0.2126, 0.7152, 0.0722))); float colorGate = smoothstep(ColorDiffThreshold * 0.5, ColorDiffThreshold * 2.0, colorDiff); @@ -106,7 +315,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } } -#ifdef DEBUG_BACKCHECK +# ifdef DEBUG_BACKCHECK // Debug visualization (6 states): // Blue = mask/sky: skipped // Yellow = source edge: depth discontinuity at this pixel @@ -123,7 +332,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) float3(0.5, 0.0, 0.0) // 5: back-check failed - red }; OutputRW[dtid] = float4(lerp(centerColor.rgb, debugColors[debugState], 0.7), centerColor.a); -#elif defined(DEBUG_BLEND_WEIGHT) +# elif defined(DEBUG_BLEND_WEIGHT) // Blend weight heatmap: only pixels with actual blend activity are colorized. // Untouched pixels pass through unmodified. float w = saturate(r.blendWeight / max(MaxBlendFactor, 1e-5)); @@ -133,7 +342,7 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } else { OutputRW[dtid] = centerColor; } -#elif defined(DEBUG_EDGE_DETECTION) +# elif defined(DEBUG_EDGE_DETECTION) // Edge detection visualizer: highlights pixels excluded by depth discontinuity checks. // Non-edge pixels show the normal blended output for scene context. // Bright yellow = source edge: discontinuity at this pixel @@ -145,7 +354,9 @@ float4 SampleCrossDepths(int2 center, int offset, uint eyeIndex) } else { OutputRW[dtid] = blendedColor; } -#else +# else OutputRW[dtid] = blendedColor; -#endif +# endif + +#endif // STEREO_OVERWRITE } diff --git a/package/Shaders/VR/VRPostProcessCS.hlsl b/package/Shaders/VR/VRPostProcessCS.hlsl new file mode 100644 index 0000000000..770e244553 --- /dev/null +++ b/package/Shaders/VR/VRPostProcessCS.hlsl @@ -0,0 +1,109 @@ +// VR Post-Process - Bilateral blend for near-camera 2x supersampling +// +// Runs after all compositing and stereo blending is complete. 
+// Reads per-pixel classification from StencilCS and applies: +// - MODE_FULL_BLEND: bilateral depth-weighted blend for 2x supersampling +// +// Only MODE_FULL_BLEND pixels are processed. All others pass through untouched. + +#include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" +#include "Common/VR.hlsli" + +Texture2D ColorTexture : register(t0); // Copy of final composited image +Texture2D ModeTexture : register(t1); +Texture2D DepthTexture : register(t2); + +RWTexture2D OutputRW : register(u0); + +cbuffer VRPostProcessCB : register(b1) +{ + float2 FrameDim; + float2 RcpFrameDim; + float DebugEdgeTint; // 0 = off, >0 = debug visualization strength + uint DebugMode; // 0 = normal, 1 = depth map diagnostic, 2 = full blend depth visualizer + float FullBlendDistance; // Linearized depth threshold for full blend zone visualization + float _pad; // Pad to 16-byte alignment +}; + +#include "VRStereoOptimizations/modes.hlsli" + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + if (any(dtid >= uint2(FrameDim))) + return; + + uint pixelMode = ModeTexture[dtid]; + + // Depth map diagnostic: show mode texture contents as solid colors + if (DebugMode == 1) { + float4 c = ColorTexture[dtid]; + if (pixelMode == MODE_EDGE) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 0), 0.5), c.a); + else if (pixelMode == MODE_EDGE_NEIGHBOUR) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0, 1), 0.5), c.a); + else if (pixelMode == MODE_DISOCCLUDED) + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 0.5, 1), 0.3), c.a); + else if (pixelMode == MODE_FULL_BLEND) + OutputRW[dtid] = float4(lerp(c.rgb, float3(1, 0.5, 0), 0.5), c.a); // Orange = full blend zone + return; + } + + // Full blend depth visualizer: shows the depth boundary as a cyan tint + if (DebugMode == 2) { + float2 uvDb = (dtid + 0.5) * RcpFrameDim; + float depthDb = DepthTexture[dtid]; + if (depthDb < 1e-5 || depthDb >= 1.0) + return; + float linDepth = SharedData::GetScreenDepth(depthDb); 
+ if (linDepth < FullBlendDistance) { + float4 c = ColorTexture[dtid]; + float proximity = saturate(1.0 - linDepth / max(FullBlendDistance, 1.0)); + OutputRW[dtid] = float4(lerp(c.rgb, float3(0, 1, 1), proximity * 0.4), c.a); + } + return; + } + + // Only process full blend pixels + if (pixelMode != MODE_FULL_BLEND) + return; + + float2 uv = (dtid + 0.5) * RcpFrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + float4 result = ColorTexture[dtid]; + + // === MODE_FULL_BLEND: bilateral blend for 2x supersampling === + { + float4 center = result; + float centerDepth = DepthTexture[dtid]; + + // Reproject to the other eye + Stereo::StereoBilateralResult r = Stereo::ReprojectToOtherEye(uv, centerDepth, eyeIndex, FrameDim); + if (!r.valid) { + // Debug tint for failed reprojection + if (DebugEdgeTint > 0) + OutputRW[dtid] = float4(lerp(center.rgb, float3(1, 0.5, 0), DebugEdgeTint), center.a); + return; + } + + // Only blend with pixels that have valid composited data in both eyes. 
+ uint otherMode = ModeTexture[r.otherPx]; + if (otherMode != MODE_FULL_BLEND && otherMode != MODE_DISOCCLUDED) + return; + + float4 otherColor = ColorTexture[r.otherPx]; + float otherDepth = DepthTexture[r.otherPx]; + + // Depth-weighted bilateral blend + float maxDepth = max(max(centerDepth, otherDepth), 1e-5); + float depthAgreement = 1.0 - saturate(abs(centerDepth - otherDepth) / maxDepth / 0.02); + float blendWeight = 0.5 * depthAgreement; + + result = lerp(center, otherColor, blendWeight); + + if (DebugEdgeTint > 0) + result.rgb = lerp(result.rgb, float3(0, 1, 1), DebugEdgeTint); + } + + OutputRW[dtid] = result; +} diff --git a/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl b/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl new file mode 100644 index 0000000000..bd34d26d58 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/ReprojectionCS.hlsl @@ -0,0 +1,55 @@ +// VR Stereo Optimizations - Reprojection Compute Shader +// +// Fills Eye 1 pixels that were stencil-culled during rendering by reprojecting +// color data from Eye 0. Only operates on pixels classified as MODE_MAIN. +// +// Reads Eye 0 color directly from the OutputRW UAV (left half) and writes to +// Eye 1 (right half). No read-write conflict because reads and writes target +// strictly different halves of the texture. 
+// +// Input: +// t0 = Depth buffer +// t1 = Per-pixel mode classification texture +// Output: +// u0 = Main render target UAV (reads Eye 0, writes Eye 1) + +#include "Common/VR.hlsli" +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D DepthTexture : register(t0); +Texture2D ModeTexture : register(t1); + +RWTexture2D OutputRW : register(u0); + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + uint eyeWidth = (uint)FrameDim.x / 2; + uint eyeHeight = (uint)FrameDim.y; + + if (any(dtid >= uint2(eyeWidth, eyeHeight))) + return; + + // dtid is in Eye 1 local coords; convert to stereo buffer coords + uint2 stereoCoord = uint2(dtid.x + eyeWidth, dtid.y); + + // Only fill pixels that were marked for reprojection + // Mode texture is full SBS resolution, so use stereoCoord for Eye 1 + uint mode = ModeTexture[stereoCoord]; + if (mode != MODE_MAIN) + return; + + float depth = DepthTexture[stereoCoord]; + + // Compute mono UV for this Eye 1 pixel + float2 stereoUV = (float2(stereoCoord) + 0.5) * RcpFrameDim; + float2 monoUV = Stereo::ConvertFromStereoUV(stereoUV, 1); + + // Reproject to Eye 0 and sample color + float3 otherEyeUV = Stereo::ConvertMonoUVToOtherEye(float3(monoUV, depth), 1); + float2 eye0StereoUV = Stereo::ConvertToStereoUV(otherEyeUV.xy, 0); + int2 eye0Px = clamp(int2(eye0StereoUV * FrameDim), int2(0, 0), int2(FrameDim) - 1); + + float4 reprojectedColor = OutputRW[eye0Px]; + + // Write to Eye 1 in the main render target + OutputRW[stereoCoord] = reprojectedColor; +} diff --git a/package/Shaders/VRStereoOptimizations/StencilCS.hlsl b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl new file mode 100644 index 0000000000..1709796234 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilCS.hlsl @@ -0,0 +1,153 @@ +// VR Stereo Optimizations - Stencil Classification Compute Shader +// +// Classifies BOTH eyes over the full SBS buffer. 
Each pixel is tagged as: +// MODE_DISOCCLUDED - Must be fully shaded (sky, HMD mask, parallax-occluded) +// MODE_EDGE - Depth edge boundary (dist 1) or inner/foreground band; fully shaded + bilateral blend +// MODE_MAIN - Standard pixel eligible for reprojection / bilateral blend +// MODE_FULL_BLEND - Near-camera geometry: both eyes fully shaded for 2x supersampling +// +// Dispatched over full SBS resolution (FrameDim.x x FrameDim.y). + +#include "Common/SharedData.hlsli" +#include "Common/VR.hlsli" +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D DepthTexture : register(t0); + +RWTexture2D ModeTextureRW : register(u0); + +[numthreads(8, 8, 1)] void main(uint2 dtid : SV_DispatchThreadID) { + if (any(dtid >= uint2(FrameDim))) + return; + + // Determine which eye this pixel belongs to + float2 uv = (float2(dtid) + 0.5) / FrameDim; + uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); + + // Read depth directly in SBS coords + float centerDepth = DepthTexture[dtid]; + +#ifdef DEBUG_DEPTH_MAP + // DIAGNOSTIC: Visualize what depth values StencilCS sees. + // Green (MODE_EDGE) = depth >= 1.0 (HMD mask threshold) + // Magenta (MODE_EDGE_NEIGHBOUR) = depth < 1e-5 (sky threshold) + // No tint (MODE_MAIN) = normal geometry with valid depth + if (centerDepth >= 1.0) { + ModeTextureRW[dtid] = MODE_EDGE; + return; + } + if (centerDepth < 1e-5) { + ModeTextureRW[dtid] = MODE_EDGE_NEIGHBOUR; + return; + } + ModeTextureRW[dtid] = MODE_MAIN; + return; +#endif + + // Sky/unrendered pixels (depth >= 1.0 at z-prepass time = depth buffer clear value) + // and HMD mask pixels both have depth >= 1.0 here. Treat them the same as sky: + // let edge detection run so geometry-vs-sky boundaries get classified. + // HMD mask pixels are in lens corners with no nearby geometry, so they'll + // fall through to MODE_DISOCCLUDED at the end. + bool isSky = (centerDepth < 1e-5) || (centerDepth >= 1.0); + float linCenter = isSky ? 
999999.0 : SharedData::GetScreenDepth(centerDepth); + + // Near-camera supersampling: geometry closer than FullBlendDistance gets full + // shading in both eyes for bilateral blend (2x supersampling in VRPostProcess). + if (!isSky && linCenter < FullBlendDistance) { + ModeTextureRW[dtid] = MODE_FULL_BLEND; + return; + } + + // --- Disocclusion detection via reprojection (runs for all non-sky pixels) --- + // Early return: disoccluded pixels are always MODE_DISOCCLUDED regardless of edge proximity. + // This ensures MinEdgeDistance never affects disocclusion classification. + if (!isSky) { + Stereo::StereoBilateralResult reproj = Stereo::ReprojectToOtherEye( + uv, + centerDepth, + eyeIndex, + FrameDim); + + bool isDisoccluded = false; + if (!reproj.valid) { + isDisoccluded = true; + } else { + float otherDepth = DepthTexture[reproj.otherPx]; + // Raw reversed-Z depth comparison for disocclusion detection. + // Using raw depth avoids concentric semicircle artifacts that occur + // with linearized depth due to precision band boundaries in the + // hyperbolic depth-to-linear conversion. + float maxRaw = max(max(centerDepth, otherDepth), 1e-7); + float rawRelDiff = abs(centerDepth - otherDepth) / maxRaw; + isDisoccluded = (rawRelDiff > DisocclusionThreshold); + } + + if (isDisoccluded) { + ModeTextureRW[dtid] = MODE_DISOCCLUDED; + return; + } + } + + // Depth gate: skip edge detection for nearby geometry (saves perf, distant AA matters more) + // Sky pixels always run edge detection — they need to expand the edge band outward. + // Disocclusion detection (above) is independent of this gate and always runs. + bool skipEdgeDetection = !isSky && (linCenter < MinEdgeDistance); + + // --- Edge detection with two-tier classification --- + // MODE_EDGE: immediate neighbor (distance 1) has depth discontinuity, OR + // inner/foreground band (distance <= kInnerWidth). 
+ static const uint kInnerWidth = 2; + int2 offsets[4] = { int2(-1, 0), int2(1, 0), int2(0, -1), int2(0, 1) }; + + uint nearestEdgeDist = 0xFFFFFFFF; // nearest distance at which a discontinuity was found + bool nearestWeAreOuter = false; // whether we are on the background side at that nearest hit + + // Use the larger of inner/outer widths for the search + uint maxWidth = kInnerWidth; + + if (!skipEdgeDetection) { + [loop] for (uint d = 1; d <= maxWidth; d++) + { + [unroll] for (int i = 0; i < 4; i++) + { + int2 rawNeighbor = int2(dtid) + offsets[i] * (int)d; + uint2 neighborCoord = Stereo::ClampToEyeBounds(rawNeighbor, eyeIndex, FrameDim); + + float neighborDepth = DepthTexture[neighborCoord]; + bool neighborIsSky = (neighborDepth < 1e-5) || (neighborDepth >= 1.0); + float linNeighbor = neighborIsSky ? 999999.0 : SharedData::GetScreenDepth(neighborDepth); + float maxLin = max(max(linCenter, linNeighbor), 1e-5); + float relDepthDiff = abs(linCenter - linNeighbor) / maxLin; + + if (relDepthDiff > EdgeDepthThreshold && d < nearestEdgeDist) { + nearestEdgeDist = d; + nearestWeAreOuter = (linNeighbor < linCenter); // neighbor closer to camera = we are background + } + } + } + + } // !skipEdgeDetection + + if (nearestEdgeDist != 0xFFFFFFFF) { + // Classify based on distance and side + if (nearestEdgeDist == 1) { + // Immediate neighbor discontinuity: always MODE_EDGE regardless of side + ModeTextureRW[dtid] = MODE_EDGE; + return; + } else if (!nearestWeAreOuter && nearestEdgeDist <= kInnerWidth) { + // Inner/foreground band beyond distance 1 + ModeTextureRW[dtid] = MODE_EDGE; + return; + } + } + + // Sky pixels that aren't near edges -> disoccluded (reprojection is meaningless for sky) + if (isSky) { + ModeTextureRW[dtid] = MODE_DISOCCLUDED; + return; + } + + // Standard pixel + ModeTextureRW[dtid] = MODE_MAIN; +} diff --git a/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl new file mode 100644 
index 0000000000..c45c2a2409 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilWritePS.hlsl @@ -0,0 +1,54 @@ +// VR Stereo Optimizations - Stencil Write Pixel Shader +// +// Reads from the per-pixel mode classification texture and depth texture. +// Discards pixels that should NOT be stencil-culled: +// - MODE_DISOCCLUDED (0) = fully shaded in Eye 1, no reprojection needed +// - MODE_FULL_BLEND (4) = near-camera pixels fully shaded in both eyes for supersampling +// - Sky/HMD-mask pixels (depth >= 1.0 or depth < 1e-5) = need normal rendering +// in the sky pass; they keep their MODE_EDGE tag in +// the mode texture for VRPostProcess but must not be stencil-culled. +// +// Only geometry MODE_MAIN/MODE_EDGE pixels survive and get stencil ref=1 written. +// +// Mode texture is full SBS resolution (same as render target). +// The DSS is configured with StencilFunc=ALWAYS, StencilPassOp=REPLACE, ref=1. +// Pixels that survive (not discarded) get stencil=1 written. + +#include "VRStereoOptimizations/cbuffers.hlsli" + +Texture2D ModeTexture : register(t0); +Texture2D DepthTexture : register(t1); + +struct PS_INPUT +{ + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; +}; + +void main(PS_INPUT input) +{ + // Mode texture is full SBS resolution — SV_Position maps directly + // (viewport is Eye 1 half, so SV_Position.x starts at eyeWidth) + int2 modeCoord = int2(input.Position.xy); + + uint mode = ModeTexture[modeCoord]; + + // MODE_MAIN and MODE_EDGE in Eye 1 write stencil ref=1 (reprojectable). + // These are reprojected from Eye 0; MODE_DISOCCLUDED and MODE_FULL_BLEND are fully shaded in Eye 1. + if (mode == MODE_DISOCCLUDED) + discard; + + // Sky/HMD-mask pixels must not be stencil-culled regardless of edge classification. + // They keep their MODE_EDGE tag in the mode texture for VRPostProcess, + // but must render normally in the sky pass (which runs after stencil culling). 
+ float depth = DepthTexture[modeCoord]; + if (depth >= 1.0 || depth < 1e-5) + discard; + + // MODE_FULL_BLEND: near-camera pixels fully shaded in both eyes for supersampling + if (mode == MODE_FULL_BLEND) + discard; + + // Pixel survives: DSS writes stencil ref=1 + // No color output (no RTV bound) +} diff --git a/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl new file mode 100644 index 0000000000..353aa53379 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/StencilWriteVS.hlsl @@ -0,0 +1,24 @@ +// VR Stereo Optimizations - Stencil Write Vertex Shader +// +// Procedural fullscreen triangle covering Eye 1 (right half of SBS buffer). +// No vertex buffer needed — vertex positions are generated from SV_VertexID. +// The viewport is set to Eye 1 by the C++ code, so we just emit a standard +// fullscreen triangle in clip space. + +struct VS_OUTPUT +{ + float4 Position: SV_Position; + float2 TexCoord: TEXCOORD0; +}; + +VS_OUTPUT main(uint vertexID : SV_VertexID) +{ + VS_OUTPUT output; + + // Fullscreen triangle: 3 vertices covering [-1,1] clip space + float2 uv = float2((vertexID << 1) & 2, vertexID & 2); + output.Position = float4(uv * float2(2, -2) + float2(-1, 1), 0, 1); + output.TexCoord = uv; + + return output; +} diff --git a/package/Shaders/VRStereoOptimizations/cbuffers.hlsli b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli new file mode 100644 index 0000000000..60a900387c --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/cbuffers.hlsli @@ -0,0 +1,31 @@ +// VR Stereo Optimizations - Shared constant buffer layout +// Must match VRStereoOptParams in VRStereoOptimizations.h exactly + +#ifndef __VR_STEREO_OPT_CBUFFERS_HLSLI__ +#define __VR_STEREO_OPT_CBUFFERS_HLSLI__ + +cbuffer VRStereoOptParams : register(b1) +{ + float2 FrameDim; // Full stereo buffer dimensions (both eyes) + float2 RcpFrameDim; // 1.0 / FrameDim + + uint StereoModeValue; // 0=Off, 1=Enable + float 
DisocclusionThreshold; // Depth difference threshold for disocclusion detection + float EdgeDepthThreshold; // Relative depth difference threshold for edge detection + uint EdgeWidth; // Half-width of edge detection band in pixels + + float2 QualityJitter; // Sub-pixel jitter offset (Quality mode) + float FoveatedRadius; // Radius of foveal region in UV space + float pad2; + + float2 FoveatedCenter; // Center of foveal region in UV space + float MinEdgeDistance; + float FullBlendDistance; // Linearized depth below which pixels get MODE_FULL_BLEND (game units) +}; + +#define STEREO_MODE_OFF 0 +#define STEREO_MODE_ENABLE 1 + +#include "VRStereoOptimizations/modes.hlsli" + +#endif diff --git a/package/Shaders/VRStereoOptimizations/modes.hlsli b/package/Shaders/VRStereoOptimizations/modes.hlsli new file mode 100644 index 0000000000..b693dedcc3 --- /dev/null +++ b/package/Shaders/VRStereoOptimizations/modes.hlsli @@ -0,0 +1,10 @@ +#ifndef __VR_STEREO_OPT_MODES_HLSLI__ +#define __VR_STEREO_OPT_MODES_HLSLI__ + +#define MODE_DISOCCLUDED 0 +#define MODE_EDGE 1 +#define MODE_MAIN 2 +#define MODE_EDGE_NEIGHBOUR 3 +#define MODE_FULL_BLEND 4 + +#endif diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 0106b7449d..ee6762fcdb 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -279,6 +279,11 @@ void Deferred::StartDeferred() PrepassPasses(); OverrideBlendStates(); + + // VR: Classify Eye 1 pixels and write hardware stencil marks before geometry rendering + if (globals::game::isVR) { + globals::features::vr.stereoOpt.DispatchStencil(); + } } void Deferred::DeferredPasses() @@ -367,6 +372,13 @@ void Deferred::DeferredPasses() context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); + // Bind VRStereoOptimizations mode texture for Eye 1 skip + auto& vrStereoOpt = globals::features::vr.stereoOpt; + if (vrStereoOpt.loaded) { + ID3D11ShaderResourceView* modeSRV = vrStereoOpt.GetModeTextureSRV(); + context->CSSetShaderResources(16, 1, &modeSRV); + } + ID3D11UnorderedAccessView* 
uavs[3]{ main.UAV, normals.UAV, motionVectors.UAV }; context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); @@ -374,13 +386,28 @@ void Deferred::DeferredPasses() context->CSSetShader(shader, nullptr, 0); context->Dispatch(dispatchCount.x, dispatchCount.y, 1); + + // Unbind mode texture SRV + if (vrStereoOpt.loaded) { + ID3D11ShaderResourceView* nullSRV = nullptr; + context->CSSetShaderResources(16, 1, &nullSRV); + } + } + + // VR: Deactivate stencil culling now that geometry rendering is complete. + // Must happen before StereoBlend so the blend pass itself isn't stencil-blocked. + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vr.stereoOpt; + if (stereoOpt.IsStencilActive()) { + stereoOpt.DeactivateStencil(); + } } - // VR stereo consistency blend - depth-aware bilateral blend at the eye seam - // Runs after composite as a general safety net for all screen-space effects. - // Must run before clearing b12/b13 -- needs FrameBuffer matrices for reprojection. - if (globals::game::isVR) + // VR: Stereo reprojection fills Eye 1 holes here (after DeferredComposite, before SSR/water/sky) + // so that ISReflectionsRayTracing sees valid pixels in both eyes. 
+ if (globals::game::isVR) { globals::features::vr.DrawStereoBlend(); + } // Clear { @@ -479,6 +506,10 @@ void Deferred::OverrideBlendStates() blendDesc.RenderTarget[i].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; } + // RT[5] = REFLECTANCE: enable alpha writes for POM depth data + // stored in Reflectance.w, used by StereoBlendCS for depth-aware reprojection + blendDesc.RenderTarget[5].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[a][b][c][d])); } else { deferredBlendStates[a][b][c][d] = nullptr; @@ -555,6 +586,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainComposite() if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); + if (REL::Module::IsVR() && globals::features::vr.stereoOpt.loaded) + defines.push_back({ "VR_STEREO_OPT", nullptr }); + mainCompositeCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); } return mainCompositeCS; @@ -580,6 +614,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainCompositeInterior() if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); + if (REL::Module::IsVR() && globals::features::vr.stereoOpt.loaded) + defines.push_back({ "VR_STEREO_OPT", nullptr }); + mainCompositeInteriorCS = static_cast(Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", defines, "cs_5_0")); } return mainCompositeInteriorCS; @@ -597,6 +634,7 @@ void Deferred::Hooks::Main_RenderWorld::thunk(bool a1) state->permutationData.ExtraShaderDescriptor |= static_cast(State::ExtraShaderDescriptors::InWorld); state->inWorld = true; func(a1); + state->inWorld = false; state->permutationData.ExtraShaderDescriptor &= ~static_cast(State::ExtraShaderDescriptors::InWorld); }; diff --git a/src/Features/ExtendedMaterials.h b/src/Features/ExtendedMaterials.h index 10519a9a4f..e4fb5c7440 100644 --- a/src/Features/ExtendedMaterials.h +++ b/src/Features/ExtendedMaterials.h @@ -36,7 +36,7 @@ 
struct ExtendedMaterials : Feature uint ExtendShadows = 1; uint EnableParallaxWarpingFix = 1; - float pad[1]; + uint pad0 = 0; }; STATIC_ASSERT_ALIGNAS_16(Settings); diff --git a/src/Features/TAAReorder.cpp b/src/Features/TAAReorder.cpp new file mode 100644 index 0000000000..6d1f56dfcc --- /dev/null +++ b/src/Features/TAAReorder.cpp @@ -0,0 +1,612 @@ +/** + * @brief TAA Periphery Reordering for VR DLSS Viewport Scaling + * + * This implementation follows the approach pioneered by PureDark's Skyrim Upscaler + * (https://github.com/PureDark/Skyrim-Upscaler/tree/VR), which demonstrated how to + * reorder Skyrim's post-processing pipeline to run vanilla TAA on the periphery while + * DLSS processes a cropped center region. No code was copied; the approach was used as + * a reference for the conductor/hook architecture. + * + * PureDark's Skyrim Upscaler is licensed under the MIT License: + * Copyright (c) 2022 PureDark + * https://github.com/PureDark/Skyrim-Upscaler/blob/VR/LICENSE + */ +#include "TAAReorder.h" + +#include "Globals.h" +#include "Upscaling.h" +#include +#include + +namespace TAAReorder +{ + bool ShouldReorderTAA() + { + if (!g_initialized) + return false; + auto& upscaling = globals::features::upscaling; + return globals::game::isVR && + upscaling.settings.vrPeripheryTAA && + upscaling.settings.vrDlssViewportScale < 1.0f && + upscaling.GetUpscaleMethod() == Upscaling::UpscaleMethod::kDLSS; + } + + // ─── Setter A: Force TAA (pass-through) ─── + void ForceTAASetter::thunk() + { + func(); + } + + // ─── Setter B: TAA State Machine (pass-through) ─── + void TAAStateMachine::thunk() + { + func(); + } + + // ─── EnsurePostPPCopy: create/resize staging texture matching source ─── + void EnsurePostPPCopy(ID3D11Texture2D* sourceTex) + { + D3D11_TEXTURE2D_DESC srcDesc; + sourceTex->GetDesc(&srcDesc); + + if (g_postPPCopy) { + D3D11_TEXTURE2D_DESC existingDesc; + g_postPPCopy->GetDesc(&existingDesc); + if (existingDesc.Width == srcDesc.Width && 
existingDesc.Height == srcDesc.Height && + existingDesc.Format == srcDesc.Format) + return; + } + + D3D11_TEXTURE2D_DESC desc = srcDesc; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.MiscFlags = 0; + g_postPPCopy = nullptr; + g_postPPCopySRV = nullptr; + globals::d3d::device->CreateTexture2D(&desc, nullptr, g_postPPCopy.put()); + + if (g_postPPCopy) { + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = desc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + globals::d3d::device->CreateShaderResourceView(g_postPPCopy.get(), &srvDesc, g_postPPCopySRV.put()); + Util::SetResourceName(g_postPPCopy.get(), "TAAReorder_PostPPCopy"); + } + } + + // ─── Helper: set up common fullscreen rendering state ─── + static void SetupFullscreenState(ID3D11DeviceContext* context, float vpX, float vpY, float vpW, float vpH) + { + D3D11_VIEWPORT viewport = {}; + viewport.TopLeftX = vpX; + viewport.TopLeftY = vpY; + viewport.Width = vpW; + viewport.Height = vpH; + viewport.MaxDepth = 1.0f; + + auto& upscaling = globals::features::upscaling; + context->RSSetViewports(1, &viewport); + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context->VSSetShader(upscaling.GetUpscaleVS(), nullptr, 0); + context->RSSetState(upscaling.upscaleRasterizerState.get()); + context->OMSetBlendState(upscaling.upscaleBlendState.get(), nullptr, 0xffffffff); + } + + // ─── Helper: draw fullscreen triangle (point-sample format-converting copy) ─── + void DrawFullscreenCopy(ID3D11ShaderResourceView* srcSRV, ID3D11RenderTargetView* dstRTV, + float vpX, float vpY, float vpW, float vpH) + { + auto& upscaling = globals::features::upscaling; + auto context = globals::d3d::context; + + SetupFullscreenState(context, vpX, vpY, vpW, vpH); + context->PSSetShader(upscaling.GetDlssCompositePS(), nullptr, 0); + + 
ID3D11ShaderResourceView* srvs[] = { srcSRV }; + context->PSSetShaderResources(0, 1, srvs); + + ID3D11RenderTargetView* rtvs[] = { dstRTV }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + context->Draw(3, 0); + } + + // ─── ExecutePass hook: capture Phase 2A output, detect Phase 5 ─── + void ExecutePassHook::thunk(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag) + { + bool isPeripheryTAA = ShouldReorderTAA(); + bool shouldLog = (g_diagCounter == 0); + + // Compute pass index for Phase 2A / Phase 5 detection + int passIndex = -1; + if (isPeripheryTAA || shouldLog) { + uintptr_t managerAddr = (uintptr_t)manager; + uintptr_t passArrayBase = *(uintptr_t*)(managerAddr + 0x28); + if (passArrayBase) { + for (int i = 0; i < 40; i++) { + if (*(uintptr_t*)(passArrayBase + i * 8) == (uintptr_t)passObj) { + passIndex = i; + break; + } + } + } + } + + if (shouldLog) + logger::info("[TAAReorder] ExecutePass: src=0x{:X} dst=0x{:X} flag={} passIdx={}", + srcTech, dstTech, flag, passIndex); + + // Execute the original pass + func(manager, passObj, srcTech, dstTech, extraData, flag); + + // After Phase 2A: copy output RT to g_postPPCopy for DLSS to process + if (isPeripheryTAA && passIndex == 30 && dstTech == 0x29) { + ID3D11RenderTargetView* postRTV = nullptr; + globals::d3d::context->OMGetRenderTargets(1, &postRTV, nullptr); + if (postRTV) { + ID3D11Resource* res = nullptr; + postRTV->GetResource(&res); + if (res) { + ID3D11Texture2D* postTex = nullptr; + res->QueryInterface(__uuidof(ID3D11Texture2D), (void**)&postTex); + if (postTex) { + EnsurePostPPCopy(postTex); + globals::d3d::context->CopyResource(g_postPPCopy.get(), postTex); + g_postPPReady = true; + if (shouldLog) { + D3D11_TEXTURE2D_DESC desc; + postTex->GetDesc(&desc); + logger::info("[TAAReorder] Phase 2A output: {}x{} fmt={} → copied to g_postPPCopy", + desc.Width, desc.Height, (uint32_t)desc.Format); + } + postTex->Release(); + } + res->Release(); + } + postRTV->Release(); + 
} + } + + // Detect Phase 5 completion + if (isPeripheryTAA && passIndex == 35) { + g_phase5Complete = true; + if (shouldLog) + logger::info("[TAAReorder] Phase 5 complete (passIdx=35)"); + } + + // ─── Deferred DLSS paste: after Phase 5 (TAA) completes ─── + // TAA has now processed the entire submit texture (periphery is anti-aliased). + // Paste the sharp DLSS center on top, overwriting the TAA'd center region. + if (isPeripheryTAA && passIndex == 35 && g_dlssReady && g_submitTexForPaste && g_postPPCopy) { + auto context = globals::d3d::context; + auto& upscaling = globals::features::upscaling; + + auto screenSize = globals::state->screenSize; + uint32_t eyeW = (uint32_t)(screenSize.x / 2); + uint32_t eyeH = (uint32_t)screenSize.y; + float vpScale = upscaling.settings.vrDlssViewportScale; + uint32_t centerW = (uint32_t)(eyeW * vpScale); + uint32_t centerH = (uint32_t)(eyeH * vpScale); + uint32_t baseCenterX = (eyeW - centerW) / 2; + uint32_t centerY = (eyeH - centerH) / 2; + + int32_t nasalShift = (int32_t)(upscaling.settings.vrDlssCropOffsetX * eyeW); + + float featherWidth = upscaling.settings.vrDlssFeatherWidth; + float featherPixels = featherWidth * eyeW; + + // Feathered blend path: create RTV from submit texture for PS composite + bool useFeathered = featherPixels > 0.0f && upscaling.vrFeatheredCompositePS && upscaling.vrFeatheredCompositeBlendState; + if (useFeathered) { + D3D11_TEXTURE2D_DESC texDesc; + g_submitTexForPaste->GetDesc(&texDesc); + + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = texDesc.Format; + rtvDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + rtvDesc.Texture2D.MipSlice = 0; + + ID3D11RenderTargetView* pasteRTV = nullptr; + HRESULT hr = globals::d3d::device->CreateRenderTargetView(g_submitTexForPaste.get(), &rtvDesc, &pasteRTV); + + if (SUCCEEDED(hr) && pasteRTV) { + // Save pipeline state + ID3D11BlendState* oldBlendState = nullptr; + float oldBlendFactor[4]; + UINT oldSampleMask; + 
context->OMGetBlendState(&oldBlendState, oldBlendFactor, &oldSampleMask); + + ID3D11RenderTargetView* oldRTV = nullptr; + ID3D11DepthStencilView* oldDSV = nullptr; + context->OMGetRenderTargets(1, &oldRTV, &oldDSV); + + ID3D11VertexShader* oldVS = nullptr; + context->VSGetShader(&oldVS, nullptr, nullptr); + ID3D11PixelShader* oldPS = nullptr; + context->PSGetShader(&oldPS, nullptr, nullptr); + ID3D11InputLayout* oldIL = nullptr; + context->IAGetInputLayout(&oldIL); + D3D11_PRIMITIVE_TOPOLOGY oldTopo; + context->IAGetPrimitiveTopology(&oldTopo); + ID3D11RasterizerState* oldRS = nullptr; + context->RSGetState(&oldRS); + + UINT oldNumVPs = 1; + D3D11_VIEWPORT oldVP; + context->RSGetViewports(&oldNumVPs, &oldVP); + + ID3D11ShaderResourceView* oldPSSRV = nullptr; + context->PSGetShaderResources(0, 1, &oldPSSRV); + ID3D11SamplerState* oldPSSampler = nullptr; + context->PSGetSamplers(0, 1, &oldPSSampler); + ID3D11Buffer* oldPSCB = nullptr; + context->PSGetConstantBuffers(0, 1, &oldPSCB); + + // Ensure CB exists + if (!upscaling.vrFeatheredCompositeCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + globals::d3d::device->CreateBuffer(&cbDesc, nullptr, upscaling.vrFeatheredCompositeCB.put()); + } + + // Set pipeline state for feathered composite + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + context->VSSetShader(upscaling.GetUpscaleVS(), nullptr, 0); + context->PSSetShader(upscaling.vrFeatheredCompositePS.get(), nullptr, 0); + context->RSSetState(upscaling.upscaleRasterizerState.get()); + + float blendFactor[4] = { 0, 0, 0, 0 }; + context->OMSetBlendState(upscaling.vrFeatheredCompositeBlendState.get(), blendFactor, 0xFFFFFFFF); + + ID3D11ShaderResourceView* srvs[1] = { g_postPPCopySRV.get() }; + 
context->PSSetShaderResources(0, 1, srvs); + + ID3D11RenderTargetView* rtvs[1] = { pasteRTV }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + if (!upscaling.vrLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + globals::d3d::device->CreateSamplerState(&sampDesc, upscaling.vrLinearSampler.put()); + } + ID3D11SamplerState* samplers[1] = { upscaling.vrLinearSampler.get() }; + context->PSSetSamplers(0, 1, samplers); + + for (uint32_t i = 0; i < 2; i++) { + uint32_t eyeOffset = i * eyeW; + int32_t eyeNasalShift = (i == 0) ? nasalShift : -nasalShift; + uint32_t offsetCenterX = (uint32_t)std::clamp((int32_t)baseCenterX + eyeNasalShift, 0, (int32_t)(eyeW - centerW)); + + D3D11_VIEWPORT vp = {}; + vp.TopLeftX = (float)eyeOffset; + vp.TopLeftY = 0.0f; + vp.Width = (float)eyeW; + vp.Height = (float)eyeH; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + + uint32_t fullW = eyeW * 2; + uint32_t fullH = eyeH; + float srcUVOriginX = (float)(eyeOffset + offsetCenterX) / (float)fullW; + float srcUVOriginY = (float)centerY / (float)fullH; + float srcUVScaleX = (float)centerW / (float)fullW; + float srcUVScaleY = (float)centerH / (float)fullH; + + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(upscaling.vrFeatheredCompositeCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + float originX, originY; + float sizeX, sizeY; + float featherWidth; + float pad0; + float srcUVOriginX, srcUVOriginY; + float srcUVScaleX, srcUVScaleY; + float pad1[2]; + } cbData = { + (float)(eyeOffset + offsetCenterX), (float)centerY, + (float)centerW, (float)centerH, + featherPixels, 0.0f, + srcUVOriginX, srcUVOriginY, + srcUVScaleX, srcUVScaleY, + {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + 
context->Unmap(upscaling.vrFeatheredCompositeCB.get(), 0); + + ID3D11Buffer* cbs[1] = { upscaling.vrFeatheredCompositeCB.get() }; + context->PSSetConstantBuffers(0, 1, cbs); + + context->Draw(3, 0); + } + + if (shouldLog) + logger::info("[TAAReorder] Deferred paste: feathered composite {}x{} at ({},{}) feather={:.1f}px nasalShift={} per-eye", + centerW, centerH, baseCenterX, centerY, featherPixels, nasalShift); + + // Restore pipeline state + context->OMSetBlendState(oldBlendState, oldBlendFactor, oldSampleMask); + context->RSSetViewports(1, &oldVP); + context->VSSetShader(oldVS, nullptr, 0); + context->PSSetShader(oldPS, nullptr, 0); + context->IASetInputLayout(oldIL); + context->IASetPrimitiveTopology(oldTopo); + context->RSSetState(oldRS); + context->PSSetShaderResources(0, 1, &oldPSSRV); + context->PSSetSamplers(0, 1, &oldPSSampler); + context->PSSetConstantBuffers(0, 1, &oldPSCB); + context->OMSetRenderTargets(1, &oldRTV, oldDSV); + + if (oldBlendState) + oldBlendState->Release(); + if (oldRTV) + oldRTV->Release(); + if (oldDSV) + oldDSV->Release(); + if (oldVS) + oldVS->Release(); + if (oldPS) + oldPS->Release(); + if (oldIL) + oldIL->Release(); + if (oldRS) + oldRS->Release(); + if (oldPSSRV) + oldPSSRV->Release(); + if (oldPSSampler) + oldPSSampler->Release(); + if (oldPSCB) + oldPSCB->Release(); + + pasteRTV->Release(); + } else { + useFeathered = false; + if (shouldLog) + logger::warn("[TAAReorder] Deferred paste: failed to create RTV for feathered path"); + } + } + if (!useFeathered) { + // Hard edge: CopySubresourceRegion + for (uint32_t i = 0; i < 2; i++) { + uint32_t eyeOffset = i * eyeW; + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t offsetCenterX = (uint32_t)std::clamp((int32_t)baseCenterX + eyeNasalShift, 0, (int32_t)(eyeW - centerW)); + + D3D11_BOX srcBox = { + eyeOffset + offsetCenterX, centerY, 0, + eyeOffset + offsetCenterX + centerW, centerY + centerH, 1 + }; + context->CopySubresourceRegion(g_submitTexForPaste.get(), 0, + eyeOffset + offsetCenterX, centerY, 0, + g_postPPCopy.get(), 0, &srcBox); + } + + if (shouldLog) + logger::info("[TAAReorder] Deferred paste: hard-copy {}x{} at ({},{}) nasalShift={} per-eye", + centerW, centerH, baseCenterX, centerY, nasalShift); + } + + g_dlssPasteComplete = true; + g_submitTexForPaste = nullptr; // Release reference + } + } + + // ─── BSImagespaceShader hook: DLSS eval + paste after pipeline completes ─── + // Wraps call at 0x132C827 (write_thunk_call). func() encompasses the + // conductor (Phase 2A) but NOT Phase 5 (TAA+DRS) — Phase 5 runs after us. + // We evaluate DLSS on the captured Phase 2A output and save the submit + // texture reference. The actual paste happens in ExecutePassHook after + // Phase 5 (passIdx=35) completes, so TAA only processes the periphery + // and the sharp DLSS center overwrites the TAA'd center region. 
+    void BSImagespaceShaderHook::thunk(void* a_this, uint64_t a_param)
+    {
+        // Run the wrapped engine call first; everything below works on its result.
+        func(a_this, a_param);
+
+        if (!ShouldReorderTAA())
+            return;
+
+        const bool shouldLog = (g_diagCounter == 0);
+        auto context = globals::d3d::context;
+        auto& upscaling = globals::features::upscaling;
+
+        // Get submit texture from bound RT after pipeline stage completes.
+        // OMGetRenderTargets/GetResource/QueryInterface each hand back an AddRef'd
+        // interface; com_ptr releases them on every exit path.
+        winrt::com_ptr<ID3D11Texture2D> submitTex;
+        {
+            winrt::com_ptr<ID3D11RenderTargetView> submitRTV;
+            context->OMGetRenderTargets(1, submitRTV.put(), nullptr);
+            if (submitRTV) {
+                winrt::com_ptr<ID3D11Resource> res;
+                submitRTV->GetResource(res.put());
+                if (res)
+                    res->QueryInterface(__uuidof(ID3D11Texture2D), submitTex.put_void());
+            }
+        }
+
+        if (shouldLog) {
+            if (submitTex) {
+                D3D11_TEXTURE2D_DESC desc;
+                submitTex->GetDesc(&desc);
+                logger::info("[TAAReorder] BSImagespaceShaderHook: submitTex=0x{:X} {}x{} fmt={} bind=0x{:X} postPPReady={} phase5={}",
+                    reinterpret_cast<uintptr_t>(submitTex.get()), desc.Width, desc.Height, static_cast<uint32_t>(desc.Format),
+                    desc.BindFlags, g_postPPReady, g_phase5Complete);
+            } else {
+                logger::info("[TAAReorder] BSImagespaceShaderHook: no submitTex bound");
+            }
+        }
+
+        // Step 1: Evaluate DLSS on the captured post-PP intermediate
+        if (g_postPPReady && g_postPPCopy) {
+            if (shouldLog)
+                logger::info("[TAAReorder] BSImagespaceShaderHook: evaluating DLSS on g_postPPCopy...");
+
+            upscaling.Upscale(g_postPPCopy.get());
+            g_dlssReady = true;
+
+            if (shouldLog)
+                logger::info("[TAAReorder] BSImagespaceShaderHook: DLSS evaluation complete");
+        } else if (shouldLog) {
+            logger::info("[TAAReorder] BSImagespaceShaderHook: skip DLSS (postPPReady={} postPPCopy={})",
+                g_postPPReady, static_cast<void*>(g_postPPCopy.get()));
+        }
+
+        // Step 2: Save submit texture for deferred paste after Phase 5 (TAA)
+        // DO NOT paste here — Phase 5 hasn't run yet and would blur the DLSS center.
+        // The paste happens in ExecutePassHook after passIdx=35 completes.
+        // NOTE(review): g_dlssReady is only ever set (never cleared) in this function; if nothing
+        // else resets it per frame, a stale value arms the paste on a frame where DLSS was skipped — confirm.
+        if (g_dlssReady && submitTex) {
+            g_submitTexForPaste = submitTex;  // copy-assign: takes its own reference
+            if (shouldLog)
+                logger::info("[TAAReorder] BSImagespaceShaderHook: saved submitTex for deferred paste (waiting for Phase 5)");
+        }
+    }
+
+    // ─── Depth/stencil registration hook: diagnostic logging ───
+    // Tracks dimensions per slot and logs whenever they change.
+    // data[0]=width, data[1]=height based on initial analysis.
+    void DepthStencilRegHook::thunk(void* manager, uint32_t slot, void* desc)
+    {
+        // Number of slots tracked; registrations for higher slots are forwarded without logging.
+        constexpr uint32_t kTrackedSlots = 32;
+
+        if (desc && slot < kTrackedSlots) {
+            // NOTE(review): the cast's target type was lost in this text; uint32_t is assumed
+            // because the values are compared against the uint32_t lastWidth/lastHeight — confirm.
+            // Reads data[0..7]; assumes the descriptor is at least 32 bytes — TODO confirm.
+            const auto* data = reinterpret_cast<const uint32_t*>(desc);
+            static uint32_t lastWidth[kTrackedSlots] = {};
+            static uint32_t lastHeight[kTrackedSlots] = {};
+            static uint32_t callCount[kTrackedSlots] = {};
+
+            callCount[slot]++;
+            const bool dimsChanged = (data[0] != lastWidth[slot] || data[1] != lastHeight[slot]);
+            if (dimsChanged) {
+                logger::info("[TAAReorder] DepthStencilReg: slot={} {}x{} → {}x{} (call #{}) data[2..7]= {} {} {} {} {} {}",
+                    slot, lastWidth[slot], lastHeight[slot], data[0], data[1], callCount[slot],
+                    data[2], data[3], data[4], data[5], data[6], data[7]);
+                lastWidth[slot] = data[0];
+                lastHeight[slot] = data[1];
+            }
+        }
+
+        // Always forward to the original registration; the hook never alters the descriptor.
+        func(manager, slot, desc);
+    }
+
+    // ─── Hidden area mesh render hook: pass-through ───
+    // HAM renders normally. Previous "frozen frame" artifacts at the HAM boundary
+    // were caused by the depth upscaler's conservative blending (GatherRed + lerp)
+    // leaking depth=0 mask values into valid depth. Fixed in DepthUpscalePS.hlsl
+    // by switching to pure point sampling.
+    // HiddenAreaMeshHook removed — the passthrough hook was breaking HAM
+    // by corrupting the original function via Detours on an unverified RVA.
+
+    // ─── BSOpenVR::Submit hook: diagnostic logging ───
+    // Logs the submitted texture's description and the caller's RVA on diagnostic
+    // frames (g_diagCounter == 0), then forwards to the original Submit unchanged.
+    void SubmitHook::thunk(void* thisPtr, void* textureHandle)
+    {
+        if (g_diagCounter == 0 && textureHandle) {
+            // NOTE(review): treats textureHandle as an ID3D11Texture2D* (GetDesc is called on it) — confirm
+            // against the real BSOpenVR::Submit signature.
+            auto* tex2d = static_cast<ID3D11Texture2D*>(textureHandle);
+            D3D11_TEXTURE2D_DESC desc = {};
+            tex2d->GetDesc(&desc);
+            const auto base = REL::Module::get().base();
+            const auto retAddr = reinterpret_cast<uintptr_t>(_ReturnAddress());
+            logger::info("[TAAReorder] Submit: tex=0x{:X} {}x{} fmt={} dlssPasted={} callerRVA=0x{:X}",
+                reinterpret_cast<uintptr_t>(textureHandle), desc.Width, desc.Height, static_cast<uint32_t>(desc.Format),
+                g_dlssPasteComplete, retAddr - base);
+        }
+
+        func(thisPtr, textureHandle);
+    }
+
+    // ─── Post-processing conductor call hook: pass-through (tracking only) ───
+    // Inner conductor call at 0x1325086 inside BSImagespaceShader::Render.
+    // Only tracks g_insideConductor state. DLSS logic is in BSImagespaceShaderHook.
+    void ConductorCallHook::thunk(void* a1, void* a2, void* a3, void* a4)
+    {
+        // Restore the previous value instead of forcing false, so a nested call
+        // does not clear the flag while the outer call is still running.
+        const bool wasInside = g_insideConductor;
+        g_insideConductor = true;
+        func(a1, a2, a3, a4);
+        g_insideConductor = wasInside;
+    }
+
+    // Installs one Detours hook inside its own transaction.
+    // Returns true when the transaction committed. On failure the error code is
+    // logged and false is returned; the hook is then simply not active.
+    static bool AttachHookChecked(PVOID* a_target, PVOID a_thunk, const char* a_name)
+    {
+        LONG status = DetourTransactionBegin();
+        if (status != NO_ERROR) {
+            logger::error("[TAAReorder] {}: DetourTransactionBegin failed ({})", a_name, status);
+            return false;
+        }
+
+        status = DetourUpdateThread(GetCurrentThread());
+        if (status == NO_ERROR)
+            status = DetourAttach(a_target, a_thunk);
+        if (status != NO_ERROR) {
+            DetourTransactionAbort();
+            logger::error("[TAAReorder] {}: DetourAttach failed ({})", a_name, status);
+            return false;
+        }
+
+        status = DetourTransactionCommit();
+        if (status != NO_ERROR) {
+            logger::error("[TAAReorder] {}: DetourTransactionCommit failed ({})", a_name, status);
+            return false;
+        }
+        return true;
+    }
+
+    // Installs the depth/stencil registration hook. VR only; safe to call more than once.
+    void InitEarly()
+    {
+        if (!REL::Module::IsVR())
+            return;
+
+        // func is nullptr until the hook has been set up; a second attach would detour the trampoline.
+        if (DepthStencilRegHook::func)
+            return;
+
+        auto base = REL::Module::get().base();
+
+        // ─── Hook: DepthStencilRegistration (RVA 0x00DC79D0) ───
+        // Must be installed before renderer initialization (which registers depth/stencil targets).
+        // Called from Upscaling::Load(), before D3D device creation.
+        DepthStencilRegHook::func = reinterpret_cast<RegisterDepthStencil_t>(base + 0x00DC79D0);
+        if (AttachHookChecked(reinterpret_cast<PVOID*>(&DepthStencilRegHook::func),
+                reinterpret_cast<PVOID>(DepthStencilRegHook::thunk), "DepthStencilRegHook"))
+            logger::info("[TAAReorder] InitEarly: DepthStencil registration hooked at RVA 0x00DC79D0");
+    }
+
+    // Resolves global pointers and installs all remaining hooks. VR only; runs at most once.
+    void Init()
+    {
+        if (!REL::Module::IsVR())
+            return;
+
+        // Installing twice would detour the trampolines; presumably write_thunk_call would
+        // also capture our own thunks as the "original" call targets.
+        if (g_initialized)
+            return;
+
+        auto base = REL::Module::get().base();
+
+        // ─── Core pointers ───
+        g_pRendererSingleton = reinterpret_cast<uintptr_t*>(base + 0x034234C0);
+
+        bool allAttached = true;
+
+        // ─── Hook: ForceTAASetter (RVA 0x005C8EE0) ───
+        ForceTAASetter::func = base + 0x005C8EE0;
+        allAttached &= AttachHookChecked(reinterpret_cast<PVOID*>(&ForceTAASetter::func),
+            reinterpret_cast<PVOID>(ForceTAASetter::thunk), "ForceTAASetter");
+
+        // ─── Hook: TAAStateMachine (RVA 0x005C8F10) ───
+        TAAStateMachine::func = base + 0x005C8F10;
+        allAttached &= AttachHookChecked(reinterpret_cast<PVOID*>(&TAAStateMachine::func),
+            reinterpret_cast<PVOID>(TAAStateMachine::thunk), "TAAStateMachine");
+
+        // ─── Hook: ExecutePass (RVA 0x012D2540) ───
+        ExecutePassHook::func = reinterpret_cast<ExecutePass_t>(base + 0x012D2540);
+        allAttached &= AttachHookChecked(reinterpret_cast<PVOID*>(&ExecutePassHook::func),
+            reinterpret_cast<PVOID>(ExecutePassHook::thunk), "ExecutePassHook");
+
+        // ─── Hook: BSOpenVR::Submit (RVA 0x00C53920) ───
+        SubmitHook::func = reinterpret_cast<BSOpenVRSubmit_t>(base + 0x00C53920);
+        allAttached &= AttachHookChecked(reinterpret_cast<PVOID*>(&SubmitHook::func),
+            reinterpret_cast<PVOID>(SubmitHook::thunk), "SubmitHook");
+
+        // ─── Hook: BSImagespaceShader via write_thunk_call at RVA 0x132C827 ───
+        // Wraps BSImagespaceShader::Render from the Orchestrator level.
+        // After func() returns, the thunk evaluates DLSS and stores the submit texture;
+        // the paste itself is deferred to ExecutePassHook (after passIdx=35).
+        stl::write_thunk_call<BSImagespaceShaderHook>(base + 0x132C827);
+
+        // ─── Hook: Inner conductor call via write_thunk_call at RVA 0x1325086 ───
+        // Pass-through, only tracks g_insideConductor state.
+        stl::write_thunk_call<ConductorCallHook>(base + 0x1325086);
+
+        g_initialized = true;
+
+        if (!allAttached)
+            logger::error("[TAAReorder] One or more Detours hooks failed to attach; see errors above");
+
+        logger::info("[TAAReorder] Initialized — base=0x{:X}", base);
+        logger::info("[TAAReorder] Post-pipeline DLSS mode (periphery TAA)");
+        logger::info("[TAAReorder] BSImagespaceShader hooked via write_thunk_call at RVA 0x132C827 (DLSS eval + paste)");
+        logger::info("[TAAReorder] Inner conductor hooked via write_thunk_call at RVA 0x1325086 (tracking only)");
+        logger::info("[TAAReorder] BSOpenVR::Submit hooked at RVA 0x00C53920");
+    }
+}
diff --git a/src/Features/TAAReorder.h b/src/Features/TAAReorder.h
new file mode 100644
index 0000000000..e6aa0b8ee2
--- /dev/null
+++ b/src/Features/TAAReorder.h
@@ -0,0 +1,182 @@
+#pragma once
+
+// TAA Reordering for VR DLSS Viewport Scaling (Post-Conductor DLSS)
+//
+// PureDark's approach: DLSS is evaluated AFTER BSImagespaceShader::Render
+// completes (which includes the conductor; Phase 5 TAA + DRS runs later — see step 3).
+//
+// Flow:
+//   1. BSImagespaceShaderHook wraps the call at 0x132C827:
+//      func() runs → conductor executes Phase 2A
+//        a. ExecutePassHook captures Phase 2A output to g_postPPCopy
+//   2. After func() returns in BSImagespaceShaderHook:
+//        a. Gets submit texture from bound RT
+//        b. Evaluates DLSS on g_postPPCopy (post-PP intermediate)
+//        c. Saves submit texture reference for deferred paste
+//   3. Back in ExecutePassHook, Phase 5 (TAA + DRS) runs (passIdx=35):
+//        a. TAA anti-aliases the entire submit texture (periphery benefits)
+//        b. After Phase 5 returns: paste DLSS center onto submit texture
+//           (overwrites TAA'd center with sharp DLSS output)
+//   4. Engine continues: Orchestrator → Scaleform Display (UI) → Submit
+//   5. 
Lock DRS + UpdateCameraData (in Main_PostProcessing::thunk after func()) +// +// Both DLSS and TAA get Phase 2A's PP applied: +// - TAA: naturally (Phase 2A runs before Phase 5 in conductor) +// - DLSS: processes the Phase 2A output copy (g_postPPCopy) +// +// All RVAs are VR-specific (SkyrimVR.exe). + +#include +#include +#include +#include + +struct Upscaling; + +namespace TAAReorder +{ + // ─── Function pointer types ─── + using ExecutePass_t = void (*)(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag); + using BSOpenVRSubmit_t = void (*)(void* thisPtr, void* textureHandle); + + // ─── Resolved global data pointers ─── + inline uintptr_t* g_pRendererSingleton = nullptr; + inline bool g_initialized = false; + + // ─── Diagnostics (rate-limited logging) ─── + inline int g_diagCounter = 0; + inline constexpr int DIAG_INTERVAL = 300; + + // ─── Per-frame sequence counter (for verifying call ordering) ─── + inline int g_frameSeqCounter = 0; + + // ─── ExecutePass hook (conductor interposition) ─── + // RVA: 0x012D2540 — called by the conductor for each render pass. + // Copies Phase 2A output RT to g_postPPCopy for DLSS to process. + struct ExecutePassHook + { + static void thunk(void* manager, void* passObj, int srcTech, int dstTech, void* extraData, uint8_t flag); + static inline ExecutePass_t func = nullptr; + }; + + // ─── BSImagespaceShader hook: DLSS eval after conductor, paste deferred ─── + // Wraps call at 0x132C827 (write_thunk_call). func() encompasses the + // conductor (Phase 2A) but NOT Phase 5 (TAA+DRS) — Phase 5 runs after us. + // We evaluate DLSS on the captured Phase 2A output and save the submit + // texture reference. The actual paste happens in ExecutePassHook after + // Phase 5 (passIdx=35) completes, so TAA only processes the periphery + // and the sharp DLSS center overwrites the TAA'd center region. 
+    struct BSImagespaceShaderHook
+    {
+        static void thunk(void* a_this, uint64_t a_param);
+        static inline REL::Relocation func;  // NOTE(review): template argument appears to be missing in this text — confirm
+    };
+
+    // ─── Post-processing conductor call hook (pass-through, tracking only) ───
+    // RVA: 0x1325086 — inner conductor call inside BSImagespaceShader::Render.
+    // Only used for g_insideConductor tracking.
+    struct ConductorCallHook
+    {
+        static void thunk(void* a1, void* a2, void* a3, void* a4);
+        static inline REL::Relocation func;  // NOTE(review): template argument appears to be missing — confirm
+    };
+
+    // ─── Post-PP copy (Phase 2A output, DLSS color source) ───
+    // After Phase 2A completes, ExecutePassHook copies the bound RT here.
+    // BSImagespaceShaderHook passes this to Upscale() as colorSourceOverride.
+    // After DLSS, FinalizePerEyeOutputs writes DLSS center back into this texture.
+    inline winrt::com_ptr g_postPPCopy;     // presumably com_ptr of ID3D11Texture2D (used as a CopySubresourceRegion source) — confirm
+    inline winrt::com_ptr g_postPPCopySRV;  // bound as an ID3D11ShaderResourceView* by the feathered paste
+    inline bool g_postPPReady = false;
+
+    // ─── DLSS evaluation complete flag ───
+    // Set after BSImagespaceShaderHook evaluates DLSS on g_postPPCopy.
+    // Used to gate the DLSS center paste step.
+    inline bool g_dlssReady = false;
+
+    // ─── DLSS paste complete flag ───
+    // Set by the deferred paste in TAAReorder.cpp once the DLSS center is on the submit texture (after passIdx=35); SubmitHook logs it.
+    inline bool g_dlssPasteComplete = false;
+
+    // ─── Phase 5 tracking ───
+    inline bool g_phase5Complete = false;  // set when the periphery-TAA pass with passIndex == 35 has returned
+
+    // ─── Conductor state tracking ───
+    inline bool g_insideConductor = false;  // true only while ConductorCallHook::thunk is inside func()
+    inline int g_bsHookCallCount = 0;
+
+    // ─── RGB-only blend state (may be useful for future feathering) ───
+    inline winrt::com_ptr g_rgbOnlyBlendState;
+
+    // ─── Stencil state for HAM-aware compositing ───
+    // DepthEnable=false, StencilEnable=true, StencilFunc=EQUAL, StencilRef=0.
+    // Only writes to pixels where stencil==0 (visible, non-HAM pixels).
+    // Matches PureDark's approach in Evaluate()/RenderTexture().
+    inline winrt::com_ptr g_hamStencilState;
+
+    // ─── Saved submit texture for deferred paste (after Phase 5) ───
+    inline winrt::com_ptr g_submitTexForPaste;  // filled by BSImagespaceShaderHook, reset to nullptr after the paste
+
+    // ─── Cached UAV for submit texture (ClearHMDMask + ForceAlpha on submit after DLSS paste) ───
+    inline winrt::com_ptr g_submitTexUAV;
+    inline ID3D11Texture2D* g_submitTexUAVOwner = nullptr;  // track which texture the UAV belongs to
+
+    // ─── ForceAlpha compute shader (sets alpha=1.0 to fix Scaleform UI rendering) ───
+    inline winrt::com_ptr g_forceAlphaCS;
+
+    // ─── Setter hook: Setter A (Force TAA) ───
+    // RVA: 0x005C8EE0 — unconditional TAA enable.
+    // Pass-through (we want TAA to run natively).
+    struct ForceTAASetter
+    {
+        static void thunk();
+        static inline REL::Relocation func;  // assigned base + 0x005C8EE0 in Init(), then detoured
+    };
+
+    // ─── Setter hook: Setter B (TAA State Machine) ───
+    // RVA: 0x005C8F10 — conditional TAA enable/disable.
+    // Pass-through (we want TAA to run natively).
+    struct TAAStateMachine
+    {
+        static void thunk();
+        static inline REL::Relocation func;  // assigned base + 0x005C8F10 in Init(), then detoured
+    };
+
+    // ─── Depth/stencil registration hook ───
+    // RVA: 0x00DC79D0 — registers depth/stencil targets in the RT manager (+0x1388).
+    // Separate path from color RTs (registered via sub_417980 at +0x1350).
+    // The thunk in TAAReorder.cpp only logs per-slot dimension changes and forwards the call unchanged.
+    // NOTE(review): earlier text described scaling slots 0, 1, 7 to display resolution — that thunk does not do this.
+    using RegisterDepthStencil_t = void (*)(void* manager, uint32_t slot, void* desc);
+    struct DepthStencilRegHook
+    {
+        static void thunk(void* manager, uint32_t slot, void* desc);
+        static inline RegisterDepthStencil_t func = nullptr;  // set in InitEarly() before the detour is attached
+    };
+
+    // ─── BSOpenVR::Submit hook (VR frame submission interception) ───
+    // RVA: 0x00C53920 — BSOpenVR::Submit, vtable[3].
+    // Diagnostic logging only.
+ struct SubmitHook + { + static void thunk(void* thisPtr, void* textureHandle); + static inline BSOpenVRSubmit_t func = nullptr; + }; + + // Check if TAA reordering should be active based on current settings + bool ShouldReorderTAA(); + + // Ensure g_postPPCopy matches the source texture dimensions/format + void EnsurePostPPCopy(ID3D11Texture2D* sourceTex); + + // Helper: draw fullscreen format-converting copy (Load-based, 1:1 pixel copy). + void DrawFullscreenCopy(ID3D11ShaderResourceView* srcSRV, ID3D11RenderTargetView* dstRTV, + float vpX, float vpY, float vpW, float vpH); + + // Install hooks that must be in place before renderer initialization (depth/stencil reg). + // Call from Upscaling::Load() (VR only). + void InitEarly(); + + // Initialize all pointers and install hooks. Call once from PostPostLoad (VR only). + void Init(); +} diff --git a/src/Features/Upscaling.cpp b/src/Features/Upscaling.cpp index 61774e99b6..0e89c3e338 100644 --- a/src/Features/Upscaling.cpp +++ b/src/Features/Upscaling.cpp @@ -1,8 +1,10 @@ #include "Upscaling.h" #include "Deferred.h" +#include "Features/VR.h" #include "Hooks.h" #include "State.h" +#include "TAAReorder.h" #include "Upscaling/DX12SwapChain.h" #include "Upscaling/FidelityFX.h" #include "Upscaling/Streamline.h" @@ -25,7 +27,11 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( sharpnessFSR, sharpnessDLSS, presetDLSS, - useGatherWideKernel); + useGatherWideKernel, + vrDlssViewportScale, + vrPeripheryTAA, + vrDlssCropOffsetX, + vrDlssFeatherWidth); decltype(&D3D11CreateDeviceAndSwapChain) ptrD3D11CreateDeviceAndSwapChainUpscaling; @@ -61,7 +67,9 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( upscaling.CheckBackendFeatures(pAdapter); // Use better swap effect to prevent tearing and improve performance - pSwapChainDesc->SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + // But don't change it for VR as it can affect frame pacing with the VR compositor + if (!globals::game::isVR) + pSwapChainDesc->SwapEffect = 
DXGI_SWAP_EFFECT_FLIP_DISCARD; bool shouldProxy = !globals::game::isVR; if (shouldProxy) @@ -141,8 +149,14 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChainUpscaling( ppImmediateContext); if (upscaling.IsBackendInitialized()) { - upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); - upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + // Skip Streamline interface wrapping for VR — slUpgradeInterface wraps the D3D + // device and swap chain with Streamline proxy objects, which disrupts VR compositor + // frame pacing (causes judder/stuttering). DLSS still functions without wrapped + // interfaces; only frame generation requires them (and that's already VR-gated above). + if (!globals::game::isVR) { + upscaling.UpgradeBackendInterface((void**)&(*ppDevice)); + upscaling.UpgradeBackendInterface((void**)&(*ppSwapChain)); + } upscaling.SetBackendD3DDevice(*ppDevice); upscaling.PostBackendDevice(); } @@ -238,6 +252,41 @@ void Upscaling::DrawSettings() ImGui::Text("Set to 'Default' for automatic selection based on your Upscale Preset and hardware."); ImGui::Text("Changing this setting requires a restart to take effect."); } + + if (globals::game::isVR) { + if (ImGui::TreeNodeEx("VR Viewport Scaling", ImGuiTreeNodeFlags_DefaultOpen)) { + ImGui::SliderFloat("DLSS Viewport Scale", &settings.vrDlssViewportScale, 0.5f, 1.0f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Controls the fraction of each eye's view that DLSS processes."); + ImGui::Text("Lower values = better performance, less visible area upscaled by DLSS."); + ImGui::Text("The VR lens hides the periphery, so 0.7-0.85 is recommended."); + ImGui::Text("The periphery outside the DLSS region is filled with a bilinear upscale."); + } + + if (settings.vrDlssViewportScale < 1.0f) { + bool peripheryTAA = settings.vrPeripheryTAA != 0; + if (ImGui::Checkbox("Periphery TAA", &peripheryTAA)) + settings.vrPeripheryTAA = peripheryTAA ? 
1 : 0; + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Applies temporal anti-aliasing to the bilinear-upscaled periphery."); + ImGui::Text("Reduces shimmer and improves peripheral quality."); + ImGui::Text("The DLSS center region passes through unchanged."); + } + + ImGui::SliderFloat("Nasal Crop Offset", &settings.vrDlssCropOffsetX, 0.0f, 0.3f, "%.3f"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Shifts the DLSS crop region toward the nose."); + ImGui::Text("Higher values = more of the nasal view processed by DLSS."); + ImGui::Text("0.0 = centered, 0.1-0.2 recommended."); + } + + // Feathered edge disabled pending fix — hidden from UI + // ImGui::SliderFloat("Crop Edge Feather", &settings.vrDlssFeatherWidth, 0.0f, 0.1f, "%.3f"); + } + + ImGui::TreePop(); + } + } } if (globals::game::isVR) { @@ -460,6 +509,9 @@ void Upscaling::LoadSettings(json& o_json) logger::warn("[Upscaling] Loaded useGatherWideKernel {} out of range, clamping to 1", settings.useGatherWideKernel); settings.useGatherWideKernel = 1; } + settings.vrDlssViewportScale = std::clamp(settings.vrDlssViewportScale, 0.5f, 1.0f); + settings.vrDlssCropOffsetX = std::clamp(settings.vrDlssCropOffsetX, 0.0f, 0.3f); + settings.vrDlssFeatherWidth = std::clamp(settings.vrDlssFeatherWidth, 0.0f, 0.1f); auto iniSettingCollection = globals::game::iniPrefSettingCollection; if (iniSettingCollection) { auto setting = iniSettingCollection->GetSetting("bUseTAA:Display"); @@ -487,6 +539,10 @@ void Upscaling::DataLoaded() void Upscaling::Load() { *(uintptr_t*)&ptrD3D11CreateDeviceAndSwapChainUpscaling = SKSE::PatchIAT(hk_D3D11CreateDeviceAndSwapChainUpscaling, "d3d11.dll", "D3D11CreateDeviceAndSwapChain"); + + // Install depth/stencil registration hook early (before renderer creates targets) + if (globals::game::isVR) + TAAReorder::InitEarly(); } struct BSImageSpace_Init_FXAA @@ -530,6 +586,10 @@ void Upscaling::PostPostLoad() stl::detour_thunk(REL::RelocationID(98974, 105626)); 
logger::info("[Upscaling] Installed hooks"); + + // Install TAA reordering hooks for VR periphery TAA + if (globals::game::isVR) + TAAReorder::Init(); } Upscaling::UpscaleMethod Upscaling::GetUpscaleMethod() const @@ -639,6 +699,13 @@ void Upscaling::DestroyUpscalingTextureResources(UpscaleMethod a_upscalemethod) } } + // VR periphery TAA textures - only needed for DLSS with viewport scaling + if (a_upscalemethod != UpscaleMethod::kDLSS) { + vrPreTAACopy = nullptr; + for (int i = 0; i < 2; i++) + vrTAAdPerEye[i].reset(); + } + // Motion vector copy texture is only needed for DLSS - destroy when switching away from DLSS if (a_upscalemethod != UpscaleMethod::kDLSS) { if (motionVectorCopyTexture) { @@ -692,6 +759,8 @@ void Upscaling::CheckResources(UpscaleMethod a_upscalemethod) vrIntermediateMotionVectors[i].reset(); vrIntermediateReactiveMask[i].reset(); vrIntermediateTransparencyMask[i].reset(); + vrFinalOutput[i].reset(); + vrCropColorIn[i].reset(); } } } @@ -744,6 +813,8 @@ ID3D11PixelShader* Upscaling::GetDepthRefractionUpscalePS() if (!depthRefractionUpscalePS) { logger::debug("Compiling DepthRefractionUpscalePS.hlsl"); std::vector> defines = { { "PSHADER", "" } }; + if (globals::game::isVR) + defines.push_back({ "VR", "" }); depthRefractionUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DepthRefractionUpscalePS.hlsl", defines, "ps_5_0")); } @@ -771,6 +842,37 @@ ID3D11VertexShader* Upscaling::GetUpscaleVS() return upscaleVS.get(); } +ID3D11PixelShader* Upscaling::GetDlssCompositePS() +{ + if (!vrDlssCompositePS) { + logger::debug("Compiling DLSSCompositePS.hlsl"); + vrDlssCompositePS.attach((ID3D11PixelShader*)Util::CompileShader(L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0")); + } + + return vrDlssCompositePS.get(); +} + +ID3D11PixelShader* Upscaling::GetDlssUpscalePS() +{ + if (!vrDlssUpscalePS) { + logger::debug("Compiling DLSSCompositePS.hlsl (BILINEAR_UPSCALE)"); + 
vrDlssUpscalePS.attach((ID3D11PixelShader*)Util::CompileShader( + L"Data/Shaders/Upscaling/DLSSCompositePS.hlsl", + { { "PSHADER", "" }, { "BILINEAR_UPSCALE", "" } }, "ps_5_0")); + } + + if (!vrDlssUpscaleCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = sizeof(DlssCompositeCB); + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrDlssUpscaleCB.put())); + } + + return vrDlssUpscalePS.get(); +} + eastl::unique_ptr Upscaling::CreateTextureFromSource(ID3D11Resource* src, uint32_t width, uint32_t height, bool copyBindFlags, bool createSRV, bool createUAV, const char* name) { @@ -820,7 +922,7 @@ void Upscaling::CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight std::string suffix = (i == 0) ? "Left" : "Right"; vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, inWidth, inHeight, false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); - vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, false, ("Upscale_ColorOut_" + suffix).c_str()); + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, outWidth, outHeight, false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); // Depth: R32_TYPELESS base (matches kMAIN), with R32_FLOAT SRV for ClearHMDMaskCS. // CopySubresourceRegion requires matching typeless formats; SRV reinterprets as R32_FLOAT. 
@@ -873,48 +975,228 @@ void Upscaling::PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* de uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; - bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; - if (!needsRecreate) { - needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || - vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || - vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || - vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); - } - if (needsRecreate) { - logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", - eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); - CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, - colorSrc, mvecSrc, reactiveSrc, transparencySrc); - } + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + + if (viewportScaling) { + // Viewport scaling: physically crop all DLSS inputs to eliminate non-zero subrect offsets. + // vrIntermediateColorIn stays at FULL render-res (for ClearHMDMask + FillPeriphery). + // All other DLSS inputs (depth, mvec, masks) are CROP-sized. + // vrCropColorIn is CROP-sized (DLSS color input, extracted from masked full color). + // This ensures DLSS sees all inputs at {0,0} with no subrect base offsets, + // which is critical for correct temporal reprojection during camera motion. 
+ uint32_t cropWidthIn = (uint32_t)(eyeWidthIn * vpScale); + uint32_t cropHeightIn = (uint32_t)(eyeHeightIn * vpScale); + uint32_t cropWidthOut = (uint32_t)(eyeWidthOut * vpScale); + uint32_t cropHeightOut = (uint32_t)(eyeHeightOut * vpScale); + + bool needsRecreate = !vrIntermediateColorIn[0] || !vrCropColorIn[0] || !vrIntermediateDepth[0] || + !vrIntermediateColorOut[0] || !vrFinalOutput[0]; + if (!needsRecreate) { + // Check format too — periphery TAA feeds R8G8B8A8 post-PP intermediate, + // while normal DLSS feeds R11G11B10 kMAIN. Must recreate on format change. + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrCropColorIn[0]->desc.Width != cropWidthIn || + vrCropColorIn[0]->desc.Height != cropHeightIn || + vrIntermediateDepth[0]->desc.Width != cropWidthIn || + vrIntermediateDepth[0]->desc.Height != cropHeightIn || + vrIntermediateColorOut[0]->desc.Width != cropWidthOut || + vrIntermediateColorOut[0]->desc.Height != cropHeightOut || + vrFinalOutput[0]->desc.Width != eyeWidthOut || + vrFinalOutput[0]->desc.Height != eyeHeightOut); + } - // Extract both eyes' inputs from combined stereo buffers - for (uint32_t i = 0; i < 2; ++i) { - uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; - D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR viewport-scaled intermediates: full {}x{}, crop in {}x{}, crop out {}x{}", + eyeWidthIn, eyeHeightIn, cropWidthIn, cropHeightIn, cropWidthOut, cropHeightOut); + + for (int i = 0; i < 2; i++) { + std::string suffix = (i == 0) ? 
"Left" : "Right"; + + // Full-size color for ClearHMDMask + FillPeriphery + vrIntermediateColorIn[i] = CreateTextureFromSource(colorSrc, eyeWidthIn, eyeHeightIn, + false, true, true, ("Upscale_ColorIn_" + suffix).c_str()); + + // Crop-sized DLSS color input (needs UAV for ClearHMDMask) + vrCropColorIn[i] = CreateTextureFromSource(colorSrc, cropWidthIn, cropHeightIn, + false, true, true, ("Upscale_CropColorIn_" + suffix).c_str()); + + // Crop-sized DLSS output + vrIntermediateColorOut[i] = CreateTextureFromSource(colorSrc, cropWidthOut, cropHeightOut, + false, true, true, ("Upscale_ColorOut_" + suffix).c_str()); + + // Crop-sized depth (R32_TYPELESS with R32_FLOAT SRV) + { + D3D11_TEXTURE2D_DESC depthDesc = {}; + depthDesc.Width = cropWidthIn; + depthDesc.Height = cropHeightIn; + depthDesc.MipLevels = 1; + depthDesc.ArraySize = 1; + depthDesc.Format = DXGI_FORMAT_R32_TYPELESS; + depthDesc.SampleDesc.Count = 1; + depthDesc.Usage = D3D11_USAGE_DEFAULT; + depthDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + vrIntermediateDepth[i] = eastl::make_unique(depthDesc); + Util::SetResourceName(vrIntermediateDepth[i]->resource.get(), ("Upscale_Depth_" + suffix).c_str()); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_FLOAT; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + vrIntermediateDepth[i]->CreateSRV(srvDesc); + } - context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); - context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); - } + // 
Crop-sized motion vectors, reactive mask, transparency mask + vrIntermediateMotionVectors[i] = CreateTextureFromSource(mvecSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_MVec_" + suffix).c_str()); + vrIntermediateReactiveMask[i] = CreateTextureFromSource(reactiveSrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Reactive_" + suffix).c_str()); + vrIntermediateTransparencyMask[i] = CreateTextureFromSource(transparencySrc, cropWidthIn, cropHeightIn, + false, true, false, ("Upscale_Transparency_" + suffix).c_str()); + + // Full display-res composition target (needs RTV for PS feathered composite) + vrFinalOutput[i] = CreateTextureFromSource(colorSrc, eyeWidthOut, eyeHeightOut, + false, true, true, ("Upscale_FinalOutput_" + suffix).c_str()); + // Add render target bind flag and create RTV for pixel shader composite + { + D3D11_TEXTURE2D_DESC finalDesc; + vrFinalOutput[i]->resource->GetDesc(&finalDesc); + if (!(finalDesc.BindFlags & D3D11_BIND_RENDER_TARGET)) { + // Recreate with render target support + finalDesc.BindFlags |= D3D11_BIND_RENDER_TARGET; + vrFinalOutput[i] = eastl::make_unique(finalDesc); + Util::SetResourceName(vrFinalOutput[i]->resource.get(), ("Upscale_FinalOutput_" + suffix).c_str()); + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = finalDesc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = 1; + vrFinalOutput[i]->CreateSRV(srvDesc); + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = finalDesc.Format; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + vrFinalOutput[i]->CreateUAV(uavDesc); + } + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = finalDesc.Format; + rtvDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; + rtvDesc.Texture2D.MipSlice = 0; + vrFinalOutput[i]->CreateRTV(rtvDesc); + } + } + } - // Zero color where depth == 0 (HMD hidden 
area) in each per-eye buffer. - // Depth is read from the combined stereo SRV at the per-eye offset; color is written - // to the isolated per-eye UAV (ColorOffsetX = 0). - auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + // Copy full eye to full-size vrIntermediateColorIn (raw render-res, no HMD mask yet) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + } + + // Nasal offset: shift crop toward nose + // Eye 0 (left): nose is right -> shift right (+) + // Eye 1 (right): nose is left -> shift left (-) + float nasalShiftFrac = settings.vrDlssCropOffsetX; + uint32_t baseCropOffsetX = (eyeWidthIn - cropWidthIn) / 2; + uint32_t cropOffsetY = (eyeHeightIn - cropHeightIn) / 2; + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + + // Extract DLSS crop from raw buffer (before TAA or HMD mask), then mask the crop directly. + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? eyeWidthIn : 0; + + // Per-eye nasal offset in render resolution space + int32_t nasalShift = (int32_t)(nasalShiftFrac * eyeWidthIn); + int32_t eyeNasalShift = (i == 0) ? 
nasalShift : -nasalShift; + uint32_t cropOffsetX = (uint32_t)std::clamp((int32_t)baseCropOffsetX + eyeNasalShift, 0, (int32_t)(eyeWidthIn - cropWidthIn)); + + // Crop color from raw (unmasked, non-TAA'd) full-size buffer + D3D11_BOX cropBox = { cropOffsetX, cropOffsetY, 0, cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrCropColorIn[i]->resource.get(), 0, 0, 0, 0, + vrIntermediateColorIn[i]->resource.get(), 0, &cropBox); + + // ClearHMDMask directly on the crop (depth offset accounts for eye + crop position in stereo buffer) + ClearHMDMask(vrCropColorIn[i]->uav.get(), depthTexture.depthSRV, + cropWidthIn, cropHeightIn, offsetXIn + cropOffsetX, 0, cropOffsetY); + + // Crop depth/mvec/reactive/transparency directly from stereo buffers + D3D11_BOX stereoCropBox = { offsetXIn + cropOffsetX, cropOffsetY, 0, + offsetXIn + cropOffsetX + cropWidthIn, cropOffsetY + cropHeightIn, 1 }; + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, + depthSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, + mvecSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, + reactiveSrc, 0, &stereoCropBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, + transparencySrc, 0, &stereoCropBox); + } - for (uint32_t i = 0; i < 2; ++i) { - uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; - ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, - eyeWidthIn, eyeHeightIn, depthOffset, 0); + // ClearHMDMask on full-size buffer (for FillPeriphery) + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? 
eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } + } else { + // Non-viewport-scaling path: all textures at full per-eye dimensions + bool needsRecreate = !vrIntermediateColorIn[0] || !vrIntermediateColorOut[0]; + if (!needsRecreate) { + D3D11_TEXTURE2D_DESC srcDesc; + ((ID3D11Texture2D*)colorSrc)->GetDesc(&srcDesc); + needsRecreate = (vrIntermediateColorIn[0]->desc.Width != eyeWidthIn || + vrIntermediateColorIn[0]->desc.Height != eyeHeightIn || + vrIntermediateColorIn[0]->desc.Format != srcDesc.Format || + vrIntermediateColorOut[0]->desc.Width != eyeWidthOut || + vrIntermediateColorOut[0]->desc.Height != eyeHeightOut); + } + if (needsRecreate) { + logger::info("[Upscaling] (Re)creating VR intermediates: per-eye in {}x{}, out {}x{}", + eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + CreateVRIntermediateTextures(eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut, + colorSrc, mvecSrc, reactiveSrc, transparencySrc); + } + + // Release viewport-scaling-specific textures + for (int i = 0; i < 2; i++) { + vrCropColorIn[i].reset(); + vrFinalOutput[i].reset(); + vrTAAdPerEye[i].reset(); + } + vrPreTAACopy = nullptr; + + // Copy full eye to per-eye intermediates + for (uint32_t i = 0; i < 2; ++i) { + uint32_t offsetXIn = (i == 1) ? 
eyeWidthIn : 0; + D3D11_BOX srcBox = { offsetXIn, 0, 0, offsetXIn + eyeWidthIn, eyeHeightIn, 1 }; + + context->CopySubresourceRegion(vrIntermediateColorIn[i]->resource.get(), 0, 0, 0, 0, colorSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateDepth[i]->resource.get(), 0, 0, 0, 0, depthSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateMotionVectors[i]->resource.get(), 0, 0, 0, 0, mvecSrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateTransparencyMask[i]->resource.get(), 0, 0, 0, 0, transparencySrc, 0, &srcBox); + context->CopySubresourceRegion(vrIntermediateReactiveMask[i]->resource.get(), 0, 0, 0, 0, reactiveSrc, 0, &srcBox); + } + + // Zero color where depth == 0 (HMD hidden area) in each per-eye buffer + auto& depthTexture = globals::game::renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + for (uint32_t i = 0; i < 2; ++i) { + uint32_t depthOffset = (i == 1) ? eyeWidthIn : 0; + ClearHMDMask(vrIntermediateColorIn[i]->uav.get(), depthTexture.depthSRV, + eyeWidthIn, eyeHeightIn, depthOffset, 0); + } } if (state->frameAnnotations) state->EndPerfEvent(); } -void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) +void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst, bool eye0Only) { if (!globals::game::isVR) return; @@ -929,11 +1211,258 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); uint32_t eyeHeightOut = (uint32_t)screenSize.y; - // Write upscaled outputs back - for (uint32_t i = 0; i < 2; ++i) { + float vpScale = settings.vrDlssViewportScale; + auto upscaleMethod = GetUpscaleMethod(); + bool viewportScaling = (vpScale < 1.0f) && (upscaleMethod == UpscaleMethod::kDLSS); + + uint32_t eyeCount = eye0Only ? 1 : 2; + for (uint32_t i = 0; i < eyeCount; ++i) { uint32_t offsetXOut = (i == 1) ? 
eyeWidthOut : 0; - D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; - context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + + if (viewportScaling && vrFinalOutput[i]) { + // Paste crop-sized DLSS output into center of full-size composition target + uint32_t dlssWidthOut = vrIntermediateColorOut[i]->desc.Width; + uint32_t dlssHeightOut = vrIntermediateColorOut[i]->desc.Height; + + // Calculate paste position with nasal offset (in display resolution space) + int32_t nasalShift = (int32_t)(settings.vrDlssCropOffsetX * eyeWidthOut); + int32_t eyeNasalShift = (i == 0) ? nasalShift : -nasalShift; + uint32_t pasteX = (uint32_t)std::clamp((int32_t)((eyeWidthOut - dlssWidthOut) / 2) + eyeNasalShift, 0, (int32_t)(eyeWidthOut - dlssWidthOut)); + uint32_t pasteY = (eyeHeightOut - dlssHeightOut) / 2; + + float featherPixels = settings.vrDlssFeatherWidth * eyeWidthOut; + + static uint32_t featherLogCount = 0; + if (featherLogCount < 3) { + logger::info("[Upscaling] FinalizePerEyeOutputs eye{}: featherPixels={:.1f} (width={:.4f} * eyeW={}), dlss={}x{}, paste=({},{})", + i, featherPixels, settings.vrDlssFeatherWidth, eyeWidthOut, dlssWidthOut, dlssHeightOut, pasteX, pasteY); + } + + if (featherPixels > 0.0f && vrFinalOutput[i]->rtv) { + // Pixel shader feathered composite (preserves periphery TAA via hardware alpha blend) + // Based on PureDark's technique from Skyrim-Upscaler VR (MIT license) + + // Lazy-compile PS and create blend state + if (!vrFeatheredCompositePS) { + vrFeatheredCompositePS.attach(reinterpret_cast(Util::CompileShader( + L"Data/Shaders/Upscaling/FeatheredCompositePS.hlsl", { { "PSHADER", "" } }, "ps_5_0"))); + if (vrFeatheredCompositePS) + logger::info("[Upscaling] FeatheredCompositePS compiled successfully"); + else + logger::error("[Upscaling] FeatheredCompositePS FAILED to compile!"); + + // Create SrcAlpha/InvSrcAlpha blend state + D3D11_BLEND_DESC blendDesc = {}; + 
blendDesc.RenderTarget[0].BlendEnable = TRUE; + blendDesc.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA; + blendDesc.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; + blendDesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD; + blendDesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; + blendDesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; + blendDesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; + blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + DX::ThrowIfFailed(globals::d3d::device->CreateBlendState(&blendDesc, vrFeatheredCompositeBlendState.put())); + + // Create constant buffer (48 bytes: float2 origin, float2 size, float feather, pad, float2 srcUVOrigin, float2 srcUVScale) + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrFeatheredCompositeCB.put())); + } + + if (vrFeatheredCompositePS) { + // Save current OM state + ID3D11BlendState* oldBlendState = nullptr; + float oldBlendFactor[4]; + UINT oldSampleMask; + context->OMGetBlendState(&oldBlendState, oldBlendFactor, &oldSampleMask); + + ID3D11RenderTargetView* oldRTV = nullptr; + ID3D11DepthStencilView* oldDSV = nullptr; + context->OMGetRenderTargets(1, &oldRTV, &oldDSV); + + // Update constant buffer + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrFeatheredCompositeCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + float originX, originY; + float sizeX, sizeY; + float featherWidth; + float pad0; + float srcUVOriginX, srcUVOriginY; + float srcUVScaleX, srcUVScaleY; + float pad1[2]; + } cbData = { + (float)pasteX, (float)pasteY, + (float)dlssWidthOut, (float)dlssHeightOut, + featherPixels, 0.0f, + 0.0f, 0.0f, // SrcUVOrigin: (0,0) for per-eye textures (identity) + 1.0f, 1.0f, // SrcUVScale: (1,1) for per-eye textures 
(identity) + {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(vrFeatheredCompositeCB.get(), 0); + + // Set viewport to match the per-eye final output + D3D11_VIEWPORT vp = {}; + vp.Width = (float)eyeWidthOut; + vp.Height = (float)eyeHeightOut; + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + + // Set blend state for feathered alpha compositing + float blendFactor[4] = { 0, 0, 0, 0 }; + context->OMSetBlendState(vrFeatheredCompositeBlendState.get(), blendFactor, 0xFFFFFFFF); + + // Set render target to vrFinalOutput (contains TAA'd periphery) + ID3D11RenderTargetView* rtvs[1] = { vrFinalOutput[i]->rtv.get() }; + context->OMSetRenderTargets(1, rtvs, nullptr); + + // Set shaders + context->VSSetShader(GetUpscaleVS(), nullptr, 0); + context->PSSetShader(vrFeatheredCompositePS.get(), nullptr, 0); + + // Set input assembler for bufferless fullscreen triangle + context->IASetInputLayout(nullptr); + context->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); + context->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + // Bind rasterizer state (no culling) + context->RSSetState(upscaleRasterizerState.get()); + + // Bind crop texture SRV and sampler + ID3D11ShaderResourceView* srvs[1] = { vrIntermediateColorOut[i]->srv.get() }; + context->PSSetShaderResources(0, 1, srvs); + + if (!vrLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&sampDesc, vrLinearSampler.put())); + } + ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; + context->PSSetSamplers(0, 1, samplers); + + // Bind constant buffer + ID3D11Buffer* cbs[1] = { vrFeatheredCompositeCB.get() }; + 
context->PSSetConstantBuffers(0, 1, cbs); + + // Draw fullscreen triangle + context->Draw(3, 0); + + if (featherLogCount < 3) { + logger::info("[Upscaling] FeatheredCompositePS drawn: eye={}, crop=({},{}) {}x{}, feather={:.1f}", + i, pasteX, pasteY, dlssWidthOut, dlssHeightOut, featherPixels); + featherLogCount++; + } + + // Cleanup PS state + ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; + context->PSSetShaderResources(0, 1, nullSRV); + context->PSSetShader(nullptr, nullptr, 0); + context->VSSetShader(nullptr, nullptr, 0); + + // Restore OM state + context->OMSetBlendState(oldBlendState, oldBlendFactor, oldSampleMask); + if (oldBlendState) + oldBlendState->Release(); + context->OMSetRenderTargets(1, &oldRTV, oldDSV); + if (oldRTV) + oldRTV->Release(); + if (oldDSV) + oldDSV->Release(); + } else { + // PS compilation failed; fall back to hard paste + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + } + } else if (featherPixels > 0.0f) { + // Fallback: RTV not available, use legacy CS path + if (!vrFeatheredCompositeCS) { + vrFeatheredCompositeCS.attach((ID3D11ComputeShader*)Util::CompileShader( + L"Data/Shaders/Upscaling/FeatheredCompositeCS.hlsl", {}, "cs_5_0")); + if (vrFeatheredCompositeCS) + logger::info("[Upscaling] FeatheredCompositeCS compiled (fallback)"); + else + logger::error("[Upscaling] FeatheredCompositeCS FAILED to compile!"); + if (!vrFeatheredCompositeCB) { + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 48; + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, vrFeatheredCompositeCB.put())); + } + } + + if (vrFeatheredCompositeCS) { + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrFeatheredCompositeCB.get(), 0, 
D3D11_MAP_WRITE_DISCARD, 0, &mapped); + struct + { + uint32_t cx, cy, cw, ch; + float feather; + float pad[3]; + } cbData = { + pasteX, pasteY, dlssWidthOut, dlssHeightOut, featherPixels, {} + }; + memcpy(mapped.pData, &cbData, sizeof(cbData)); + context->Unmap(vrFeatheredCompositeCB.get(), 0); + + ID3D11Buffer* cbs[1] = { vrFeatheredCompositeCB.get() }; + context->CSSetConstantBuffers(0, 1, cbs); + ID3D11ShaderResourceView* srvs[1] = { vrIntermediateColorOut[i]->srv.get() }; + context->CSSetShaderResources(0, 1, srvs); + ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[i]->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + context->CSSetShader(vrFeatheredCompositeCS.get(), nullptr, 0); + context->Dispatch((dlssWidthOut + 7) / 8, (dlssHeightOut + 7) / 8, 1); + + if (featherLogCount < 3) { + logger::info("[Upscaling] FeatheredCompositeCS dispatched (fallback): groups=({},{})", + (dlssWidthOut + 7) / 8, (dlssHeightOut + 7) / 8); + featherLogCount++; + } + + ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; + context->CSSetShaderResources(0, 1, nullSRV); + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetShader(nullptr, nullptr, 0); + } else { + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + } + } else { + // Hard paste (no feathering) + D3D11_BOX dlssBox = { 0, 0, 0, dlssWidthOut, dlssHeightOut, 1 }; + context->CopySubresourceRegion(vrFinalOutput[i]->resource.get(), 0, pasteX, pasteY, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &dlssBox); + if (featherLogCount < 3) { + logger::info("[Upscaling] FinalizePerEyeOutputs eye{}: using HARD PASTE (no feathering)", i); + featherLogCount++; + } + } + + // Copy composition target to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, 
eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrFinalOutput[i]->resource.get(), 0, &outBox); + } else { + // Direct copy DLSS output to stereo buffer + D3D11_BOX outBox = { 0, 0, 0, eyeWidthOut, eyeHeightOut, 1 }; + context->CopySubresourceRegion(colorDst, 0, offsetXOut, 0, 0, + vrIntermediateColorOut[i]->resource.get(), 0, &outBox); + } } if (state->frameAnnotations) @@ -941,7 +1470,11 @@ void Upscaling::FinalizePerEyeOutputs(ID3D11Resource* colorDst) } void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX) + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY, + uint32_t depthWidth, uint32_t depthHeight, + uint32_t colorWidth, uint32_t colorHeight, + ID3D11ShaderResourceView* fallbackSRV, uint32_t fallbackOffsetX) { if (!globals::game::isVR) return; @@ -952,7 +1485,7 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe vrClearHMDMaskCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/ClearHMDMaskCS.hlsl", {}, "cs_5_0")); D3D11_BUFFER_DESC cbDesc = {}; - cbDesc.ByteWidth = 16; // 4 uints + cbDesc.ByteWidth = 32; // 8 uints (offsets + optional scaling dimensions) cbDesc.Usage = D3D11_USAGE_DYNAMIC; cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; @@ -965,8 +1498,9 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->CSSetShader(vrClearHMDMaskCS.get(), nullptr, 0); - ID3D11ShaderResourceView* srvs[1] = { depthSRV }; - context->CSSetShaderResources(0, 1, srvs); + // t0 = depth, t1 = fallback (nullptr → unbound → reads return (0,0,0,0) → black) + ID3D11ShaderResourceView* srvs[2] = { depthSRV, fallbackSRV }; + context->CSSetShaderResources(0, 2, srvs); ID3D11UnorderedAccessView* uavs[1] = { colorUAV }; 
context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); @@ -974,9 +1508,10 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe D3D11_MAPPED_SUBRESOURCE mapped{}; context->Map(vrClearHMDMaskCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); - uint32_t offsets[4] = { depthOffsetX, colorOffsetX, 0, 0 }; + uint32_t cbData[8] = { depthOffsetX, colorOffsetX, depthOffsetY, fallbackOffsetX, + depthWidth, depthHeight, colorWidth, colorHeight }; - memcpy(mapped.pData, offsets, sizeof(offsets)); + memcpy(mapped.pData, cbData, sizeof(cbData)); context->Unmap(vrClearHMDMaskCB.get(), 0); ID3D11Buffer* cbs[1] = { vrClearHMDMaskCB.get() }; @@ -984,13 +1519,81 @@ void Upscaling::ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderRe context->Dispatch(dispatchX, dispatchY, 1); + // Unbind + ID3D11ShaderResourceView* nullSRVs[2] = { nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; + ID3D11Buffer* nullCB[1] = { nullptr }; + context->CSSetShaderResources(0, 2, nullSRVs); + context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); + context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetShader(nullptr, nullptr, 0); + } +} + +void Upscaling::FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV) +{ + if (!globals::game::isVR || !vrFinalOutput[eyeIndex]) + return; + if (!overrideSRV && !vrIntermediateColorIn[eyeIndex]) + return; + + auto context = globals::d3d::context; + + if (!vrPeripheryFillCS) { + vrPeripheryFillCS.attach((ID3D11ComputeShader*)Util::CompileShader(L"Data/Shaders/Upscaling/VRPeripheryFillCS.hlsl", {}, "cs_5_0")); + + D3D11_BUFFER_DESC cbDesc = {}; + cbDesc.ByteWidth = 16; // 4 uints + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(globals::d3d::device->CreateBuffer(&cbDesc, nullptr, 
vrPeripheryFillCB.put())); + + D3D11_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + DX::ThrowIfFailed(globals::d3d::device->CreateSamplerState(&samplerDesc, vrLinearSampler.put())); + } + + if (vrPeripheryFillCS) { + auto dispatchX = (dstWidth + 7) / 8; + auto dispatchY = (dstHeight + 7) / 8; + + context->CSSetShader(vrPeripheryFillCS.get(), nullptr, 0); + + // Read from overrideSRV (e.g. TAA texture) or default render-res per-eye input. + ID3D11ShaderResourceView* srvs[1] = { overrideSRV ? overrideSRV : vrIntermediateColorIn[eyeIndex]->srv.get() }; + context->CSSetShaderResources(0, 1, srvs); + + ID3D11UnorderedAccessView* uavs[1] = { vrFinalOutput[eyeIndex]->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + ID3D11SamplerState* samplers[1] = { vrLinearSampler.get() }; + context->CSSetSamplers(0, 1, samplers); + + D3D11_MAPPED_SUBRESOURCE mapped{}; + context->Map(vrPeripheryFillCB.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + uint32_t cbData[4] = { srcWidth, srcHeight, dstWidth, dstHeight }; + memcpy(mapped.pData, cbData, sizeof(cbData)); + context->Unmap(vrPeripheryFillCB.get(), 0); + + ID3D11Buffer* cbs[1] = { vrPeripheryFillCB.get() }; + context->CSSetConstantBuffers(0, 1, cbs); + + context->Dispatch(dispatchX, dispatchY, 1); + // Unbind ID3D11ShaderResourceView* nullSRV[1] = { nullptr }; ID3D11UnorderedAccessView* nullUAV[1] = { nullptr }; ID3D11Buffer* nullCB[1] = { nullptr }; + ID3D11SamplerState* nullSampler[1] = { nullptr }; context->CSSetShaderResources(0, 1, nullSRV); context->CSSetUnorderedAccessViews(0, 1, nullUAV, nullptr); context->CSSetConstantBuffers(0, 1, nullCB); + context->CSSetSamplers(0, 1, nullSampler); context->CSSetShader(nullptr, nullptr, 0); } } @@ -1032,6 +1635,10 @@ void Upscaling::ConfigureTAA() auto 
imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + // Disable water TAA when upscaling is enabled + bool* enableWaterTAA = reinterpret_cast(reinterpret_cast(BSImagespaceShaderISTemporalAA) + 0x38LL); + *enableWaterTAA = !(upscaleMethod == UpscaleMethod::kNONE || upscaleMethod == UpscaleMethod::kTAA); + // Force enable TAA if needed BSImagespaceShaderISTemporalAA->taaEnabled = upscaleMethod != UpscaleMethod::kNONE; } @@ -1097,6 +1704,10 @@ void Upscaling::ConfigureUpscaling(RE::BSGraphics::State* a_viewport) // Disable dynamic resolution unless the game explicitly enables it if (!globals::game::isVR) runtimeData.dynamicResolutionLock = 1; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // No longer need to force-disable culling when upscaling is active. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs. } void Upscaling::SetupResources() @@ -1141,6 +1752,7 @@ void Upscaling::SetupResources() depthStencilDesc.BackFace.StencilDepthFailOp = depthStencilDesc.FrontFace.StencilDepthFailOp; depthStencilDesc.BackFace.StencilPassOp = depthStencilDesc.FrontFace.StencilPassOp; depthStencilDesc.BackFace.StencilFunc = depthStencilDesc.FrontFace.StencilFunc; + } else { depthStencilDesc.StencilEnable = false; // Disable stencil testing } @@ -1194,6 +1806,16 @@ void Upscaling::ClearShaderCache() depthRefractionUpscalePS = nullptr; // com_ptr automatically releases underwaterMaskUpscalePS = nullptr; // com_ptr automatically releases upscaleVS = nullptr; // com_ptr automatically releases + vrClearHMDMaskCS = nullptr; + vrPeripheryFillCS = nullptr; + vrPeripheryFillCB = nullptr; + vrFeatheredCompositeCS = nullptr; + vrFeatheredCompositeCB = nullptr; + vrFeatheredCompositePS = nullptr; + vrFeatheredCompositeBlendState = nullptr; + vrDlssCompositePS = nullptr; + vrDlssUpscalePS = nullptr; + vrDlssUpscaleCB = nullptr; } 
void Upscaling::CopySharedD3D12Resources() @@ -1404,6 +2026,17 @@ bool Upscaling::IsUpscalingActive() const return resolutionScale.x < .99f; } +std::vector Upscaling::GetActiveConstraints() const +{ + std::vector constraints; + + // VR depth buffer culling is now compatible with upscaling thanks to depth buffer upscaling. + // The depth buffer is upscaled in UpscaleDepth() before OBBOcclusionTesting runs, + // so we no longer need to constrain depth buffer culling when upscaling is active. + + return constraints; +} + /** * @brief Retrieves the current frame time for frame generation. * @@ -1515,7 +2148,7 @@ Upscaling::BlurResources Upscaling::GetBlurResources() const return {}; } -void Upscaling::Upscale() +void Upscaling::Upscale(ID3D11Texture2D* colorSourceOverride) { auto upscaleMethod = GetUpscaleMethod(); @@ -1576,8 +2209,11 @@ void Upscaling::Upscale() { state->BeginPerfEvent("Upscaling"); + // Use color source override if provided (e.g., post-PP intermediate for periphery TAA) + ID3D11Resource* colorSrc = colorSourceOverride ? static_cast(colorSourceOverride) : static_cast(main.texture); + if (upscaleMethod == UpscaleMethod::kDLSS) { - streamline.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); + streamline.Upscale(colorSrc, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVectorCopyTexture->resource.get()); } else if (upscaleMethod == UpscaleMethod::kFSR) { fidelityFX.Upscale(main.texture, reactiveMaskTexture->resource.get(), transparencyCompositionMaskTexture->resource.get(), motionVector.texture, settings.sharpnessFSR); } @@ -1715,9 +2351,10 @@ void Upscaling::UpscaleDepth() // Skip alias copies to reduce unnecessary copy churn. 
copyIfNonAliased(depthCopy.texture, depth.texture); - // Clear stencil to be 0xFF + // Clear stencil to 0x00 for VR — the VR shader path discards pixels with + // stencil > 0x00, so 0x00 means "all pixels valid" (process entire display-res). if (globals::game::isVR) { - context->ClearDepthStencilView(depthCopy.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0xFF); + context->ClearDepthStencilView(depthCopy.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0x00); } // Set depth stencil state to write 0x00 @@ -1817,20 +2454,88 @@ void Upscaling::Main_PostProcessing::thunk(RE::ImageSpaceManager* a_this, uint32 if (upscaling.d3d12SwapChainActive && upscaling.settings.frameGenerationMode) upscaling.CopySharedD3D12Resources(); - if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) - upscaling.PerformUpscaling(); + // Increment diagnostic counter (rate-limits TAAReorder logging) + if (TAAReorder::g_initialized) { + TAAReorder::g_diagCounter = (TAAReorder::g_diagCounter + 1) % TAAReorder::DIAG_INTERVAL; + if (TAAReorder::g_diagCounter == 0) { + TAAReorder::g_frameSeqCounter = 0; + logger::info("[SEQ] Main_PostProcessing START seq={}", TAAReorder::g_frameSeqCounter++); + } + } - if (upscaleMethod == UpscaleMethod::kDLSS) - upscaling.ApplySharpening(); + bool peripheryTAA = TAAReorder::ShouldReorderTAA(); + + // Reset per-frame flags unconditionally + TAAReorder::g_postPPReady = false; + TAAReorder::g_dlssReady = false; + TAAReorder::g_dlssPasteComplete = false; + TAAReorder::g_phase5Complete = false; + TAAReorder::g_bsHookCallCount = 0; + TAAReorder::g_submitTexForPaste = nullptr; + + if (peripheryTAA) { + // ─── Periphery TAA with post-conductor DLSS (PureDark's approach) ─── + // func() with TAA enabled → conductor runs all passes unimpeded: + // Phase 2A: ExecutePassHook captures post-PP intermediate to g_postPPCopy + // Phase 5: TAA + DRS → submit texture + // After conductor: ConductorCallHook evaluates DLSS on g_postPPCopy, + // then pastes DLSS center onto 
submit texture + + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + + if (TAAReorder::g_diagCounter == 0) + logger::info("[TAAReorder] peripheryTAA: running func() with TAA enabled..."); + + // Clear stencil marks left by VRStereoOptimizations to prevent TAA interference + if (globals::features::vr.stereoOpt.loaded) { + auto renderer = globals::game::renderer; + auto& depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (depth.views[0]) + globals::d3d::context->ClearDepthStencilView(depth.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0); + } - auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); - GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); + // Set TAA high-frequency response for periphery quality + auto fTAAHighFreq = RE::GetINISetting("fTAAHighFreq:Display"); + float savedHF = fTAAHighFreq ? fTAAHighFreq->data.f : 0.0f; + if (fTAAHighFreq) + fTAAHighFreq->data.f = 1.0f; + + // func() with TAA ENABLED — DLSS eval + paste in ConductorCallHook (post-conductor) + BSImagespaceShaderISTemporalAA->taaEnabled = true; + func(a_this, a3, a_target, a_4, a_5); - BSImagespaceShaderISTemporalAA->taaEnabled = upscaleMethod == UpscaleMethod::kTAA; + // Restore original TAA HF value + if (fTAAHighFreq) + fTAAHighFreq->data.f = savedHF; - func(a_this, a3, a_target, a_4, a_5); + // Lock DRS + update camera (after conductor completes) + auto& runtimeData = globals::game::graphicsState->GetRuntimeData(); + runtimeData.dynamicResolutionLock = 1; + UpdateCameraData(); + + // Disable TAA for remainder of frame + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } else { + // ─── Normal flow (no periphery TAA) ─── + if (upscaleMethod != UpscaleMethod::kNONE && upscaleMethod != UpscaleMethod::kTAA) + upscaling.PerformUpscaling(); + + if (upscaleMethod == UpscaleMethod::kDLSS) + upscaling.ApplySharpening(); + + auto 
imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + GET_INSTANCE_MEMBER(BSImagespaceShaderISTemporalAA, imageSpaceManager); - BSImagespaceShaderISTemporalAA->taaEnabled = false; + BSImagespaceShaderISTemporalAA->taaEnabled = (upscaleMethod == UpscaleMethod::kTAA); + + if (TAAReorder::g_diagCounter == 0 && TAAReorder::g_initialized) + logger::info("[DIAG] Normal DLSS flow: taaEnabled={}, running func()...", BSImagespaceShaderISTemporalAA->taaEnabled); + + func(a_this, a3, a_target, a_4, a_5); + + BSImagespaceShaderISTemporalAA->taaEnabled = false; + } } void Upscaling::SetScissorRect::thunk(RE::BSGraphics::Renderer* This, int a_left, int a_top, int a_right, int a_bottom) diff --git a/src/Features/Upscaling.h b/src/Features/Upscaling.h index 1e88e99937..df49af71fe 100644 --- a/src/Features/Upscaling.h +++ b/src/Features/Upscaling.h @@ -57,8 +57,12 @@ struct Upscaling : Feature uint streamlineLogLevel = 0; // 0=Off, 1=Default, 2=Verbose float sharpnessFSR = 0.0f; float sharpnessDLSS = 0.0f; - uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M - uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + uint presetDLSS = 0; // 0=Default, 1=J, 2=K, 3=L, 4=M + uint useGatherWideKernel = 1; // 0=Legacy 3x3, 1=Gather wide-kernel + float vrDlssViewportScale = 1.0f; // 0.5 to 1.0, fraction of each eye that DLSS processes (VR only) + uint vrPeripheryTAA = 0; // 0=off, 1=on - enable native TAA on periphery when viewport scaling active (VR only) + float vrDlssCropOffsetX = 0.0f; // 0.0-0.3, nasal offset fraction for DLSS crop position + float vrDlssFeatherWidth = 0.0f; // 0.0-0.1, feather width fraction at DLSS crop boundary (disabled pending fix) }; Settings settings; @@ -110,6 +114,7 @@ struct Upscaling : Feature virtual void Load() override; virtual void PostPostLoad() override; virtual void SetupResources() override; + virtual std::vector GetActiveConstraints() const override; UpscaleMethod GetUpscaleMethod() const; @@ -138,7 +143,11 @@ struct 
Upscaling : Feature winrt::com_ptr vrClearHMDMaskCB; // Helper to dispatch mask clearing for a single eye region void ClearHMDMask(ID3D11UnorderedAccessView* colorUAV, ID3D11ShaderResourceView* depthSRV, - uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX); + uint32_t eyeWidth, uint32_t eyeHeight, uint32_t depthOffsetX, uint32_t colorOffsetX, + uint32_t depthOffsetY = 0, + uint32_t depthWidth = 0, uint32_t depthHeight = 0, + uint32_t colorWidth = 0, uint32_t colorHeight = 0, + ID3D11ShaderResourceView* fallbackSRV = nullptr, uint32_t fallbackOffsetX = 0); // Shared VR Per-Eye Intermediate Buffers // Owned here so both Streamline (DLSS) and FidelityFX (FSR) can use them. @@ -148,6 +157,43 @@ struct Upscaling : Feature eastl::unique_ptr vrIntermediateMotionVectors[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateReactiveMask[2]; // per-eye render resolution eastl::unique_ptr vrIntermediateTransparencyMask[2]; // per-eye render resolution + eastl::unique_ptr vrFinalOutput[2]; // per-eye display-res composition target (VR viewport scaling) + eastl::unique_ptr vrCropColorIn[2]; // crop-sized DLSS color input (VR viewport scaling only) + + // Periphery TAA (conductor approach) — used by two-call func() flow + winrt::com_ptr vrPreTAACopy; // full stereo kMAIN copy (Phase 1 PP, pre-TAA) + eastl::unique_ptr vrTAAdPerEye[2]; // per-eye render-res TAA'd content (periphery source) + + // Periphery fill compute shader (bilinear upscale render-res → display-res for VR viewport scaling) + winrt::com_ptr vrPeripheryFillCS; + winrt::com_ptr vrPeripheryFillCB; + winrt::com_ptr vrLinearSampler; + + // Feathered composite compute shader (legacy, kept as fallback) + winrt::com_ptr vrFeatheredCompositeCS; + winrt::com_ptr vrFeatheredCompositeCB; + + // Feathered composite pixel shader approach (replaces CS to preserve periphery TAA) + // Based on PureDark's technique from Skyrim-Upscaler VR (MIT license) + winrt::com_ptr 
vrFeatheredCompositePS; + winrt::com_ptr vrFeatheredCompositeBlendState; + + // DLSS composite pixel shaders (format-converting fullscreen copy for TAAReorder) + winrt::com_ptr vrDlssCompositePS; // point-sample (same-res format conversion) + winrt::com_ptr vrDlssUpscalePS; // bilinear upscale (render-res → display-res) + winrt::com_ptr vrDlssUpscaleCB; // constant buffer for upscale params + ID3D11PixelShader* GetDlssCompositePS(); + ID3D11PixelShader* GetDlssUpscalePS(); + + struct DlssCompositeCB + { + float2 DynResScale; // renderRes / displayRes per-eye + float2 EyeOffset; // (i * eyeWidth, 0) + float2 SrcTexSize; // full texture dimensions + float2 pad; + }; + void FillPeriphery(uint32_t eyeIndex, uint32_t srcWidth, uint32_t srcHeight, + uint32_t dstWidth, uint32_t dstHeight, ID3D11ShaderResourceView* overrideSRV = nullptr); // Helper to create/resize per-eye buffers matching source formats void CreateVRIntermediateTextures(uint32_t inWidth, uint32_t inHeight, uint32_t outWidth, uint32_t outHeight, @@ -160,11 +206,11 @@ struct Upscaling : Feature // Shared Pipeline Steps void PreparePerEyeInputs(ID3D11Resource* colorSrc, ID3D11Resource* depthSrc, ID3D11Resource* mvecSrc, ID3D11Resource* reactiveSrc, ID3D11Resource* transparencySrc); - void FinalizePerEyeOutputs(ID3D11Resource* colorDst); + void FinalizePerEyeOutputs(ID3D11Resource* colorDst, bool eye0Only = false); void ConfigureTAA(); void ConfigureUpscaling(RE::BSGraphics::State* a_state); - void Upscale(); + void Upscale(ID3D11Texture2D* colorSourceOverride = nullptr); // D3D11 textures Texture2D* reactiveMaskTexture = nullptr; diff --git a/src/Features/Upscaling/Streamline.cpp b/src/Features/Upscaling/Streamline.cpp index 71eb3a3542..1b331002fb 100644 --- a/src/Features/Upscaling/Streamline.cpp +++ b/src/Features/Upscaling/Streamline.cpp @@ -7,6 +7,7 @@ #include "../../Hooks.h" #include "../../State.h" #include "../../Util.h" +#include "../TAAReorder.h" #include "../Upscaling.h" #include "DX12SwapChain.h" 
@@ -237,6 +238,20 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.cameraMotionIncluded = sl::Boolean::eTrue; slConstants.cameraPinholeOffset = { 0.f, 0.f }; + + // VR nasal offset: when the crop is shifted, tell DLSS the optical center is offset + if (globals::game::isVR) { + float nasalFrac = std::clamp(globals::features::upscaling.settings.vrDlssCropOffsetX, 0.0f, 0.3f); + float vpScale = std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f); + if (nasalFrac > 0.0f && vpScale < 1.0f) { + // Pinhole offset in NDC: how far the crop center is from the eye's optical axis + // Eye 0: shifted right (+X), Eye 1: shifted left (-X) + float shiftNDC = nasalFrac / vpScale; // normalized to crop width + float sign = (eyeIndex == 0) ? 1.0f : -1.0f; + slConstants.cameraPinholeOffset = { sign * shiftNDC, 0.f }; + } + } + slConstants.cameraRight = { viewMatrix._11, viewMatrix._12, viewMatrix._13 }; slConstants.cameraUp = { viewMatrix._21, viewMatrix._22, viewMatrix._23 }; slConstants.cameraFwd = { viewMatrix._31, viewMatrix._32, viewMatrix._33 }; @@ -245,6 +260,28 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.depthInverted = sl::Boolean::eFalse; if (globals::game::isVR) { + // When VR viewport scaling is active, DLSS processes a centered sub-region of each eye. + // The projection matrix must be adjusted to describe only the crop's FOV, not the full eye. + // Without this, DLSS's temporal reprojection maps pixels to wrong world positions, + // destroying temporal accumulation (causing aliasing and instability). + // Scaling rows 0 and 1 of the projection by 1/vpScale narrows the clip-space X/Y + // to match the crop region. clipToPrevClip must also be conjugated (see below). 
+ float vpScale = std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f); + if (vpScale < 1.0f) { + float invScale = 1.0f / vpScale; + // Row 0 → clip.x, Row 1 → clip.y (Streamline row-major, P * pos convention) + slConstants.cameraViewToClip[0].x *= invScale; + slConstants.cameraViewToClip[0].y *= invScale; + slConstants.cameraViewToClip[0].z *= invScale; + slConstants.cameraViewToClip[0].w *= invScale; + slConstants.cameraViewToClip[1].x *= invScale; + slConstants.cameraViewToClip[1].y *= invScale; + slConstants.cameraViewToClip[1].z *= invScale; + slConstants.cameraViewToClip[1].w *= invScale; + // Narrow the reported FOV to match the crop + slConstants.cameraFOV = 2.0f * atanf(vpScale * tanf(slConstants.cameraFOV * 0.5f)); + } + // VR: compute clipToCameraView / clipToPrevClip / prevClipToClip from Skyrim's per-eye matrices. // recalculateCameraMatrices() uses a single static prev-frame slot -- unusable for two viewports. sl::matrixFullInvert(slConstants.clipToCameraView, slConstants.cameraViewToClip); @@ -258,7 +295,62 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye sl::float4x4 invCurrViewProj; sl::matrixFullInvert(invCurrViewProj, currViewProjSL); sl::matrixMul(slConstants.clipToPrevClip, invCurrViewProj, prevViewProjSL); + + // When viewport scaling is active, cameraViewToClip is adjusted (narrower FOV), + // changing the clip space. clipToPrevClip (computed from unadjusted VP) maps between + // unadjusted clip spaces. We must conjugate it to map between adjusted clip spaces: + // CTP_adj = inv(S) * CTP * S + // where S = diag(invScale, invScale, 1, 1), inv(S) = diag(vpScale, vpScale, 1, 1). 
+ // + // Derivation (row-vector convention: clip = view * P): + // clip_adj = clip_unadj * S (scaling rows 0,1 of P scales clip x,y by invScale) + // clip_prev_adj = clip_prev_unadj * S + // clip_prev_unadj = clip_curr_unadj * CTP + // clip_prev_adj = (clip_curr_adj * inv(S)) * CTP * S = clip_curr_adj * (inv(S) * CTP * S) + // + // Element-wise: CTP_adj[i][j] = inv(S)[i] * CTP[i][j] * S[j] + // Rows 0,1, cols 0,1: vpScale * invScale = 1 (unchanged) + // Rows 0,1, cols 2,3: vpScale * 1 = vpScale + // Rows 2,3, cols 0,1: 1 * invScale = invScale + // Rows 2,3, cols 2,3: unchanged + // + // This ensures clipToPrevClip agrees with per-pixel motion vectors. + // Without correct conjugation, DLSS sees disagreement between the camera-predicted + // motion and per-pixel motion vectors, causing it to reject temporal accumulation + // during camera motion. (When still, CTP ≈ I, and inv(S)*I*S = I → no mismatch.) + if (vpScale < 1.0f) { + float invScale = 1.0f / vpScale; + // Rows 0,1 cols 2,3: multiply by vpScale (from left-multiply by inv(S)) + slConstants.clipToPrevClip[0].z *= vpScale; + slConstants.clipToPrevClip[0].w *= vpScale; + slConstants.clipToPrevClip[1].z *= vpScale; + slConstants.clipToPrevClip[1].w *= vpScale; + // Rows 2,3 cols 0,1: multiply by invScale (from right-multiply by S) + slConstants.clipToPrevClip[2].x *= invScale; + slConstants.clipToPrevClip[2].y *= invScale; + slConstants.clipToPrevClip[3].x *= invScale; + slConstants.clipToPrevClip[3].y *= invScale; + } + sl::matrixFullInvert(slConstants.prevClipToClip, slConstants.clipToPrevClip); + + // Per-eye diagnostic logging for temporal quality investigation + { + static uint32_t ctpDiagCounter = 0; + bool ctpDiag = (ctpDiagCounter++ % 300 == 0) || (TAAReorder::g_diagCounter == 0 && vpScale < 1.0f); + if (ctpDiag) { + auto& ctp = slConstants.clipToPrevClip; + logger::info("[DLSS-CTP] Eye {} clipToPrevClip diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, ctp[0].x, ctp[1].y, ctp[2].z, ctp[3].w); + 
logger::info("[DLSS-CTP] Eye {} prevVP diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, prevViewProjSL[0].x, prevViewProjSL[1].y, prevViewProjSL[2].z, prevViewProjSL[3].w); + logger::info("[DLSS-CTP] Eye {} currVP diag=({:.6f},{:.6f},{:.6f},{:.6f})", + eyeIndex, currViewProjSL[0].x, currViewProjSL[1].y, currViewProjSL[2].z, currViewProjSL[3].w); + logger::info("[DLSS-CTP] Eye {} cameraPos=({:.2f},{:.2f},{:.2f}) fov={:.4f} mvecScale=({:.4f},{:.4f})", + eyeIndex, slConstants.cameraPos.x, slConstants.cameraPos.y, slConstants.cameraPos.z, + slConstants.cameraFOV, slConstants.mvecScale.x, slConstants.mvecScale.y); + } + } } else { recalculateCameraMatrices(slConstants); } @@ -268,7 +360,26 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye slConstants.jitterOffset = { -jitter.x, -jitter.y }; slConstants.reset = sl::Boolean::eFalse; - slConstants.mvecScale = { 1.0f, 1.0f }; + // mvecScale normalizes motion vectors to [-1,1] range. The Streamline DLSS plugin + // then multiplies by the input render dimensions to get pixel displacement: + // MV_Scale = mvecScale * renderWidth + // The game's motion vectors are in [-1,1] normalized to the FULL per-eye dimensions. + // Without viewport scaling, renderWidth = eyeWidthIn → MV_Scale = eyeWidthIn → correct. + // With viewport scaling, renderWidth = cropWidthIn = eyeWidthIn * vpScale, so DLSS + // underestimates motion by vpScale. Compensate by scaling mvecScale by 1/vpScale. 
+ if (globals::game::isVR && std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f) < 1.0f) { + float invScale = 1.0f / std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f); + slConstants.mvecScale = { invScale, invScale }; + } else { + slConstants.mvecScale = { 1.0f, 1.0f }; + } + // Log mvecScale after assignment (was previously logged before assignment, showing uninitialized values) + if (globals::game::isVR && TAAReorder::g_diagCounter == 0 && std::clamp(globals::features::upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f) < 1.0f) { + logger::info("[TAAReorder] Eye {} mvecScale=({:.4f},{:.4f}) jitter=({:.4f},{:.4f})", + eyeIndex, slConstants.mvecScale.x, slConstants.mvecScale.y, + slConstants.jitterOffset.x, slConstants.jitterOffset.y); + } + slConstants.motionVectors3D = sl::Boolean::eFalse; slConstants.motionVectorsInvalidValue = FLT_MIN; slConstants.orthographicProjection = sl::Boolean::eFalse; @@ -277,6 +388,13 @@ void Streamline::CheckFrameConstants(sl::ViewportHandle p_viewport, uint32_t eye if (SL_FAILED(res, slSetConstants(slConstants, *frameToken, p_viewport))) { logger::error("[Streamline] Could not set constants for eye {}", eyeIndex); + } else { + static uint32_t constDiagCounter = 0; + if (constDiagCounter++ % 300 == 0) { + logger::info("[Streamline] slSetConstants OK eye={} jitter=({:.4f},{:.4f}) fov={:.4f}", + eyeIndex, slConstants.jitterOffset.x, slConstants.jitterOffset.y, + slConstants.cameraFOV); + } } } @@ -304,7 +422,7 @@ bool Streamline::IsRTXAndBelow40Series(IDXGIAdapter* a_adapter) return false; } -void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) +void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width, uint32_t height) { sl::DLSSOptions dlssOptions{}; @@ -328,10 +446,8 @@ void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) break; } - auto state = globals::state; - dlssOptions.outputWidth = width; - 
dlssOptions.outputHeight = (uint)state->screenSize.y; + dlssOptions.outputHeight = height; // Detect HDR from kMAIN format at runtime -- VR kMAIN may be 8-bit while SE is FP16 { @@ -394,7 +510,7 @@ void Streamline::SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width) void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, ID3D11Resource* colorIn, ID3D11Resource* colorOut, ID3D11Resource* depth, ID3D11Resource* mvec, ID3D11Resource* reactiveMask, ID3D11Resource* transparencyMask, - const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth) + const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth, uint32_t outputHeight) { auto context = globals::d3d::context; @@ -406,7 +522,7 @@ void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, sl::Resource transparencyMaskRes = { sl::ResourceType::eTex2d, transparencyMask, 0 }; CheckFrameConstants(vp, eyeIndex); - SetDLSSOptions(vp, outputWidth); + SetDLSSOptions(vp, outputWidth, outputHeight); sl::ResourceTag tags[] = { { &colorInRes, sl::kBufferTypeScalingInputColor, sl::ResourceLifecycle::eOnlyValidNow, &extentIn }, @@ -438,17 +554,24 @@ void Streamline::EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, if (state->frameAnnotations) state->EndPerfEvent(); - if (evalResult != sl::Result::eOk) { - static bool evalErrorLogged[2] = { false, false }; + // Rate-limited diagnostic logging for DLSS evaluation results + { + static uint32_t evalDiagCounter[2] = { 0, 0 }; uint32_t logIdx = globals::game::isVR ? eyeIndex : 0; - if (!evalErrorLogged[logIdx]) { - evalErrorLogged[logIdx] = true; - logger::error("[Streamline] slEvaluateFeature failed{} result={}", globals::game::isVR ? 
std::format(" for eye {}", eyeIndex) : "", (int)evalResult); + bool diagLog = (evalDiagCounter[logIdx]++ % 300 == 0); + + if (evalResult != sl::Result::eOk) { + if (diagLog) { + logger::error("[Streamline] slEvaluateFeature FAILED eye={} result={} (frame {})", + eyeIndex, (int)evalResult, evalDiagCounter[logIdx]); + } + } else if (diagLog) { + logger::info("[Streamline] slEvaluateFeature OK eye={} (frame {})", eyeIndex, evalDiagCounter[logIdx]); } } } -void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors) +void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors, bool eye0Only) { auto state = globals::state; @@ -458,9 +581,15 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r auto screenSize = state->screenSize; auto renderSize = Util::ConvertToDynamic(screenSize); - // VR: Combined-buffer mode with extent offsets causes temporal ghosting on the right eye - // because DLSS's internal history buffers use extent offsets as indices. - // Per-eye isolation with extents at {0,0} is required. + // VR: Per-eye isolation is required. Each eye uses a separate per-eye texture + // with its own viewport handle, avoiding cross-eye history contamination. + // When viewport scaling is active (vrDlssViewportScale < 1.0): + // - All DLSS inputs are physically cropped to the center sub-region at {0,0}. + // This eliminates non-zero subrect base offsets which break temporal reprojection. + // - Camera matrices are adjusted in CheckFrameConstants to match the crop's FOV. + // - FillPeriphery bilinear-upscales the full render-res input to vrFinalOutput, + // then FinalizePerEyeOutputs pastes the DLSS crop output into the center. + // When viewport scaling is off (scale == 1.0), all textures are full-size at {0,0}. 
if (globals::game::isVR) { auto& upscaling = globals::features::upscaling; uint32_t eyeWidthOut = (uint32_t)(screenSize.x / 2); @@ -468,21 +597,50 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r uint32_t eyeWidthIn = (uint32_t)(renderSize.x / 2); uint32_t eyeHeightIn = (uint32_t)renderSize.y; + float vpScale = std::clamp(upscaling.settings.vrDlssViewportScale, 0.5f, 1.0f); + bool viewportScaling = vpScale < 1.0f; + + uint32_t dlssWidthIn = viewportScaling ? (uint32_t)(eyeWidthIn * vpScale) : eyeWidthIn; + uint32_t dlssHeightIn = viewportScaling ? (uint32_t)(eyeHeightIn * vpScale) : eyeHeightIn; + uint32_t dlssWidthOut = viewportScaling ? (uint32_t)(eyeWidthOut * vpScale) : eyeWidthOut; + uint32_t dlssHeightOut = viewportScaling ? (uint32_t)(eyeHeightOut * vpScale) : eyeHeightOut; + upscaling.PreparePerEyeInputs(a_upscalingTexture, depthTexture.texture, a_motionVectors, a_reactiveMask, a_transparencyCompositionMask); - for (uint32_t i = 0; i < 2; ++i) { + // Periphery TAA diagnostic + if (TAAReorder::g_diagCounter == 0 && viewportScaling && upscaling.settings.vrPeripheryTAA) { + logger::info("[TAAReorder] Periphery TAA: vrTAAdPerEye[0]={}, g_initialized={} (TAA injected at display RT level)", + (void*)upscaling.vrTAAdPerEye[0].get(), TAAReorder::g_initialized); + } + + uint32_t eyeCount = eye0Only ? 1 : 2; + for (uint32_t i = 0; i < eyeCount; ++i) { sl::ViewportHandle vp = (i == 1) ? viewportRight : viewport; - sl::Extent extentIn{ 0, 0, eyeWidthIn, eyeHeightIn }; - sl::Extent extentOut{ 0, 0, eyeWidthOut, eyeHeightOut }; + + if (viewportScaling) { + // Pre-fill composition target with bilinear upscale of full render-res eye. + // DLSS output is pasted on top in FinalizePerEyeOutputs. + upscaling.FillPeriphery(i, eyeWidthIn, eyeHeightIn, eyeWidthOut, eyeHeightOut); + } + + // All extents are {0,0} - inputs are physically crop-sized (or full-sized when not scaling). 
+ // No non-zero subrect base offsets, which is critical for DLSS temporal reprojection. + sl::Extent extentIn = { 0, 0, dlssWidthIn, dlssHeightIn }; + sl::Extent extentOut = { 0, 0, dlssWidthOut, dlssHeightOut }; + + // When viewport scaling, use crop-sized vrCropColorIn; otherwise use full vrIntermediateColorIn + ID3D11Resource* colorInput = viewportScaling ? + upscaling.vrCropColorIn[i]->resource.get() : + upscaling.vrIntermediateColorIn[i]->resource.get(); EvaluateDLSS(vp, i, - upscaling.vrIntermediateColorIn[i]->resource.get(), upscaling.vrIntermediateColorOut[i]->resource.get(), + colorInput, upscaling.vrIntermediateColorOut[i]->resource.get(), upscaling.vrIntermediateDepth[i]->resource.get(), upscaling.vrIntermediateMotionVectors[i]->resource.get(), upscaling.vrIntermediateReactiveMask[i]->resource.get(), upscaling.vrIntermediateTransparencyMask[i]->resource.get(), - extentIn, extentOut, eyeWidthOut); + extentIn, extentOut, dlssWidthOut, dlssHeightOut); } - upscaling.FinalizePerEyeOutputs(a_upscalingTexture); + upscaling.FinalizePerEyeOutputs(a_upscalingTexture, eye0Only); } else { // Non-VR: Simple full-texture upscale sl::Extent extentIn{ 0, 0, (uint)renderSize.x, (uint)renderSize.y }; @@ -491,7 +649,7 @@ void Streamline::Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_r EvaluateDLSS(viewport, 0, a_upscalingTexture, a_upscalingTexture, depthTexture.texture, a_motionVectors, a_reactiveMask, a_transparencyCompositionMask, - extentIn, extentOut, (uint)screenSize.x); + extentIn, extentOut, (uint)screenSize.x, (uint)screenSize.y); } } /** diff --git a/src/Features/Upscaling/Streamline.h b/src/Features/Upscaling/Streamline.h index 0f771fb9a0..348f0b6849 100644 --- a/src/Features/Upscaling/Streamline.h +++ b/src/Features/Upscaling/Streamline.h @@ -70,7 +70,7 @@ class Streamline void EvaluateDLSS(sl::ViewportHandle vp, uint32_t eyeIndex, ID3D11Resource* colorIn, ID3D11Resource* colorOut, ID3D11Resource* depth, ID3D11Resource* mvec, ID3D11Resource* 
reactiveMask, ID3D11Resource* transparencyMask, - const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth); + const sl::Extent& extentIn, const sl::Extent& extentOut, uint32_t outputWidth, uint32_t outputHeight); // Cached DLL version info for Streamline plugin directory static std::vector> dllVersions; @@ -85,9 +85,9 @@ class Streamline bool IsRTXAndBelow40Series(IDXGIAdapter* a_adapter); - void SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width); + void SetDLSSOptions(sl::ViewportHandle p_viewport, uint32_t width, uint32_t height); - void Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors); + void Upscale(ID3D11Resource* a_upscalingTexture, ID3D11Resource* a_reactiveMask, ID3D11Resource* a_transparencyCompositionMask, ID3D11Resource* a_motionVectors, bool eye0Only = false); void DestroyDLSSResources(); }; diff --git a/src/Features/VR.cpp b/src/Features/VR.cpp index e6ed6af7bb..ecc6bcc1d0 100644 --- a/src/Features/VR.cpp +++ b/src/Features/VR.cpp @@ -44,7 +44,8 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( EnableStereoBlend, StereoBlendDepthSigma, StereoBlendMaxFactor, - StereoBlendColorThreshold) + StereoBlendColorThreshold, + StereoBlendDebugMode) //============================================================================= // FEATURE BASE CLASS OVERRIDES @@ -54,16 +55,26 @@ void VR::LoadSettings(json& o_json) { settings = o_json.get(); settings.ClampToValidRanges(); + if (o_json.contains("StereoOptimizations")) { + json stereoOptJson = o_json["StereoOptimizations"]; + stereoOpt.LoadSettings(stereoOptJson); + } } void VR::SaveSettings(json& o_json) { o_json = settings; + { + json stereoOptJson; + stereoOpt.SaveSettings(stereoOptJson); + o_json["StereoOptimizations"] = stereoOptJson; + } } void VR::RestoreDefaultSettings() { settings = {}; + stereoOpt.RestoreDefaultSettings(); } void VR::SetupResources() @@ -88,6 +99,12 
@@ void VR::SetupResources() if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\VR\\StereoBlendCS.hlsl", edgeDetectionDefines, "cs_5_0"))) stereoBlendDebugEdgeDetectionCS.attach(rawPtr); + // Overwrite mode: direct replacement instead of blend (for stencil culling) + auto overwriteDefines = defines; + overwriteDefines.push_back({ "STEREO_OVERWRITE", "" }); + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\VR\\StereoBlendCS.hlsl", overwriteDefines, "cs_5_0"))) + stereoBlendOverwriteCS.attach(rawPtr); + auto renderer = globals::game::renderer; auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; D3D11_TEXTURE2D_DESC mainDesc; @@ -103,6 +120,11 @@ void VR::SetupResources() stereoBlendCopyTex->CreateSRV(srvDesc); stereoBlendCB = eastl::make_unique(ConstantBufferDesc()); + if (REL::Module::IsVR()) { + stereoOpt.SetupResources(); + stereoOpt.loaded = stereoOpt.GetModeTextureSRV() != nullptr; + } + DetectOpenVRInfo(); if (openVRInfo.isAvailable) { @@ -274,3 +296,8 @@ bool VR::IsOpenVRCompatible() const { return globals::game::isVR && openVRInfo.isCompatible; } + +void VR::Reset() +{ + stereoOpt.Reset(); +} diff --git a/src/Features/VR.h b/src/Features/VR.h index 06789eaac3..fe8f28bb79 100644 --- a/src/Features/VR.h +++ b/src/Features/VR.h @@ -3,6 +3,7 @@ #include "OverlayFeature.h" #include "Utils/Input.h" #include "VR/OpenVRDetection.h" // In Features/VR/ +#include "VRStereoOptimizations.h" #include #include #include @@ -109,6 +110,9 @@ struct VR : OverlayFeature }; } + virtual inline std::string_view GetShaderDefineName() override { return "VR_STEREO_OPT"; } + virtual inline bool HasShaderDefine(RE::BSShader::Type t) override { return stereoOpt.loaded && t == RE::BSShader::Type::Utility; } + virtual void Reset() override; virtual void SetupResources() override; virtual void ClearShaderCache() override; virtual bool SupportsVR() override { return true; } @@ -260,7 +264,7 @@ struct VR : 
OverlayFeature StereoBlendDepthSigma = std::clamp(StereoBlendDepthSigma, 0.001f, 0.1f); StereoBlendMaxFactor = std::clamp(StereoBlendMaxFactor, 0.0f, 0.5f); StereoBlendColorThreshold = std::clamp(StereoBlendColorThreshold, 0.0f, 0.2f); - StereoBlendDebugMode = std::clamp(StereoBlendDebugMode, 0, 3); + StereoBlendDebugMode = std::clamp(StereoBlendDebugMode, 0, 5); } }; @@ -358,8 +362,12 @@ struct VR : OverlayFeature winrt::com_ptr stereoBlendDebugBackCheckCS; winrt::com_ptr stereoBlendDebugBlendWeightCS; winrt::com_ptr stereoBlendDebugEdgeDetectionCS; + winrt::com_ptr stereoBlendOverwriteCS; eastl::unique_ptr stereoBlendCopyTex; eastl::unique_ptr stereoBlendCB; + winrt::com_ptr stereoBlendLinearSampler; + + VRStereoOptimizations stereoOpt; struct alignas(16) StereoBlendCB { @@ -368,7 +376,11 @@ struct VR : OverlayFeature float DepthSigma; float MaxBlendFactor; float ColorDiffThreshold; - float pad; + float DebugEdgeTint; + uint32_t DebugMode; + float FullBlendDistance; + float POMDepthScale; + float _pad; }; // Engine hook integration points diff --git a/src/Features/VR/SettingsUI.cpp b/src/Features/VR/SettingsUI.cpp index 5be4fc156c..55ebe0e3a5 100644 --- a/src/Features/VR/SettingsUI.cpp +++ b/src/Features/VR/SettingsUI.cpp @@ -73,7 +73,7 @@ void VR::DrawOverlay() static LARGE_INTEGER overlayShowStart = { 0 }; static LARGE_INTEGER freq = { 0 }; - bool shouldShow = settings.kAutoHideSeconds > 0 && globals::state->isMainMenuOpen && globals::menu && !globals::menu->IsEnabled; + bool shouldShow = settings.kAutoHideSeconds > 0 && globals::game::ui && globals::game::ui->IsMenuOpen(RE::MainMenu::MENU_NAME) && globals::menu && !globals::menu->IsEnabled; if (!shouldShow) { overlayShowStart.QuadPart = 0; @@ -108,7 +108,7 @@ void VR::DrawOverlay() ImGui::Begin("HowToUseOverlay", nullptr, ImGuiWindowFlags_NoTitleBar | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav); - 
ImGui::PushTextWrapPos(ImGui::GetCursorPos().x + 500.0f * scale); + ImGui::PushTextWrapPos(ImGui::GetCursorPos().x + 500.0f); ImGui::TextWrapped("How to Use VR Community Shaders Menu:"); ImGui::Separator(); ImGui::TextWrapped("You must open the Main Menu or Tween Menu before VR controls work."); @@ -124,7 +124,7 @@ void VR::DrawOverlay() Util::DrawButtonCombo(settings.VRMenuCloseKeys, true); ImGui::Spacing(); - ImGui::PushTextWrapPos(ImGui::GetCursorPos().x + 500.0f * scale); + ImGui::PushTextWrapPos(ImGui::GetCursorPos().x + 500.0f); ImGui::TextWrapped("Grip + Thumbstick: Adjust overlay depth (closer/farther)"); ImGui::Spacing(); ImGui::TextWrapped("Tip: Disable this VR overlay by setting Attach Mode to 'None' in VR settings."); @@ -324,25 +324,16 @@ namespace ImGui::Separator(); - const char* debugModes[] = { "Off", "Back-Check", "Blend Weight", "Edge Detection" }; + const char* debugModes[] = { "Off", "Back-Check", "Blend Weight", "Edge Detection", "Overwrite", "Overwrite Eye1" }; ImGui::Combo("Debug View", &settings.StereoBlendDebugMode, debugModes, IM_ARRAYSIZE(debugModes)); if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text( - "Off: Normal rendering.\n\n" - "Back-Check: Visualize reprojection outcomes.\n" - " Blue = sky or HMD mask (skipped).\n" - " Yellow = source edge rejected (depth discontinuity at this pixel).\n" - " Orange = destination edge rejected (discontinuity at reprojected pixel).\n" - " Grey = other eye can't see this point (out of bounds).\n" - " Green = back-check passed (surfaces match in both eyes).\n" - " Red = back-check failed (occlusion edge, blend penalized).\n\n" - "Blend Weight: Heatmap of stereo blend strength.\n" - " Cool/black = no blending. 
Hot/white = maximum blending.\n" - " Shows where the two eyes disagree and correction is applied.\n\n" - "Edge Detection: Highlights pixels excluded by depth discontinuity checks.\n" - " Yellow = source edge (discontinuity at this pixel).\n" - " Orange = destination edge (discontinuity at reprojected pixel).\n" - " Scene = all other pixels shown with normal blending."); + ImGui::Text("Stereo blend debug visualization modes:"); + ImGui::Text(" Off: Normal rendering"); + ImGui::Text(" Back-Check: Shows round-trip reprojection validation"); + ImGui::Text(" Blend Weight: Heatmap of bilateral blend intensity"); + ImGui::Text(" Edge Detection: Highlights depth discontinuities"); + ImGui::Text(" Overwrite: Shows stereo reprojection mode classification"); + ImGui::Text(" (Eye 0 = left eye, fully shaded; Eye 1 = right eye, reprojected)"); } ImGui::EndDisabled(); @@ -970,6 +961,9 @@ void VR::DrawSettings() if (BeginTabItemWithFont("Stereo", Menu::FontRole::Subheading)) { if (ImGui::BeginChild("##VRStereoFrame", { 0, 0 }, true)) { DrawStereoBlendSettings(); + if (ImGui::CollapsingHeader("Stereo Optimizations", ImGuiTreeNodeFlags_DefaultOpen)) { + stereoOpt.DrawSettings(); + } } ImGui::EndChild(); ImGui::EndTabItem(); diff --git a/src/Features/VR/StereoBlend.cpp b/src/Features/VR/StereoBlend.cpp index 1fa5d22240..e71e835cc2 100644 --- a/src/Features/VR/StereoBlend.cpp +++ b/src/Features/VR/StereoBlend.cpp @@ -1,9 +1,11 @@ #include "Features/VR.h" +#include "Deferred.h" #include "Features/DynamicCubemaps.h" #include "Features/ScreenSpaceGI.h" #include "Features/ScreenSpaceShadows.h" #include "State.h" +#include "Utils/D3D.h" void VR::ClearShaderCache() { @@ -11,6 +13,8 @@ void VR::ClearShaderCache() stereoBlendDebugBackCheckCS = nullptr; stereoBlendDebugBlendWeightCS = nullptr; stereoBlendDebugEdgeDetectionCS = nullptr; + stereoBlendOverwriteCS = nullptr; + stereoOpt.ClearShaderCache(); } bool VR::AnyScreenSpaceEffectLoaded() @@ -22,10 +26,20 @@ bool 
VR::AnyScreenSpaceEffectLoaded() void VR::DrawStereoBlend() { - if (!REL::Module::IsVR() || !settings.EnableStereoBlend || !stereoBlendCS || !stereoBlendCopyTex || !stereoBlendCB) + bool vrStereoOptActive = globals::features::vr.stereoOpt.loaded && + globals::features::vr.stereoOpt.settings.stereoMode != VRStereoOptimizations::StereoMode::Off && + stereoBlendOverwriteCS; + + if (!REL::Module::IsVR() || !stereoBlendCopyTex || !stereoBlendCB) + return; + + if (vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugSkipMerge) + return; + + if (!vrStereoOptActive && (!settings.EnableStereoBlend || !stereoBlendCS)) return; - if (!AnyScreenSpaceEffectLoaded() && !globals::state->IsDeveloperMode()) + if (!vrStereoOptActive && !AnyScreenSpaceEffectLoaded() && !globals::state->IsDeveloperMode()) return; ZoneScoped; @@ -55,37 +69,117 @@ void VR::DrawStereoBlend() cbData.MaxBlendFactor = settings.StereoBlendMaxFactor; cbData.ColorDiffThreshold = settings.StereoBlendColorThreshold; + // Pass debug edge tint from VRStereoOptimizations settings + if (vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugVisualization) + cbData.DebugEdgeTint = 0.3f; + else + cbData.DebugEdgeTint = 0.0f; + + // Debug mode: 0=normal, 1=depth map diagnostic, 2=full blend depth visualizer, 3=POM depth debug view + if (vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugDepthMap) + cbData.DebugMode = 1u; + else if (vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugFullBlendDepth) + cbData.DebugMode = 2u; + else if (vrStereoOptActive && globals::features::vr.stereoOpt.settings.debugPOMDepth) + cbData.DebugMode = 3u; + else + cbData.DebugMode = 0u; + + cbData.FullBlendDistance = vrStereoOptActive ? globals::features::vr.stereoOpt.settings.fullBlendDistance : 0.0f; + cbData.POMDepthScale = vrStereoOptActive ? 
globals::features::vr.stereoOpt.settings.pomDepthScale : 1.0f; + stereoBlendCB->Update(cbData); auto cbPtr = stereoBlendCB->CB(); - ID3D11ShaderResourceView* srvs[2]{ stereoBlendCopyTex->srv.get(), depthSRV }; - ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + auto& motionVectors = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMOTION_VECTOR]; + + bool isOverwriteMode = vrStereoOptActive; ID3D11ComputeShader* activeCS = stereoBlendCS.get(); - if (settings.StereoBlendDebugMode == 1 && stereoBlendDebugBackCheckCS) - activeCS = stereoBlendDebugBackCheckCS.get(); - else if (settings.StereoBlendDebugMode == 2 && stereoBlendDebugBlendWeightCS) - activeCS = stereoBlendDebugBlendWeightCS.get(); - else if (settings.StereoBlendDebugMode == 3 && stereoBlendDebugEdgeDetectionCS) - activeCS = stereoBlendDebugEdgeDetectionCS.get(); + if (vrStereoOptActive) { + activeCS = stereoBlendOverwriteCS.get(); + } else { + int effectiveMode = settings.StereoBlendDebugMode; + if (effectiveMode == 1 && stereoBlendDebugBackCheckCS) + activeCS = stereoBlendDebugBackCheckCS.get(); + else if (effectiveMode == 2 && stereoBlendDebugBlendWeightCS) + activeCS = stereoBlendDebugBlendWeightCS.get(); + else if (effectiveMode == 3 && stereoBlendDebugEdgeDetectionCS) + activeCS = stereoBlendDebugEdgeDetectionCS.get(); + } + + // Save and unbind DSV to avoid SRV/DSV conflict on depth buffer in overwrite mode + ID3D11RenderTargetView* savedRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {}; + ID3D11DepthStencilView* savedDSV = nullptr; + if (isOverwriteMode) { + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, &savedDSV); + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, nullptr); + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + } + ID3D11ShaderResourceView* srvs[2]{ stereoBlendCopyTex->srv.get(), depthSRV }; context->CSSetConstantBuffers(1, 1, &cbPtr); context->CSSetShaderResources(0, 2, srvs); - 
context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - context->CSSetShader(activeCS, nullptr, 0); + if (isOverwriteMode) { + ID3D11ShaderResourceView* modeSRV = globals::features::vr.stereoOpt.GetModeTextureSRV(); + context->CSSetShaderResources(2, 1, &modeSRV); + + // Bind REFLECTANCE SRV for POM depth offset (stored in .w by Lighting pass) + auto& reflectanceRT = renderer->GetRuntimeData().renderTargets[REFLECTANCE]; + context->CSSetShaderResources(3, 1, &reflectanceRT.SRV); + + ID3D11UnorderedAccessView* uavs[2]{ main.UAV, motionVectors.UAV }; + context->CSSetUnorderedAccessViews(0, 2, uavs, nullptr); + } else { + ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + } + + // Bind linear sampler for hardware bilinear color sampling in overwrite mode + if (isOverwriteMode) { + if (!stereoBlendLinearSampler) { + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + globals::d3d::device->CreateSamplerState(&sampDesc, stereoBlendLinearSampler.put()); + } + ID3D11SamplerState* samplers[] = { stereoBlendLinearSampler.get() }; + context->CSSetSamplers(0, 1, samplers); + } + + context->CSSetShader(activeCS, nullptr, 0); context->Dispatch(dispatchCount.x, dispatchCount.y, 1); // Cleanup - srvs[0] = nullptr; - srvs[1] = nullptr; - uavs[0] = nullptr; - cbPtr = nullptr; - context->CSSetShaderResources(0, 2, srvs); - context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - context->CSSetConstantBuffers(1, 1, &cbPtr); + ID3D11ShaderResourceView* nullSRVs[4] = {}; + context->CSSetShaderResources(0, isOverwriteMode ? 4 : 2, nullSRVs); + ID3D11UnorderedAccessView* nullUAVs[2] = {}; + context->CSSetUnorderedAccessViews(0, isOverwriteMode ? 
2 : 1, nullUAVs, nullptr); + ID3D11Buffer* nullCB = nullptr; + context->CSSetConstantBuffers(1, 1, &nullCB); + if (isOverwriteMode) { + ID3D11SamplerState* nullSampler[] = { nullptr }; + context->CSSetSamplers(0, 1, nullSampler); + } context->CSSetShader(nullptr, nullptr, 0); + // Restore DSV after CS dispatch in overwrite mode + if (isOverwriteMode && savedDSV) { + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, nullptr); + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, savedDSV); + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + savedDSV->Release(); + } + if (globals::state->frameAnnotations) globals::state->EndPerfEvent(); } diff --git a/src/Features/VRStereoOptimizations.cpp b/src/Features/VRStereoOptimizations.cpp new file mode 100644 index 0000000000..98da4c21ce --- /dev/null +++ b/src/Features/VRStereoOptimizations.cpp @@ -0,0 +1,649 @@ +#include "VRStereoOptimizations.h" + +#include "ExtendedMaterials.h" +#include "Globals.h" +#include "State.h" +#include "Utils/D3D.h" +#include "Utils/Game.h" + +#include + +// JSON enum serialization for StereoMode +NLOHMANN_JSON_SERIALIZE_ENUM(VRStereoOptimizations::StereoMode, { + { VRStereoOptimizations::StereoMode::Off, "Off" }, + { VRStereoOptimizations::StereoMode::Enable, "Enable" }, + }) + +//============================================================================= +// SETTINGS MANAGEMENT +//============================================================================= + +void VRStereoOptimizations::SaveSettings(json& o_json) +{ + o_json["StereoMode"] = settings.stereoMode; + o_json["DisocclusionDepthThreshold"] = settings.disocclusionDepthThreshold; + o_json["FullBlendDistance"] = settings.fullBlendDistance; + o_json["QualityJitterOffset"] = settings.qualityJitterOffset; + o_json["FoveatedRegionRadius"] = settings.foveatedRegionRadius; + o_json["FoveatedRegionCenterX"] = settings.foveatedRegionCenterX; + 
o_json["FoveatedRegionCenterY"] = settings.foveatedRegionCenterY; + o_json["UseEyeTracking"] = settings.useEyeTracking; + o_json["DebugVisualization"] = settings.debugVisualization; + o_json["DebugSkipMerge"] = settings.debugSkipMerge; + o_json["DebugForceAllStencil"] = settings.debugForceAllStencil; + o_json["DebugForceAllReprojectCS"] = settings.debugForceAllReprojectCS; + o_json["DebugDepthMap"] = settings.debugDepthMap; + o_json["POMDepthScale"] = settings.pomDepthScale; +} + +void VRStereoOptimizations::LoadSettings(json& o_json) +{ + if (o_json.contains("StereoMode")) + settings.stereoMode = o_json["StereoMode"].get(); + if (auto it = o_json.find("DisocclusionDepthThreshold"); it != o_json.end() && it->is_number()) + settings.disocclusionDepthThreshold = std::clamp(it->get(), 0.001f, 0.1f); + if (auto it = o_json.find("QualityJitterOffset"); it != o_json.end() && it->is_number()) + settings.qualityJitterOffset = std::clamp(it->get(), 0.0f, 1.0f); + if (auto it = o_json.find("FoveatedRegionRadius"); it != o_json.end() && it->is_number()) + settings.foveatedRegionRadius = std::clamp(it->get(), 0.0f, 1.0f); + if (auto it = o_json.find("FoveatedRegionCenterX"); it != o_json.end() && it->is_number()) + settings.foveatedRegionCenterX = std::clamp(it->get(), 0.0f, 1.0f); + if (auto it = o_json.find("FoveatedRegionCenterY"); it != o_json.end() && it->is_number()) + settings.foveatedRegionCenterY = std::clamp(it->get(), 0.0f, 1.0f); + if (auto it = o_json.find("UseEyeTracking"); it != o_json.end() && it->is_boolean()) + settings.useEyeTracking = it->get(); + if (auto it = o_json.find("DebugVisualization"); it != o_json.end() && it->is_boolean()) + settings.debugVisualization = it->get(); + if (auto it = o_json.find("DebugSkipMerge"); it != o_json.end() && it->is_boolean()) + settings.debugSkipMerge = it->get(); + if (auto it = o_json.find("DebugForceAllStencil"); it != o_json.end() && it->is_boolean()) + settings.debugForceAllStencil = it->get(); + if (auto it = 
o_json.find("DebugForceAllReprojectCS"); it != o_json.end() && it->is_boolean()) + settings.debugForceAllReprojectCS = it->get(); + if (auto it = o_json.find("DebugDepthMap"); it != o_json.end() && it->is_boolean()) + settings.debugDepthMap = it->get(); + if (auto it = o_json.find("FullBlendDistance"); it != o_json.end() && it->is_number()) + settings.fullBlendDistance = std::clamp(it->get(), 0.0f, 50000.0f); + if (auto it = o_json.find("POMDepthScale"); it != o_json.end() && it->is_number()) + settings.pomDepthScale = std::clamp(it->get(), 0.0f, 500.0f); +} + +void VRStereoOptimizations::RestoreDefaultSettings() +{ + settings = {}; +} + +//============================================================================= +// RESOURCE SETUP +//============================================================================= + +void VRStereoOptimizations::SetupResources() +{ + if (!REL::Module::IsVR()) + return; + + auto device = globals::d3d::device; + auto renderer = globals::game::renderer; + + // Constant buffers + paramsCB = eastl::make_unique(ConstantBufferDesc()); + + // Get main RT dimensions for per-eye calculations + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + D3D11_TEXTURE2D_DESC mainDesc; + main.texture->GetDesc(&mainDesc); + + // Per-pixel mode texture (R8_UINT, full SBS resolution = both eyes) + { + D3D11_TEXTURE2D_DESC modeDesc{}; + modeDesc.Width = mainDesc.Width; + modeDesc.Height = mainDesc.Height; + modeDesc.MipLevels = 1; + modeDesc.ArraySize = 1; + modeDesc.Format = DXGI_FORMAT_R8_UINT; + modeDesc.SampleDesc.Count = 1; + modeDesc.SampleDesc.Quality = 0; + modeDesc.Usage = D3D11_USAGE_DEFAULT; + modeDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + modeDesc.CPUAccessFlags = 0; + modeDesc.MiscFlags = 0; + + texPerPixelMode = eastl::make_unique(modeDesc); + texPerPixelMode->CreateSRV(D3D11_SHADER_RESOURCE_VIEW_DESC{ + .Format = DXGI_FORMAT_R8_UINT, + .ViewDimension = 
D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { .MostDetailedMip = 0, .MipLevels = 1 } }); + texPerPixelMode->CreateUAV(D3D11_UNORDERED_ACCESS_VIEW_DESC{ + .Format = DXGI_FORMAT_R8_UINT, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } }); + } + + // Depth-stencil state for stencil write pass: + // Depth test OFF (not rendering geometry), stencil ALWAYS + REPLACE with ref=1 + { + D3D11_DEPTH_STENCIL_DESC dssDesc{}; + dssDesc.DepthEnable = FALSE; + dssDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; + dssDesc.StencilEnable = TRUE; + dssDesc.StencilReadMask = 0xFF; + dssDesc.StencilWriteMask = 0xFF; + dssDesc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + dssDesc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + dssDesc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_REPLACE; + dssDesc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + dssDesc.BackFace = dssDesc.FrontFace; + + DX::ThrowIfFailed(device->CreateDepthStencilState(&dssDesc, stencilWriteDSS.put())); + } + + // Rasterizer state for stencil write: no culling, no depth clip + { + D3D11_RASTERIZER_DESC rsDesc{}; + rsDesc.FillMode = D3D11_FILL_SOLID; + rsDesc.CullMode = D3D11_CULL_NONE; + rsDesc.DepthClipEnable = FALSE; + + DX::ThrowIfFailed(device->CreateRasterizerState(&rsDesc, stencilWriteRS.put())); + } + + // Read-only depth DSV for stencil write pass: allows simultaneous depth SRV binding. + // We write stencil but never write depth, so D3D11_DSV_READ_ONLY_DEPTH is safe. 
+ { + auto& depthData = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + if (depthData.views[0] && depthData.texture) { + D3D11_DEPTH_STENCIL_VIEW_DESC dsvDesc{}; + depthData.views[0]->GetDesc(&dsvDesc); + dsvDesc.Flags = D3D11_DSV_READ_ONLY_DEPTH; + + DX::ThrowIfFailed(device->CreateDepthStencilView(depthData.texture, &dsvDesc, stencilWriteReadOnlyDSV.put())); + } else { + logger::warn("[VRStereoOptimizations] Could not create read-only DSV: depth stencil data not available"); + } + } + + CompileShaders(); + + logger::info("[VRStereoOptimizations] Resources created: mode tex {}x{} (full SBS)", mainDesc.Width, mainDesc.Height); +} + +void VRStereoOptimizations::CompileShaders() +{ + std::vector> csDefines = { + { "VR", nullptr }, + { "FRAMEBUFFER", nullptr } + }; + + std::vector> vspsDefines = { + { "VR", nullptr } + }; + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilCS.hlsl", csDefines, "cs_5_0")) + stencilCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilCS"); + + { + auto debugDefines = csDefines; + debugDefines.push_back({ "DEBUG_DEPTH_MAP", nullptr }); + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilCS.hlsl", debugDefines, "cs_5_0")) + stencilDebugDepthMapCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilCS (DEBUG_DEPTH_MAP)"); + } + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilWriteVS.hlsl", vspsDefines, "vs_5_0")) + stencilWriteVS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile StencilWriteVS"); + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\StencilWritePS.hlsl", vspsDefines, "ps_5_0")) + stencilWritePS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile 
StencilWritePS"); + + if (auto* ptr = Util::CompileShader(L"Data\\Shaders\\VRStereoOptimizations\\ReprojectionCS.hlsl", csDefines, "cs_5_0")) + reprojectionCS.attach(reinterpret_cast(ptr)); + else + logger::error("[VRStereoOptimizations] Failed to compile ReprojectionCS"); +} + +void VRStereoOptimizations::ClearShaderCache() +{ + stencilCS = nullptr; + stencilDebugDepthMapCS = nullptr; + stencilWriteVS = nullptr; + stencilWritePS = nullptr; + reprojectionCS = nullptr; + dssCache.clear(); +} + +void VRStereoOptimizations::Reset() +{ + stencilActive = false; + stencilSwapCount = 0; +} + +//============================================================================= +// IMGUI SETTINGS +//============================================================================= + +void VRStereoOptimizations::DrawSettings() +{ + const char* modeNames[] = { "Off", "Enable" }; + int currentMode = static_cast(settings.stereoMode); + if (ImGui::Combo("Feature Enable", &currentMode, modeNames, IM_ARRAYSIZE(modeNames))) + settings.stereoMode = static_cast(currentMode); + + if (settings.stereoMode == StereoMode::Off) + return; + + ImGui::SliderFloat("Disocclusion Depth Threshold", &settings.disocclusionDepthThreshold, 0.001f, 0.1f, "%.4f"); + + if (globals::state->IsDeveloperMode()) { + if (ImGui::TreeNode("Debug")) { + ImGui::SliderFloat("Full Blend Distance", &settings.fullBlendDistance, 0.0f, 10000.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Geometry closer than this distance (game units) is fully shaded in both eyes and bilaterally blended for 2x supersampling. 0 = disabled."); + + ImGui::SliderFloat("POM Depth Scale", &settings.pomDepthScale, 0.0f, 500.0f, "%.1f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Scale factor for POM depth correction in stereo reprojection.\n1.0 = physical scale. 
Increase for more visible POM stereo depth."); + ImGui::Checkbox("Skip Pixel Reprojection", &settings.debugSkipMerge); + ImGui::Checkbox("Full Blend Depth View", &settings.debugFullBlendDepth); + ImGui::Checkbox("Debug POM Depth", &settings.debugPOMDepth); + if (settings.debugFullBlendDepth) + ImGui::TextColored(ImVec4(0, 1, 1, 1), " Cyan = full blend zone (closer = stronger tint)"); + ImGui::Text("Stencil swaps this frame: %u", stencilSwapCount); + ImGui::TreePop(); + } + } +} + +//============================================================================= +// CONSTANT BUFFER UPDATE +//============================================================================= + +void VRStereoOptimizations::UpdateConstantBuffer() +{ + float2 resolution = Util::ConvertToDynamic(globals::state->screenSize); + + VRStereoOptParams params{}; + params.FrameDim[0] = resolution.x; + params.FrameDim[1] = resolution.y; + params.RcpFrameDim[0] = 1.0f / resolution.x; + params.RcpFrameDim[1] = 1.0f / resolution.y; + params.StereoModeValue = static_cast(settings.stereoMode); + params.DisocclusionThreshold = settings.disocclusionDepthThreshold; + params.EdgeDepthThreshold = settings.edgeDepthThreshold; + params.EdgeWidth = 2; + params.QualityJitter[0] = settings.qualityJitterOffset; + params.QualityJitter[1] = settings.qualityJitterOffset; + params.FoveatedRadius = settings.foveatedRegionRadius; + params.FoveatedCenter[0] = settings.foveatedRegionCenterX; + params.FoveatedCenter[1] = settings.foveatedRegionCenterY; + params.MinEdgeDistance = settings.minEdgeDistance; + params.FullBlendDistance = settings.fullBlendDistance; + + paramsCB->Update(params); +} + +//============================================================================= +// PHASE 1: STENCIL CLASSIFICATION + WRITE +//============================================================================= + +void VRStereoOptimizations::DispatchStencil() +{ + if (!REL::Module::IsVR()) + return; + if (settings.stereoMode == 
StereoMode::Off) + return; + if (!stencilCS || !stencilWriteVS || !stencilWritePS || !texPerPixelMode || !paramsCB || + !stencilWriteReadOnlyDSV || !stencilWriteDSS || !stencilWriteRS) + return; + + ZoneScoped; + TracyD3D11Zone(globals::state->tracyCtx, "VR Stereo Opt - Stencil"); + + if (globals::state->frameAnnotations) + globals::state->BeginPerfEvent("VR Stereo Opt - Stencil"); + + auto context = globals::d3d::context; + + UpdateConstantBuffer(); + auto cbPtr = paramsCB->CB(); + // Use live depth buffer (kMAIN) instead of kPOST_ZPREPASS_COPY — at StartDeferred time, + // kPOST_ZPREPASS_COPY is stale (previous frame). kMAIN has fresh z-prepass depth so + // StencilCS can correctly detect sky-vs-geometry edges in the current frame. + auto renderer = globals::game::renderer; + auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + if (!depthSRV) { + logger::warn("[VRStereoOptimizations] DispatchStencil: depthSRV is null, skipping"); + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); + return; + } + + // Dispatch classification CS over Eye 1 region + // Input: t0 = depth, b1 = params CB + // Output: u0 = per-pixel mode texture + { + ID3D11ShaderResourceView* srvs[1]{ depthSRV }; + ID3D11UnorderedAccessView* uavs[1]{ texPerPixelMode->uav.get() }; + + context->CSSetConstantBuffers(1, 1, &cbPtr); + context->CSSetShaderResources(0, 1, srvs); + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + auto* activeStencilCS = (settings.debugDepthMap && stencilDebugDepthMapCS) ? 
stencilDebugDepthMapCS.get() : stencilCS.get(); + context->CSSetShader(activeStencilCS, nullptr, 0); + + uint32_t fullWidth = texPerPixelMode->desc.Width; + uint32_t fullHeight = texPerPixelMode->desc.Height; + context->Dispatch((fullWidth + 7) / 8, (fullHeight + 7) / 8, 1); + + // Cleanup CS bindings + ID3D11ShaderResourceView* nullSRV = nullptr; + ID3D11UnorderedAccessView* nullUAV = nullptr; + ID3D11Buffer* nullCB = nullptr; + context->CSSetShaderResources(0, 1, &nullSRV); + context->CSSetUnorderedAccessViews(0, 1, &nullUAV, nullptr); + context->CSSetConstantBuffers(1, 1, &nullCB); + context->CSSetShader(nullptr, nullptr, 0); + } + + // Transfer classification to hardware stencil buffer + ExecuteStencilWritePass(); + + stencilActive = true; + stencilSwapCount = 0; + + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); +} + +void VRStereoOptimizations::ExecuteStencilWritePass() +{ + auto context = globals::d3d::context; + auto renderer = globals::game::renderer; + + // ===== SAVE FULL D3D11 PIPELINE STATE ===== + + ID3D11RenderTargetView* savedRTVs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {}; + ID3D11DepthStencilView* savedDSV = nullptr; + context->OMGetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, &savedDSV); + + ID3D11DepthStencilState* savedDSS = nullptr; + UINT savedStencilRef = 0; + context->OMGetDepthStencilState(&savedDSS, &savedStencilRef); + + ID3D11BlendState* savedBlendState = nullptr; + FLOAT savedBlendFactor[4] = {}; + UINT savedSampleMask = 0; + context->OMGetBlendState(&savedBlendState, savedBlendFactor, &savedSampleMask); + + ID3D11RasterizerState* savedRS = nullptr; + context->RSGetState(&savedRS); + + D3D11_VIEWPORT savedViewports[D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE] = {}; + UINT numViewports = D3D11_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE; + context->RSGetViewports(&numViewports, savedViewports); + + ID3D11VertexShader* savedVS = nullptr; + context->VSGetShader(&savedVS, 
nullptr, nullptr); + + ID3D11PixelShader* savedPS = nullptr; + context->PSGetShader(&savedPS, nullptr, nullptr); + + ID3D11GeometryShader* savedGS = nullptr; + context->GSGetShader(&savedGS, nullptr, nullptr); + + ID3D11InputLayout* savedInputLayout = nullptr; + context->IAGetInputLayout(&savedInputLayout); + + D3D11_PRIMITIVE_TOPOLOGY savedTopology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED; + context->IAGetPrimitiveTopology(&savedTopology); + + ID3D11ShaderResourceView* savedPSSRVs[2] = {}; + context->PSGetShaderResources(0, 2, savedPSSRVs); + + ID3D11Buffer* savedPSCB = nullptr; + context->PSGetConstantBuffers(1, 1, &savedPSCB); + + // ===== SET UP STENCIL WRITE PASS ===== + + // Use our custom read-only-depth DSV to allow simultaneous depth SRV binding (t1). + // D3D11_DSV_READ_ONLY_DEPTH permits depth SRV + stencil write simultaneously. + // Using views[0] would cause D3D11 to silently NULL the depth SRV. + // depthData.readOnlyViews[0] has BOTH read-only flags and doesn't allow stencil writes. + // Clear stencil buffer to 0 before writing classification. + // The engine's z-prepass may have written stencil values (e.g., stencil=1) for rendered geometry. + // Without this clear, StencilWritePS discards for MODE_DISOCCLUDED pixels leave the engine's + // stencil value intact, which can match our NOT_EQUAL ref=1 culling test and incorrectly + // skip those pixels during the Lighting pass. 
+ { + auto& depthData = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + context->ClearDepthStencilView(depthData.views[0], D3D11_CLEAR_STENCIL, 1.0f, 0); + } + + context->OMSetRenderTargets(0, nullptr, stencilWriteReadOnlyDSV.get()); + context->OMSetDepthStencilState(stencilWriteDSS.get(), 1); + context->RSSetState(stencilWriteRS.get()); + + // Eye 1 viewport (right half of SBS buffer) + { + D3D11_TEXTURE2D_DESC mainDesc; + renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN].texture->GetDesc(&mainDesc); + + D3D11_VIEWPORT vp{}; + vp.TopLeftX = static_cast(mainDesc.Width / 2); + vp.TopLeftY = 0.0f; + vp.Width = static_cast(mainDesc.Width / 2); + vp.Height = static_cast(mainDesc.Height); + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + context->RSSetViewports(1, &vp); + } + + // Bind shaders and mode texture + context->VSSetShader(stencilWriteVS.get(), nullptr, 0); + context->PSSetShader(stencilWritePS.get(), nullptr, 0); + context->GSSetShader(nullptr, nullptr, 0); + + ID3D11ShaderResourceView* modeSRV = texPerPixelMode->srv.get(); + context->PSSetShaderResources(0, 1, &modeSRV); + + auto* depthSRV = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN].depthSRV; + context->PSSetShaderResources(1, 1, &depthSRV); + + // Bind params CB to pixel shader (CS and PS have separate CB bindings) + auto cbPtr = paramsCB->CB(); + context->PSSetConstantBuffers(1, 1, &cbPtr); + + // Fullscreen triangle: no VB/IB, procedurally generated in VS + context->IASetInputLayout(nullptr); + context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + context->Draw(3, 0); + + // ===== RESTORE FULL D3D11 PIPELINE STATE ===== + + ID3D11ShaderResourceView* nullSRVs[2] = {}; + context->PSSetShaderResources(0, 2, nullSRVs); + + context->PSSetConstantBuffers(1, 1, &savedPSCB); + + context->OMSetRenderTargets(D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT, savedRTVs, savedDSV); + 
context->OMSetDepthStencilState(savedDSS, savedStencilRef); + context->OMSetBlendState(savedBlendState, savedBlendFactor, savedSampleMask); + context->RSSetState(savedRS); + context->RSSetViewports(numViewports, savedViewports); + context->VSSetShader(savedVS, nullptr, 0); + context->PSSetShader(savedPS, nullptr, 0); + context->GSSetShader(savedGS, nullptr, 0); + context->IASetInputLayout(savedInputLayout); + context->IASetPrimitiveTopology(savedTopology); + context->PSSetShaderResources(0, 2, savedPSSRVs); + + // Release COM references acquired by Get* calls + for (auto& rtv : savedRTVs) { + if (rtv) + rtv->Release(); + } + if (savedDSV) + savedDSV->Release(); + if (savedDSS) + savedDSS->Release(); + if (savedBlendState) + savedBlendState->Release(); + if (savedRS) + savedRS->Release(); + if (savedVS) + savedVS->Release(); + if (savedPS) + savedPS->Release(); + if (savedGS) + savedGS->Release(); + if (savedInputLayout) + savedInputLayout->Release(); + if (savedPSSRVs[0]) + savedPSSRVs[0]->Release(); + if (savedPSSRVs[1]) + savedPSSRVs[1]->Release(); + if (savedPSCB) + savedPSCB->Release(); +} + +void VRStereoOptimizations::PerformLateStencilWrite() +{ + // Intentionally empty: placeholder for future multi-pass stencil strategies +} + +//============================================================================= +// DSS CACHE: CLONE + STENCIL NOT_EQUAL ENFORCEMENT +//============================================================================= + +ID3D11DepthStencilState* VRStereoOptimizations::GetOrCreateModifiedDSS(ID3D11DepthStencilState* originalDSS) +{ + // Stencil culling inactive: hand the caller's state back unchanged (nothing is created or cached) + if (!stencilActive) + return originalDSS; + + // Check cache (nullptr is a valid key — represents D3D11 default state) + // NOTE(review): keys are raw pointers and no reference is taken on originalDSS here — confirm those states outlive the cache. + if (auto it = dssCache.find(originalDSS); it != dssCache.end()) + return it->second.get(); + + D3D11_DEPTH_STENCIL_DESC desc; + if (originalDSS) { + originalDSS->GetDesc(&desc); + } else { + // D3D11 default state: depth enabled, stencil disabled (values below mirror the documented D3D11_DEPTH_STENCIL_DESC defaults) + desc = {}; + desc.DepthEnable = TRUE; + desc.DepthWriteMask = 
D3D11_DEPTH_WRITE_MASK_ALL; + desc.DepthFunc = D3D11_COMPARISON_LESS; + desc.StencilEnable = FALSE; + desc.StencilReadMask = D3D11_DEFAULT_STENCIL_READ_MASK; + desc.StencilWriteMask = D3D11_DEFAULT_STENCIL_WRITE_MASK; + desc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS; + desc.BackFace = desc.FrontFace; + } + + desc.StencilEnable = TRUE; + desc.StencilReadMask = 0xFF; + desc.StencilWriteMask = 0x00; + + desc.FrontFace.StencilFunc = D3D11_COMPARISON_NOT_EQUAL; + desc.FrontFace.StencilFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilDepthFailOp = D3D11_STENCIL_OP_KEEP; + desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP; + desc.BackFace = desc.FrontFace; + + winrt::com_ptr modifiedDSS; + HRESULT hr = globals::d3d::device->CreateDepthStencilState(&desc, modifiedDSS.put()); + if (FAILED(hr)) { + logger::warn("[VRStereoOptimizations] Failed to create modified DSS (HRESULT: {:#x})", static_cast(hr)); + return originalDSS; + } + + auto* result = modifiedDSS.get(); + dssCache[originalDSS] = std::move(modifiedDSS); + + return result; +} + +//============================================================================= +// PHASE 3: REPROJECTION COMPUTE SHADER +//============================================================================= + +void VRStereoOptimizations::DispatchReprojection() +{ + if (!REL::Module::IsVR()) + return; + if (settings.stereoMode == StereoMode::Off) + return; + // Stencil culling may be active from here on: each bail-out below calls DeactivateStencil() so stencilActive is cleared before returning + if (!reprojectionCS || !texPerPixelMode || !paramsCB) { + DeactivateStencil(); + return; + } + if (settings.debugSkipMerge) { + DeactivateStencil(); + return; + } + + ZoneScoped; + TracyD3D11Zone(globals::state->tracyCtx, "VR Stereo Opt - Reprojection"); + + if (globals::state->frameAnnotations) + globals::state->BeginPerfEvent("VR Stereo Opt - Reprojection"); + + auto context = globals::d3d::context; + auto 
renderer = globals::game::renderer; + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + + UpdateConstantBuffer(); + auto cbPtr = paramsCB->CB(); + auto* depthSRV = Util::GetCurrentSceneDepthSRV(); + + // Bind: t0 = depth, t1 = mode texture, u0 = main UAV, b1 = params + ID3D11ShaderResourceView* srvs[2]{ + depthSRV, + texPerPixelMode->srv.get() + }; + ID3D11UnorderedAccessView* uavs[1]{ main.UAV }; + + context->CSSetConstantBuffers(1, 1, &cbPtr); + context->CSSetShaderResources(0, 2, srvs); + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + context->CSSetShader(reprojectionCS.get(), nullptr, 0); + + // Dispatch over Eye 1 only (shader treats dtid as Eye 1 local coords); eye size is half the mode texture width, group counts are rounded up in steps of 8 (assumes an 8x8 thread group — confirm against the CS) + uint32_t eyeWidth = texPerPixelMode->desc.Width / 2; + uint32_t eyeHeight = texPerPixelMode->desc.Height; + context->Dispatch((eyeWidth + 7) / 8, (eyeHeight + 7) / 8, 1); + + // Cleanup: null every CS slot bound above (previous CS bindings are not restored) + ID3D11ShaderResourceView* nullSRVs[2] = {}; + ID3D11UnorderedAccessView* nullUAV = nullptr; + ID3D11Buffer* nullCB = nullptr; + context->CSSetShaderResources(0, 2, nullSRVs); + context->CSSetUnorderedAccessViews(0, 1, &nullUAV, nullptr); + context->CSSetConstantBuffers(1, 1, &nullCB); + context->CSSetShader(nullptr, nullptr, 0); + + // Stencil culling is done for this frame (same trace + flag reset as DeactivateStencil(), but unconditional) + logger::trace("[VRStereoOptimizations] Frame: stencilSwapCount={}", stencilSwapCount); + stencilActive = false; + + if (globals::state->frameAnnotations) + globals::state->EndPerfEvent(); +} + +void VRStereoOptimizations::DeactivateStencil() +{ + // No-op unless stencil culling is active, so repeated calls trace only once + if (!stencilActive) + return; + logger::trace("[VRStereoOptimizations] Frame: stencilSwapCount={}", stencilSwapCount); + stencilActive = false; +} diff --git a/src/Features/VRStereoOptimizations.h b/src/Features/VRStereoOptimizations.h new file mode 100644 index 0000000000..57683e45bf --- /dev/null +++ b/src/Features/VRStereoOptimizations.h @@ -0,0 +1,198 @@ +#pragma once + +#include +using json = nlohmann::json; + +#include +#include +#include + +/** + * 
@brief VR Stereo Rendering Optimizations feature. + * + * Uses hardware stencil culling to skip Eye 1 pixel shading for pixels that can be + * reprojected from Eye 0 via lateral stereo reprojection, then runs a compute shader + * to fill those pixels. This avoids redundant pixel shading in overlapping stereo regions. + * + * Pipeline: + * 1. DispatchStencil() - CS classifies per-pixel reprojection viability into a mode texture, + * then a fullscreen VS/PS pass writes that classification into the stencil buffer. + * 2. (Game renders Eye 1) - Hardware stencil test skips shading for marked pixels. + * 3. DispatchReprojection() - CS reprojects Eye 0 color into the skipped Eye 1 pixels. + */ +struct VRStereoOptimizations +{ + bool loaded = false; + + //============================================================================= + // ENUMS + //============================================================================= + + /// Operating mode for stereo reprojection + enum class StereoMode : uint32_t + { + Off = 0, ///< Feature disabled + Enable = 1 ///< Stereo reprojection enabled + }; + + /// Per-pixel classification written by StencilCS + enum PixelMode : uint8_t + { + MODE_DISOCCLUDED = 0, ///< Fully shaded, no reprojection, no blend + MODE_EDGE = 1, ///< Fully shaded + bilateral blend with other eye + MODE_MAIN = 2, ///< Eye 0: no reproject (Perf) / bilateral (Quality). 
Eye 1: overwrite (Perf) / bilateral (Quality) + MODE_EDGE_NEIGHBOUR = 3, ///< Outer band: background pixels near edge, blended in post-process + MODE_FULL_BLEND = 4, ///< Near-camera pixels: fully shaded in both eyes + bilateral blended + }; + + //============================================================================= + // PUBLIC METHODS + //============================================================================= + + void SetupResources(); + void Reset(); + void DrawSettings(); + void SaveSettings(json& o_json); + void LoadSettings(json& o_json); + void RestoreDefaultSettings(); + void ClearShaderCache(); + + //============================================================================= + // SETTINGS + //============================================================================= + + struct Settings + { + StereoMode stereoMode = StereoMode::Enable; + float disocclusionDepthThreshold = 0.01f; + float edgeDepthThreshold = 0.05f; + float minEdgeDistance = 5000.0f; ///< Minimum linearized depth for edge AA (game units) + float fullBlendDistance = 0.0f; ///< Linearized depth below which both eyes are fully shaded + blended (game units) + float pomDepthScale = 22.5f; ///< Scale factor for POM depth correction in stereo reprojection + bool debugFullBlendDepth = false; ///< Show full blend depth zone as cyan overlay + float qualityJitterOffset = 0.125f; + float foveatedRegionRadius = 0.3f; + float foveatedRegionCenterX = 0.5f; + float foveatedRegionCenterY = 0.5f; + bool useEyeTracking = false; + + int reprojectionMode = 5; // 0=Blend, 4=Overwrite, 5=Overwrite Eye1 Only + + // Debug controls + bool debugVisualization = false; + bool debugSkipMerge = false; ///< When set, DispatchReprojection() skips the reprojection dispatch and only deactivates stencil culling + bool debugForceAllStencil = false; + bool debugForceAllReprojectCS = false; + bool debugDepthMap = false; + bool debugPOMDepth = false; ///< Show POM depth data (Reflectance.w) as heatmap overlay + + } settings; + + //============================================================================= + // GPU 
CONSTANT BUFFER (must match HLSL cbuffer layout exactly) + //============================================================================= + + struct alignas(16) VRStereoOptParams + { + float FrameDim[2]; // Full stereo buffer dimensions + float RcpFrameDim[2]; // 1.0 / FrameDim + + uint32_t StereoModeValue; // Cast of StereoMode enum (0 = Off, 1 = Enable) + float DisocclusionThreshold; + float EdgeDepthThreshold; + uint32_t EdgeWidth; + + float QualityJitter[2]; // Sub-pixel jitter offset (Quality mode) + float FoveatedRadius; + float pad2; + + float FoveatedCenter[2]; // Foveal region center UV + float MinEdgeDistance; + float FullBlendDistance; // Linearized depth for full blend zone + }; + static_assert(sizeof(VRStereoOptParams) % 16 == 0, "VRStereoOptParams must be 16-byte aligned for HLSL cbuffer."); + + //============================================================================= + // PUBLIC API + //============================================================================= + + /** + * @brief Classify Eye 1 pixels and write stencil marks. + * + * Dispatches the stencil classification CS, then performs a fullscreen triangle pass + * to write the classification into the hardware stencil buffer. + * Called from Deferred::StartDeferred() after OverrideBlendStates(). + */ + void DispatchStencil(); + + /** + * @brief Reproject Eye 0 color into stencil-culled Eye 1 pixels. + * + * Binds scene depth, the mode texture and the main render target UAV, then dispatches a CS over Eye 1 to fill skipped pixels + * using lateral reprojection from Eye 0. Clears the stencil-active flag when done. + * Called from Deferred::DeferredPasses() after DeferredCompositeCS. + */ + void DispatchReprojection(); + + /** + * @brief Creates or retrieves a modified DSS with stencil NOT_EQUAL test. + * + * Clones the given DSS with read-only stencil (WriteMask=0x00, Func=NOT_EQUAL; the ref value 1 is supplied by the caller when binding) + * so that pixels marked by our stencil write pass are skipped during normal rendering. + * Cached per unique input DSS pointer. Returns the input untouched while stencil culling is inactive. + * + * @param originalDSS The original depth-stencil state to modify. 
+ * @return Modified DSS with stencil test, or original if creation fails. + */ + ID3D11DepthStencilState* GetOrCreateModifiedDSS(ID3D11DepthStencilState* originalDSS); + + /// Whether the stencil pass is currently active this frame + bool IsStencilActive() const { return stencilActive; } + + /// Deactivate stencil culling (called from Deferred after geometry rendering completes); no-op when already inactive + void DeactivateStencil(); + + /// Get mode texture SRV for external consumers (e.g., DeferredCompositeCS Eye 1 skip); nullptr while the mode texture is not created + ID3D11ShaderResourceView* GetModeTextureSRV() const { return texPerPixelMode ? texPerPixelMode->srv.get() : nullptr; } + +private: + //============================================================================= + // INTERNAL METHODS + //============================================================================= + + /// Fullscreen triangle pass: reads mode texture, writes stencil ref=1 for MODE_MAIN pixels + void ExecuteStencilWritePass(); + + /// Late stencil write callback (placeholder for future multi-pass strategies) + void PerformLateStencilWrite(); + + /// Compiles all shaders used by this feature + void CompileShaders(); + + /// Updates the constant buffer with current settings and frame dimensions + void UpdateConstantBuffer(); + + //============================================================================= + // GPU RESOURCES + //============================================================================= + + eastl::unique_ptr paramsCB; + eastl::unique_ptr texPerPixelMode; ///< R8_UINT classification texture (full SBS resolution) + eastl::unique_ptr reprojectionCopyTex; ///< Copy of main RT for reprojection read (NOTE(review): DispatchReprojection() writes through the main UAV and does not reference this — confirm it is still needed) + + winrt::com_ptr stencilWriteDSS; + winrt::com_ptr stencilWriteRS; + winrt::com_ptr stencilWriteReadOnlyDSV; ///< Read-only-depth DSV for stencil write pass (allows simultaneous depth SRV) + + winrt::com_ptr stencilCS; + winrt::com_ptr stencilDebugDepthMapCS; + winrt::com_ptr stencilWriteVS; + winrt::com_ptr stencilWritePS; + winrt::com_ptr 
reprojectionCS; + + /// Cache of original DSS -> modified DSS with stencil NOT_EQUAL enforcement (keyed by raw pointer; nullptr stands for the D3D11 default state) + std::unordered_map> dssCache; + + bool stencilActive = false; + uint32_t stencilSwapCount = 0; +}; diff --git a/src/Globals.cpp b/src/Globals.cpp index e90c3bf4ce..52de7e7bd4 100644 --- a/src/Globals.cpp +++ b/src/Globals.cpp @@ -266,13 +266,79 @@ namespace globals { static void thunk(ID3D11DeviceContext* This, ID3D11Resource* pResource, UINT Subresource) { - if (*globals::game::perFrame.get() == pResource && globals::game::mappedFrameBuffer) + if (*globals::game::perFrame.get() == pResource && globals::game::mappedFrameBuffer) { CacheFramebuffer(); + } func(This, pResource, Subresource); } static inline REL::Relocation func; }; + /** + * @brief Hooked OMSetDepthStencilState — replaces DSS with stencil-enforcing version when VR stereo opt is active. + * + * vtable index 36 for ID3D11DeviceContext::OMSetDepthStencilState. + * When VRStereoOptimizations has written stencil marks, this hook transparently swaps + * the game's DSS for a modified version that adds a stencil NOT_EQUAL test, causing + * marked Eye 1 pixels to be skipped during normal rendering. + * NOTE(review): StencilRef is forced to 1 even when GetOrCreateModifiedDSS() falls back to the original state (creation failure) — confirm that is acceptable. + * NOTE(review): the feature's own OMSetDepthStencilState call in its stencil write pass presumably also goes through this hook — confirm stencilActive is still false at that point. + */ + struct ID3D11DeviceContext_OMSetDepthStencilState + { + static void thunk(ID3D11DeviceContext* This, ID3D11DepthStencilState* pDepthStencilState, UINT StencilRef) + { + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vr.stereoOpt; + if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { + pDepthStencilState = stereoOpt.GetOrCreateModifiedDSS(pDepthStencilState); + StencilRef = 1; // Must match the ref written by our stencil pass + } + } + func(This, pDepthStencilState, StencilRef); + } + static inline REL::Relocation func; + }; + + /** + * @brief Hooked ClearDepthStencilView — blocks stencil clears when VR stereo opt stencil is active. + * + * vtable index 53 for ID3D11DeviceContext::ClearDepthStencilView. 
+ * Prevents the game from clearing our stencil marks between the stencil write and + * the reprojection pass by stripping the D3D11_CLEAR_STENCIL flag. + */ + struct ID3D11DeviceContext_ClearDepthStencilView + { + static void thunk(ID3D11DeviceContext* This, ID3D11DepthStencilView* pDepthStencilView, UINT ClearFlags, FLOAT Depth, UINT8 Stencil) + { + if (globals::game::isVR) { + auto& stereoOpt = globals::features::vr.stereoOpt; + if (stereoOpt.loaded && stereoOpt.IsStencilActive()) { + // Only protect the main scene DSV — allow other DSVs to clear normally + auto renderer = globals::game::renderer; + auto& mainDepth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; + // A null DSV is never dereferenced here; it is forwarded to the original function untouched + if (pDepthStencilView && mainDepth.views[0]) { + // Compare underlying resources rather than view pointers, so every DSV created over the main depth texture is protected + ID3D11Resource* clearRes = nullptr; + ID3D11Resource* mainRes = nullptr; + pDepthStencilView->GetResource(&clearRes); + mainDepth.views[0]->GetResource(&mainRes); + bool isMainDSV = (clearRes == mainRes); + if (clearRes) + clearRes->Release(); + if (mainRes) + mainRes->Release(); + if (isMainDSV) { + ClearFlags &= ~D3D11_CLEAR_STENCIL; + if (ClearFlags == 0) + return; + } + } + } + } + func(This, pDepthStencilView, ClearFlags, Depth, Stencil); + } + static inline REL::Relocation func; + }; + /** * @brief Installs hooks on the Map and Unmap methods of the provided D3D11 device context. 
* @@ -282,5 +348,11 @@ namespace globals { stl::detour_vfunc<14, ID3D11DeviceContext_Map>(a_context); stl::detour_vfunc<15, ID3D11DeviceContext_Unmap>(a_context); + + // VR stereo optimization hooks: intercept DSS and stencil clear + if (globals::game::isVR) { + stl::detour_vfunc<36, ID3D11DeviceContext_OMSetDepthStencilState>(a_context); + stl::detour_vfunc<53, ID3D11DeviceContext_ClearDepthStencilView>(a_context); + } } } diff --git a/src/State.cpp b/src/State.cpp index 13bf1681e7..89ce7f819f 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -11,6 +11,7 @@ #include "Features/TerrainBlending.h" #include "Features/TerrainHelper.h" #include "Features/Upscaling.h" +#include "Features/VRStereoOptimizations.h" #include "Features/VolumetricShadows.h" #include "Features/WeatherEditor.h" #include "Menu.h"