diff --git a/features/Volumetric Shadows/Shaders/Features/VolumetricShadows.ini b/features/Volumetric Shadows/Shaders/Features/VolumetricShadows.ini index e9d66d302c..178f84a3fc 100644 --- a/features/Volumetric Shadows/Shaders/Features/VolumetricShadows.ini +++ b/features/Volumetric Shadows/Shaders/Features/VolumetricShadows.ini @@ -1,5 +1,5 @@ [Info] -Version = 2-0-1 +Version = 3-0-0 [Nexus] autoupload = false diff --git a/features/Volumetric Shadows/Shaders/VolumetricShadows/BlurShadowCS.hlsl b/features/Volumetric Shadows/Shaders/VolumetricShadows/BlurShadowCS.hlsl deleted file mode 100644 index 266cd01fcc..0000000000 --- a/features/Volumetric Shadows/Shaders/VolumetricShadows/BlurShadowCS.hlsl +++ /dev/null @@ -1,100 +0,0 @@ -// 11x11 separable Gaussian blur for VSM shadow map -// BLUR_HORIZONTAL - horizontal pass -// BLUR_VERTICAL - vertical pass - -Texture2D InputTexture : register(t0); -RWTexture2D OutputTexture : register(u0); - -// Gaussian weights for 11-tap kernel (sigma ~= 2.5) -static const float weights[6] = { - 0.198596, // center - 0.175713, // +/- 1 - 0.121703, // +/- 2 - 0.065984, // +/- 3 - 0.028002, // +/- 4 - 0.009302 // +/- 5 -}; - -#define KERNEL_RADIUS 5 -#define GROUP_SIZE 128 - -// Shared memory for efficient loading -// We need GROUP_SIZE + 2 * KERNEL_RADIUS elements -groupshared float2 g_cache[GROUP_SIZE + 2 * KERNEL_RADIUS]; - -#if defined(BLUR_HORIZONTAL) -[numthreads(GROUP_SIZE, 1, 1)] void main(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID, uint3 dispatchThreadID : SV_DispatchThreadID) { - uint width, height; - InputTexture.GetDimensions(width, height); - - int2 baseCoord = int2(groupID.x * GROUP_SIZE - KERNEL_RADIUS, groupID.y); - int localIdx = groupThreadID.x; - - // Load main data - int2 coord = baseCoord + int2(localIdx, 0); - coord.x = clamp(coord.x, 0, (int)width - 1); - g_cache[localIdx] = InputTexture[coord]; - - // Load extra data for kernel overlap - if (localIdx < 2 * KERNEL_RADIUS) { - coord = baseCoord + int2(GROUP_SIZE + localIdx, 0); - coord.x = clamp(coord.x, 0, (int)width - 1); - g_cache[GROUP_SIZE + localIdx] = InputTexture[coord]; - } - - GroupMemoryBarrierWithGroupSync(); - - // Only process valid pixels - if (dispatchThreadID.x >= width || dispatchThreadID.y >= height) - return; - - // Apply horizontal blur - float2 result = g_cache[localIdx + KERNEL_RADIUS] * weights[0]; - - [unroll] for (int i = 1; i <= KERNEL_RADIUS; i++) - { - result += g_cache[localIdx + KERNEL_RADIUS - i] * weights[i]; - result += g_cache[localIdx + KERNEL_RADIUS + i] * weights[i]; - } - - OutputTexture[dispatchThreadID.xy] = result; -} - -#elif defined(BLUR_VERTICAL) -[numthreads(1, GROUP_SIZE, 1)] void main(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID, uint3 dispatchThreadID : SV_DispatchThreadID) { - uint width, height; - InputTexture.GetDimensions(width, height); - - int2 baseCoord = int2(groupID.x, groupID.y * GROUP_SIZE - KERNEL_RADIUS); - int localIdx = groupThreadID.y; - - // Load main data - int2 coord = baseCoord + int2(0, localIdx); - coord.y = clamp(coord.y, 0, (int)height - 1); - g_cache[localIdx] = InputTexture[coord]; - - // Load extra data for kernel overlap - if (localIdx < 2 * KERNEL_RADIUS) { - coord = baseCoord + int2(0, GROUP_SIZE + localIdx); - coord.y = clamp(coord.y, 0, (int)height - 1); - g_cache[GROUP_SIZE + localIdx] = InputTexture[coord]; - } - - GroupMemoryBarrierWithGroupSync(); - - // Only process valid pixels - if (dispatchThreadID.x >= width || dispatchThreadID.y >= height) - return; - - // Apply vertical blur - float2 result = g_cache[localIdx + KERNEL_RADIUS] * weights[0]; - - [unroll] for (int i = 1; i <= KERNEL_RADIUS; i++) - { - result += g_cache[localIdx + KERNEL_RADIUS - i] * weights[i]; - result += g_cache[localIdx + KERNEL_RADIUS + i] * weights[i]; - } - - OutputTexture[dispatchThreadID.xy] = result; -} -#endif diff --git a/features/Volumetric Shadows/Shaders/VolumetricShadows/BuildShadowFroxelCS.hlsl b/features/Volumetric Shadows/Shaders/VolumetricShadows/BuildShadowFroxelCS.hlsl new file mode 100644 index 0000000000..2d4e870648 --- /dev/null +++ b/features/Volumetric Shadows/Shaders/VolumetricShadows/BuildShadowFroxelCS.hlsl @@ -0,0 +1,160 @@ +// Builds a view-space froxel grid of PCF-filtered directional shadow visibility. +// +// One thread per voxel: +// 1. Reconstruct the voxel's world-space (camera-relative) position via screen UV + +// exponential view-Z slicing. +// 2. Pick the correct cascade for that view depth, smooth-blend at the boundary. +// 3. Sample the directional shadow cascade with a 5-tap cross PCF kernel and write +// the visibility scalar to the grid. +// +// Consumers then sample the grid trilinearly via VolumetricShadows.hlsli without doing +// any cascade math or shadow projection of their own. + +#include "Common/FrameBuffer.hlsli" +#include "Common/SharedData.hlsli" +#include "Common/VR.hlsli" + +cbuffer VolumetricShadowsCB : register(b1) +{ + uint3 GridSize; + uint HasShadows; + float NearZ; + float FarZFallback; + float ShadowBias; + float pad0; +}; + +struct DirectionalShadowLightData +{ + column_major float4x4 ShadowProj[2]; + column_major float4x4 InvShadowProj[2]; + float2 EndSplitDistances; + float2 StartSplitDistances; +}; + +Texture2DArray DirectionalShadowMap : register(t0); +StructuredBuffer DirectionalShadowLights : register(t98); +RWTexture3D ShadowFroxel : register(u0); +SamplerComparisonState ShadowSampler : register(s0); +SamplerState LinearSampler : register(s1); + +// Convert a linear view-Z (positive forward) into the depth-buffer NDC z using Skyrim's +// CameraData. Inverse of SharedData::GetScreenDepth. +float ViewZToDeviceZ(float viewZ) +{ + return (SharedData::CameraData.x - SharedData::CameraData.w / viewZ) / SharedData::CameraData.z; +} + +// Exponential mapping concentrates resolution close to the camera where shadow detail matters. +float SliceToViewZ(float slice, float farZ) +{ + float t = saturate(slice); + return NearZ * pow(farZ / NearZ, t); +} + +// 5-tap cross PCF using the hardware comparison sampler. Returns visibility in [0, 1]. +float SampleDirectionalShadowPCF(float3 positionLS, uint cascadeIndex) +{ + uint shadowWidth; + uint shadowHeight; + uint shadowSlices; + DirectionalShadowMap.GetDimensions(shadowWidth, shadowHeight, shadowSlices); + if (cascadeIndex >= shadowSlices) + return 1.0f; + + float2 texelSize = rcp(float2(max(shadowWidth, 1u), max(shadowHeight, 1u))); + float compareDepth = positionLS.z - ShadowBias; + + // Fall back to a single tap near the cascade border to avoid bleeding from the neighbouring slice. + float2 uvMin = texelSize * 1.5f; + float2 uvMax = 1.0f.xx - uvMin; + if (any(positionLS.xy < uvMin) || any(positionLS.xy > uvMax)) + return DirectionalShadowMap.SampleCmpLevelZero(ShadowSampler, float3(saturate(positionLS.xy), cascadeIndex), compareDepth); + + float center = DirectionalShadowMap.SampleCmpLevelZero(ShadowSampler, float3(positionLS.xy, cascadeIndex), compareDepth); + float cross = DirectionalShadowMap.SampleCmpLevelZero(ShadowSampler, float3(positionLS.xy + float2(texelSize.x, 0.0f), cascadeIndex), compareDepth); + cross += DirectionalShadowMap.SampleCmpLevelZero(ShadowSampler, float3(positionLS.xy - float2(texelSize.x, 0.0f), cascadeIndex), compareDepth); + cross += DirectionalShadowMap.SampleCmpLevelZero(ShadowSampler, float3(positionLS.xy + float2(0.0f, texelSize.y), cascadeIndex), compareDepth); + cross += DirectionalShadowMap.SampleCmpLevelZero(ShadowSampler, float3(positionLS.xy - float2(0.0f, texelSize.y), cascadeIndex), compareDepth); + + return (center * 4.0f + cross) * rcp(8.0f); +} + +float SampleDirectionalShadow(float3 positionWS, float viewZ, uint eyeIndex) +{ + DirectionalShadowLightData light = DirectionalShadowLights[0]; + + if (viewZ >= light.EndSplitDistances.y) + return 1.0f; + + float splitDenom = max(light.EndSplitDistances.x - light.StartSplitDistances.y, 1e-4f); + float cascadeSelect = saturate((viewZ - light.StartSplitDistances.y) / splitDenom); + uint primaryCascade = (uint)cascadeSelect; + + float3 absolutePositionWS = positionWS + FrameBuffer::CameraPosAdjust[eyeIndex].xyz; + float3 positionLS = mul(light.ShadowProj[primaryCascade], float4(absolutePositionWS, 1.0f)).xyz; + if (any(positionLS.xy < 0.0f) || any(positionLS.xy > 1.0f)) + return 1.0f; + + float shadow = SampleDirectionalShadowPCF(positionLS, primaryCascade); + + [branch] if (cascadeSelect > 0.0f && cascadeSelect < 1.0f) + { + uint secondaryCascade = 1u - primaryCascade; + float3 secondaryLS = mul(light.ShadowProj[secondaryCascade], float4(absolutePositionWS, 1.0f)).xyz; + if (!any(secondaryLS.xy < 0.0f) && !any(secondaryLS.xy > 1.0f)) { + float secondaryShadow = SampleDirectionalShadowPCF(secondaryLS, secondaryCascade); + shadow = lerp(shadow, secondaryShadow, cascadeSelect); + } + } + + // Fade out smoothly near the far edge of the second cascade. + float fade = saturate(viewZ / max(light.EndSplitDistances.y, 1.0f)); + float fadeFactor = 1.0f - pow(fade * fade, 8.0f); + return lerp(1.0f, shadow, fadeFactor); +} + +// Reconstruct camera-relative world position for a voxel center. +float3 ComputeVoxelWorldPosition(uint3 coord, out uint eyeIndex, out float viewZ) +{ + float3 volumeUVW = (float3(coord) + 0.5f) / float3(GridSize); + +#if defined(VR) + eyeIndex = Stereo::GetEyeIndexFromTexCoord(volumeUVW.xy); + float2 eyeUV = Stereo::ConvertFromStereoUV(volumeUVW.xy, eyeIndex); +#else + eyeIndex = 0; + float2 eyeUV = volumeUVW.xy; +#endif + + DirectionalShadowLightData light = DirectionalShadowLights[0]; + float farZ = max(light.EndSplitDistances.y, NearZ + 1.0f); + viewZ = SliceToViewZ(volumeUVW.z, farZ); + + float deviceZ = ViewZToDeviceZ(viewZ); + + float2 ndc = eyeUV * float2(2.0f, -2.0f) + float2(-1.0f, 1.0f); + // Compose unjittered ViewProj inverse from ViewInverse * ProjUnjitteredInverse so the + // grid is stable across frames (CameraViewProjInverse is the jittered variant). + float4 viewH = mul(FrameBuffer::CameraProjUnjitteredInverse[eyeIndex], float4(ndc, deviceZ, 1.0f)); + float3 viewPos = viewH.xyz / viewH.w; + float4 worldPosition = mul(FrameBuffer::CameraViewInverse[eyeIndex], float4(viewPos, 1.0f)); + return worldPosition.xyz; +} + +[numthreads(8, 8, 4)] void main(uint3 dispatchID : SV_DispatchThreadID) { + if (any(dispatchID >= GridSize)) + return; + + if (HasShadows == 0u) { + ShadowFroxel[dispatchID] = 1.0f; + return; + } + + uint eyeIndex; + float viewZ; + float3 positionWS = ComputeVoxelWorldPosition(dispatchID, eyeIndex, viewZ); + + float shadow = SampleDirectionalShadow(positionWS, viewZ, eyeIndex); + ShadowFroxel[dispatchID] = saturate(shadow); +} diff --git a/features/Volumetric Shadows/Shaders/VolumetricShadows/DownsampleShadowCS.hlsl b/features/Volumetric Shadows/Shaders/VolumetricShadows/DownsampleShadowCS.hlsl deleted file mode 100644 index b7f960281e..0000000000 --- a/features/Volumetric Shadows/Shaders/VolumetricShadows/DownsampleShadowCS.hlsl +++ /dev/null @@ -1,102 +0,0 @@ -Texture2DArray InputTexture : register(t0); -Texture2DArray ESRAMShadow : register(t1); -RWTexture2D OutputTexture : register(u0); -SamplerState LinearSampler : register(s0); - -float2 GetVSMMoments(in float depth) -{ - return float2(depth, depth * depth); -} - -float2 ReduceMoments(float2 a, float2 b, float2 c, float2 d) -{ - return (a + b + c + d) * 0.25; -} - -groupshared float2 g_scratchDepths[8][8]; - -#if defined(DOWNSAMPLE_SHADOW_MIP0) -static const uint CASCADE = 1; -#elif defined(DOWNSAMPLE_SHADOW_MIP1) -static const uint CASCADE = 0; -#endif - -[numthreads(8, 8, 1)] void main(uint3 dispatchThreadID : SV_DispatchThreadID, uint3 groupThreadID : SV_GroupThreadID) { - uint2 pixCoord = dispatchThreadID.xy * 2; - - uint inputW, inputH, inputSlices; - InputTexture.GetDimensions(inputW, inputH, inputSlices); - float2 uv = (pixCoord + 0.5) / float2(inputW, inputH); - - uint outputW, outputH; - OutputTexture.GetDimensions(outputW, outputH); - - // Determine reduction levels from input/output ratio - // Gather handles 2x, each group reduction handles another 2x - uint totalReduction = inputW / outputW; - uint groupReductions = 0; - if (totalReduction >= 4) - groupReductions = 1; - if (totalReduction >= 8) - groupReductions = 2; - if (totalReduction >= 16) - groupReductions = 3; - - // Gather from shadow cascades and mix with ESRAM shadow - float4 depths = InputTexture.GatherRed(LinearSampler, float3(uv, CASCADE)); - float4 esramDepths = ESRAMShadow.GatherRed(LinearSampler, float3(uv, CASCADE)); - depths = min(depths, esramDepths); - - float2 vsmDepth = 0; - for (uint i = 0; i < 4; i++) - vsmDepth += GetVSMMoments(depths[i]); - vsmDepth *= 0.25; - - g_scratchDepths[groupThreadID.x][groupThreadID.y] = vsmDepth; - - GroupMemoryBarrierWithGroupSync(); - - // First reduction: 2x2 - if (groupReductions >= 1) { - if (all((groupThreadID.xy % 2) == 0)) { - uint2 tid = groupThreadID.xy; - g_scratchDepths[tid.x][tid.y] = ReduceMoments( - g_scratchDepths[tid.x + 0][tid.y + 0], - g_scratchDepths[tid.x + 1][tid.y + 0], - g_scratchDepths[tid.x + 0][tid.y + 1], - g_scratchDepths[tid.x + 1][tid.y + 1]); - } - GroupMemoryBarrierWithGroupSync(); - } - - // Second reduction: 4x4 - if (groupReductions >= 2) { - if (all((groupThreadID.xy % 4) == 0)) { - uint2 tid = groupThreadID.xy; - g_scratchDepths[tid.x][tid.y] = ReduceMoments( - g_scratchDepths[tid.x + 0][tid.y + 0], - g_scratchDepths[tid.x + 2][tid.y + 0], - g_scratchDepths[tid.x + 0][tid.y + 2], - g_scratchDepths[tid.x + 2][tid.y + 2]); - } - GroupMemoryBarrierWithGroupSync(); - } - - // Third reduction: 8x8 - if (groupReductions >= 3) { - if (all(groupThreadID.xy == 0)) { - g_scratchDepths[0][0] = ReduceMoments( - g_scratchDepths[0][0], - g_scratchDepths[4][0], - g_scratchDepths[0][4], - g_scratchDepths[4][4]); - } - GroupMemoryBarrierWithGroupSync(); - } - - // Write output - only threads aligned to the output grid - uint outputDiv = max(totalReduction / 2, 1); - if (all((groupThreadID.xy % outputDiv) == 0)) { - OutputTexture[dispatchThreadID.xy / outputDiv] = g_scratchDepths[groupThreadID.x][groupThreadID.y]; - } -} diff --git a/features/Volumetric Shadows/Shaders/VolumetricShadows/VolumetricShadows.hlsli b/features/Volumetric Shadows/Shaders/VolumetricShadows/VolumetricShadows.hlsli index cdfb339ba2..919e10abfe 100644 --- a/features/Volumetric Shadows/Shaders/VolumetricShadows/VolumetricShadows.hlsli +++ b/features/Volumetric Shadows/Shaders/VolumetricShadows/VolumetricShadows.hlsli @@ -1,183 +1,81 @@ #ifndef __VOLUMETRIC_SHADOWS_HLSLI__ #define __VOLUMETRIC_SHADOWS_HLSLI__ -// Variance Shadow Maps (VSM) -// Chebyshev's inequality on filtered depth moments +// View-space froxel grid of PCF-filtered directional shadow visibility. +// Built each frame by BuildShadowFroxelCS.hlsl and bound at t18 by VolumetricShadows.cpp. namespace VolumetricShadows { - Texture2D SharedShadowMap : register(t18); + Texture3D SharedShadowMap : register(t18); - static const float VSM_MIN_VARIANCE = 0.00001; - static const float VSM_BLEEDING_REDUCTION = 0.2; + static const float kFroxelNearZ = 16.0; - // Chebyshev upper bound on P(X >= t) - // moments.x = mean(z), moments.y = mean(z^2) - float ComputeVSM(float2 moments, float depth) + // Match the build CS exponential slice mapping. The far distance comes from the + // cascade's max split, which is shared at t98 already. + float ViewZToSlice(float viewZ, float farZ) { - float variance = max(moments.y - moments.x * moments.x, VSM_MIN_VARIANCE); - float d = depth - moments.x; - float pMax = variance / (variance + d * d); - return (depth <= moments.x) ? 1.0 : pMax; + float clampedZ = clamp(viewZ, kFroxelNearZ, farZ); + return log(clampedZ / kFroxelNearZ) / log(max(farZ / kFroxelNearZ, 1.001)); } - // Reduces light bleeding by remapping shadow values below a threshold to zero - float ReduceBleeding(float shadow, float amount) + // Project a camera-relative world position into the per-eye froxel grid UVW and read + // trilinearly-filtered visibility. Returns 1.0 (fully lit) for positions outside the + // frustum or beyond the cascade range. + float SampleShadowFroxel(float3 positionWS, uint eyeIndex) { - return saturate((shadow - amount) / (1.0 - amount)); - } - - // Sample a single cascade for VSM shadow - float SampleVSMCascade3D( - uint cascadeIndex, - float noise, - uint sampleCount, - float rcpSampleCount, - float3 startPositionLS, - float3 endPositionLS, - out float firstSample) - { - float shadow = 0.0; - firstSample = 1.0; + float4 clip = mul(FrameBuffer::CameraViewProjUnjittered[eyeIndex], float4(positionWS, 1.0)); + if (clip.w <= 0.0) + return 1.0; - [loop] for (uint k = 0; k < sampleCount; k++) - { - float t = (float(k) + noise) * rcpSampleCount; - float3 samplePosLS = lerp(endPositionLS, startPositionLS, t); + float3 ndc = clip.xyz / clip.w; + float2 screenUV = ndc.xy * float2(0.5, -0.5) + 0.5; + if (any(screenUV < 0.0) || any(screenUV > 1.0) || ndc.z < 0.0 || ndc.z > 1.0) + return 1.0; - float2 moments = SharedShadowMap.SampleLevel(LinearSampler, samplePosLS.xy, 1u - cascadeIndex); - float lit = ComputeVSM(moments, samplePosLS.z); + float viewZ = SharedData::GetScreenDepth(ndc.z); + float farZ = max(DirectionalShadowLights[0].EndSplitDistances.y, kFroxelNearZ + 1.0); + if (viewZ >= farZ || viewZ <= 0.0) + return 1.0; - // Last to set firstSample is start position - firstSample = lit; + float slice = ViewZToSlice(viewZ, farZ); - shadow += lit; - } +#if defined(VR) + screenUV = Stereo::ConvertToStereoUV(screenUV, eyeIndex); +#endif - return shadow * rcpSampleCount; + return SharedShadowMap.SampleLevel(LinearSampler, float3(screenUV, slice), 0); } - float GetVSMShadow3D(float3 startPosition, float3 endPosition, float noise, uint baseSampleCount, uint eyeIndex, out float surfaceShadow) + // Sample shadow visibility along a view ray (start..end in camera-relative world space) + // using `baseSampleCount` jittered taps. Returns the average shadow and reports the start + // position's shadow value via `surfaceShadow` for back-compat with the prior VSM API. + float GetShadow3D(float3 startPosition, float3 endPosition, float noise, uint baseSampleCount, uint eyeIndex, out float surfaceShadow) { - DirectionalShadowLightData directionalShadowLightData = DirectionalShadowLights[0]; - - // View-space z — matches the linear cascade split distances from BSShadowDirectionalLight. - float3 midPosition = (startPosition + endPosition) * 0.5; - float shadowMapDepth = SharedData::GetScreenDepth(FrameBuffer::GetShadowDepth(midPosition, eyeIndex)); - - // Cascade projections are world-space; positions come in camera-relative. - startPosition += FrameBuffer::CameraPosAdjust[eyeIndex].xyz; - endPosition += FrameBuffer::CameraPosAdjust[eyeIndex].xyz; - - // Early out beyond cascade range - if (shadowMapDepth >= directionalShadowLightData.EndSplitDistances.y) { - surfaceShadow = 1.0; - return 1.0; - } - - // Reduce over distance - float fade = saturate(shadowMapDepth / directionalShadowLightData.EndSplitDistances.y); - - uint sampleCount = max(1, ceil(float(baseSampleCount) * (1.0 - fade))); + uint sampleCount = max(1u, baseSampleCount); float rcpSampleCount = rcp(sampleCount); - // Compute cascade blend factor with smoothstep - float cascadeSelect = saturate((shadowMapDepth - directionalShadowLightData.StartSplitDistances.y) / (directionalShadowLightData.EndSplitDistances.x - directionalShadowLightData.StartSplitDistances.y)); - - // Determine which cascade(s) to sample - uint primaryCascade = uint(cascadeSelect); - bool needsBlending = (cascadeSelect > 0.0) && (cascadeSelect < 1.0); - - // Transform ray to light space for primary cascade - float4x4 shadowProj = directionalShadowLightData.ShadowProj[primaryCascade]; - float3 startLS = mul(shadowProj, float4(startPosition, 1)).xyz; - float3 endLS = mul(shadowProj, float4(endPosition, 1)).xyz; - startLS.xy = saturate(startLS.xy); - endLS.xy = saturate(endLS.xy); - - // Sample primary cascade - float primaryFirstSample; - float shadow = SampleVSMCascade3D(primaryCascade, noise, sampleCount, rcpSampleCount, startLS, endLS, primaryFirstSample); - surfaceShadow = primaryFirstSample; + float shadow = 0.0; + surfaceShadow = 1.0; - // Blend with secondary cascade if needed - [branch] if (needsBlending) + [loop] for (uint k = 0; k < sampleCount; k++) { - uint secondaryCascade = 1 - primaryCascade; - - shadowProj = directionalShadowLightData.ShadowProj[secondaryCascade]; - startLS = mul(shadowProj, float4(startPosition, 1)).xyz; - endLS = mul(shadowProj, float4(endPosition, 1)).xyz; - startLS.xy = saturate(startLS.xy); - endLS.xy = saturate(endLS.xy); - - float secondaryFirstSample; - float shadowBlend = SampleVSMCascade3D(secondaryCascade, noise, sampleCount, rcpSampleCount, startLS, endLS, secondaryFirstSample); - shadow = lerp(shadow, shadowBlend, cascadeSelect); - surfaceShadow = lerp(surfaceShadow, secondaryFirstSample, cascadeSelect); + float t = (float(k) + noise) * rcpSampleCount; + float3 samplePos = lerp(endPosition, startPosition, t); + float lit = SampleShadowFroxel(samplePos, eyeIndex); + // Last iteration's `t` is closest to 1.0, hence closest to startPosition. + surfaceShadow = lit; + shadow += lit; } - // Apply distance fade - float fadeFactor = 1.0 - pow(fade * fade, 8); - surfaceShadow = lerp(1.0, surfaceShadow, fadeFactor); - return lerp(1.0, shadow, fadeFactor); - } - - // Sample a single cascade for VSM shadow (2D point sample) - float SampleVSMCascade2D(uint cascadeIndex, float3 positionLS) - { - float2 moments = SharedShadowMap.SampleLevel(LinearSampler, positionLS.xy, 1u - cascadeIndex); - return ComputeVSM(moments, positionLS.z); + return shadow * rcpSampleCount; } - float GetVSMShadow2D(float3 position, uint eyeIndex, out float detailedShadow) + // Single trilinear lookup at a surface position. + float GetShadow2D(float3 position, uint eyeIndex, out float detailedShadow) { - DirectionalShadowLightData directionalShadowLightData = DirectionalShadowLights[0]; - - float shadowMapDepth = SharedData::GetScreenDepth(FrameBuffer::GetShadowDepth(position, eyeIndex)); - - // Early out beyond cascade range - if (shadowMapDepth >= directionalShadowLightData.EndSplitDistances.y) { - detailedShadow = 1.0; - return 1.0; - } - - // Reduce over distance - float fade = saturate(shadowMapDepth / directionalShadowLightData.EndSplitDistances.y); - - // Cascade projections are world-space; position comes in camera-relative. - float3 positionWS = position + FrameBuffer::CameraPosAdjust[eyeIndex].xyz; - - // Compute cascade blend factor with smoothstep - float cascadeSelect = saturate((shadowMapDepth - directionalShadowLightData.StartSplitDistances.y) / (directionalShadowLightData.EndSplitDistances.x - directionalShadowLightData.StartSplitDistances.y)); - - // Determine which cascade(s) to sample - uint primaryCascade = uint(cascadeSelect); - bool needsBlending = (cascadeSelect > 0.0) && (cascadeSelect < 1.0); - - // Transform position to light space for primary cascade - float3 positionLS = mul(directionalShadowLightData.ShadowProj[primaryCascade], float4(positionWS, 1)).xyz; - positionLS.xy = saturate(positionLS.xy); - - // Sample primary cascade - float shadow = SampleVSMCascade2D(primaryCascade, positionLS); - - // Blend with secondary cascade if needed - [branch] if (needsBlending) - { - uint secondaryCascade = 1 - primaryCascade; - - positionLS = mul(directionalShadowLightData.ShadowProj[secondaryCascade], float4(positionWS, 1)).xyz; - positionLS.xy = saturate(positionLS.xy); - - float shadowBlend = SampleVSMCascade2D(secondaryCascade, positionLS); - shadow = lerp(shadow, shadowBlend, cascadeSelect); - } - - // Apply distance fade - float fadeFactor = 1.0 - pow(fade * fade, 8); - detailedShadow = lerp(1.0, ReduceBleeding(shadow, VSM_BLEEDING_REDUCTION), fadeFactor); - return lerp(1.0, shadow, fadeFactor); + float shadow = SampleShadowFroxel(position, eyeIndex); + detailedShadow = shadow; + return shadow; } } diff --git a/package/Shaders/Common/ShadowSampling.hlsli b/package/Shaders/Common/ShadowSampling.hlsli index 70e7143f54..cd841c7219 100644 --- a/package/Shaders/Common/ShadowSampling.hlsli +++ b/package/Shaders/Common/ShadowSampling.hlsli @@ -107,9 +107,9 @@ namespace ShadowSampling #if defined(VOLUMETRIC_SHADOWS) if (HasDirectionalShadows()) { - float vsmSurfaceShadow; - float shadow = VolumetricShadows::GetVSMShadow3D(startPosition, endPosition, noise, sampleCount, eyeIndex, vsmSurfaceShadow); - surfaceShadow *= vsmSurfaceShadow; + float froxelSurfaceShadow; + float shadow = VolumetricShadows::GetShadow3D(startPosition, endPosition, noise, sampleCount, eyeIndex, froxelSurfaceShadow); + surfaceShadow *= froxelSurfaceShadow; return worldShadow * shadow; } #else @@ -127,7 +127,7 @@ namespace ShadowSampling } #if defined(VOLUMETRIC_SHADOWS) - float shadow = VolumetricShadows::GetVSMShadow2D(worldPosition, eyeIndex, detailedShadow); + float shadow = VolumetricShadows::GetShadow2D(worldPosition, eyeIndex, detailedShadow); return shadow; #else detailedShadow = 1.0; diff --git a/src/Features/VolumetricShadows.cpp b/src/Features/VolumetricShadows.cpp index 5bc31c8a04..252517d25d 100644 --- a/src/Features/VolumetricShadows.cpp +++ b/src/Features/VolumetricShadows.cpp @@ -1,5 +1,6 @@ #include "VolumetricShadows.h" +#include "Deferred.h" #include "State.h" #include "Utils/D3D.h" @@ -7,7 +8,6 @@ void VolumetricShadows::SetupResources() { auto device = globals::d3d::device; - // Create samplers { D3D11_SAMPLER_DESC samplerDesc = {}; samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; @@ -21,280 +21,171 @@ void VolumetricShadows::SetupResources() Util::SetResourceName(linearSampler, "VolumetricShadows::LinearSampler"); } - // Compile compute shaders + { + D3D11_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D11_FILTER_COMPARISON_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.ComparisonFunc = D3D11_COMPARISON_LESS_EQUAL; + samplerDesc.MinLOD = 0; + samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; + DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, &comparisonSampler)); + Util::SetResourceName(comparisonSampler, "VolumetricShadows::ComparisonSampler"); + } + + configCB = new ConstantBuffer(ConstantBufferDesc(), "VolumetricShadows::ConfigCB"); + + CompileShaders(); +} + +void VolumetricShadows::CompileShaders() +{ std::vector> defines; - defines.push_back({ "DOWNSAMPLE_SHADOW_MIP0", nullptr }); - downsampleShadowMip0CS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\DownsampleShadowCS.hlsl", defines, "cs_5_0")); - defines.clear(); - defines.push_back({ "DOWNSAMPLE_SHADOW_MIP1", nullptr }); - downsampleShadowMip1CS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\DownsampleShadowCS.hlsl", defines, "cs_5_0")); - - defines.clear(); - defines.push_back({ "BLUR_HORIZONTAL", nullptr }); - blurShadowHorizontalCS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\BlurShadowCS.hlsl", defines, "cs_5_0")); - defines.clear(); - defines.push_back({ "BLUR_VERTICAL", nullptr }); - blurShadowVerticalCS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\BlurShadowCS.hlsl", defines, "cs_5_0")); + if (REL::Module::IsVR()) + defines.push_back({ "VR", nullptr }); + buildShadowFroxelCS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\BuildShadowFroxelCS.hlsl", defines, "cs_5_0")); } void VolumetricShadows::ClearShaderCache() { - if (downsampleShadowMip0CS) { - downsampleShadowMip0CS->Release(); - downsampleShadowMip0CS = nullptr; - } - if (downsampleShadowMip1CS) { - downsampleShadowMip1CS->Release(); - downsampleShadowMip1CS = nullptr; - } - if (blurShadowHorizontalCS) { - blurShadowHorizontalCS->Release(); - blurShadowHorizontalCS = nullptr; - } - if (blurShadowVerticalCS) { - blurShadowVerticalCS->Release(); - blurShadowVerticalCS = nullptr; + if (buildShadowFroxelCS) { + buildShadowFroxelCS->Release(); + buildShadowFroxelCS = nullptr; } + CompileShaders(); +} - std::vector> defines; - defines.push_back({ "DOWNSAMPLE_SHADOW_MIP0", nullptr }); - downsampleShadowMip0CS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\DownsampleShadowCS.hlsl", defines, "cs_5_0")); - defines.clear(); - defines.push_back({ "DOWNSAMPLE_SHADOW_MIP1", nullptr }); - downsampleShadowMip1CS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\DownsampleShadowCS.hlsl", defines, "cs_5_0")); - - defines.clear(); - defines.push_back({ "BLUR_HORIZONTAL", nullptr }); - blurShadowHorizontalCS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\BlurShadowCS.hlsl", defines, "cs_5_0")); - defines.clear(); - defines.push_back({ "BLUR_VERTICAL", nullptr }); - blurShadowVerticalCS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\BlurShadowCS.hlsl", defines, "cs_5_0")); +void VolumetricShadows::CreateFroxelResources() +{ + auto device = globals::d3d::device; + + froxelWidth = kFroxelGridWidth * (REL::Module::IsVR() ? 2u : 1u); + froxelHeight = kFroxelGridHeight; + froxelDepth = kFroxelGridDepth; + + D3D11_TEXTURE3D_DESC texDesc{}; + texDesc.Width = froxelWidth; + texDesc.Height = froxelHeight; + texDesc.Depth = froxelDepth; + texDesc.MipLevels = 1; + texDesc.Format = DXGI_FORMAT_R8_UNORM; + texDesc.Usage = D3D11_USAGE_DEFAULT; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + DX::ThrowIfFailed(device->CreateTexture3D(&texDesc, nullptr, &shadowFroxelTexture)); + Util::SetResourceName(shadowFroxelTexture, "VolumetricShadows::ShadowFroxel"); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc{}; + srvDesc.Format = texDesc.Format; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + srvDesc.Texture3D.MipLevels = 1; + DX::ThrowIfFailed(device->CreateShaderResourceView(shadowFroxelTexture, &srvDesc, &shadowFroxelSRV)); + Util::SetResourceName(shadowFroxelSRV, "VolumetricShadows::ShadowFroxel SRV"); + + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc{}; + uavDesc.Format = texDesc.Format; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE3D; + uavDesc.Texture3D.WSize = froxelDepth; + DX::ThrowIfFailed(device->CreateUnorderedAccessView(shadowFroxelTexture, &uavDesc, &shadowFroxelUAV)); + Util::SetResourceName(shadowFroxelUAV, "VolumetricShadows::ShadowFroxel UAV"); } -void VolumetricShadows::CopyShadowLightData() +void VolumetricShadows::BuildShadowFroxel() { ZoneScoped; - TracyD3D11Zone(globals::state->tracyCtx, "VolumetricShadows::CopyShadowLightData"); + TracyD3D11Zone(globals::state->tracyCtx, "VolumetricShadows::BuildShadowFroxel"); auto context = globals::d3d::context; - { - if (!globals::state->HasDirectionalShadows()) { - SetSharedShadowMapSRV(context, nullptr); - return; - } - - context->PSGetShaderResources(4, 1, &shadowView); - - // Downsample shadow texture array to fixed 512x512 (mip1: 256x256) - if (shadowView) { - constexpr uint32_t SHADOW_COPY_SIZE = 512; - - // Lazily create fixed-size output textures - if (!shadowCopyTexture) { - shadowCopyWidth = SHADOW_COPY_SIZE; - shadowCopyHeight = SHADOW_COPY_SIZE; - - D3D11_TEXTURE2D_DESC copyDesc{}; - copyDesc.Width = SHADOW_COPY_SIZE; - copyDesc.Height = SHADOW_COPY_SIZE; - copyDesc.MipLevels = 2; - copyDesc.ArraySize = 1; - copyDesc.Format = DXGI_FORMAT_R16G16_UNORM; - copyDesc.SampleDesc.Count = 1; - copyDesc.SampleDesc.Quality = 0; - copyDesc.Usage = D3D11_USAGE_DEFAULT; - copyDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_RENDER_TARGET; - copyDesc.MiscFlags = 0; - - auto device = globals::d3d::device; - DX::ThrowIfFailed(device->CreateTexture2D(©Desc, nullptr, &shadowCopyTexture)); - Util::SetResourceName(shadowCopyTexture, "VolumetricShadows::ShadowCopy"); - - D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc{}; - srvDesc.Format = copyDesc.Format; - srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; - srvDesc.Texture2D.MostDetailedMip = 0; - srvDesc.Texture2D.MipLevels = 2; - DX::ThrowIfFailed(device->CreateShaderResourceView(shadowCopyTexture, &srvDesc, &shadowCopySRV)); - Util::SetResourceName(shadowCopySRV, "VolumetricShadows::ShadowCopy SRV"); - - // Create mip-specific SRVs for blur passes - srvDesc.Texture2D.MostDetailedMip = 0; - srvDesc.Texture2D.MipLevels = 1; - DX::ThrowIfFailed(device->CreateShaderResourceView(shadowCopyTexture, &srvDesc, &shadowCopyMip0SRV)); - Util::SetResourceName(shadowCopyMip0SRV, "VolumetricShadows::ShadowCopy SRV mip0"); - - srvDesc.Texture2D.MostDetailedMip = 1; - srvDesc.Texture2D.MipLevels = 1; - DX::ThrowIfFailed(device->CreateShaderResourceView(shadowCopyTexture, &srvDesc, &shadowCopyMip1SRV)); - Util::SetResourceName(shadowCopyMip1SRV, "VolumetricShadows::ShadowCopy SRV mip1"); - - D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc{}; - uavDesc.Format = copyDesc.Format; - uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; - uavDesc.Texture2D.MipSlice = 0; - DX::ThrowIfFailed(device->CreateUnorderedAccessView(shadowCopyTexture, &uavDesc, &shadowCopyMip0UAV)); - Util::SetResourceName(shadowCopyMip0UAV, "VolumetricShadows::ShadowCopy UAV mip0"); - - uavDesc.Texture2D.MipSlice = 1; - DX::ThrowIfFailed(device->CreateUnorderedAccessView(shadowCopyTexture, &uavDesc, &shadowCopyMip1UAV)); - Util::SetResourceName(shadowCopyMip1UAV, "VolumetricShadows::ShadowCopy UAV mip1"); - - // Create temporary texture for blur intermediate result - DX::ThrowIfFailed(device->CreateTexture2D(©Desc, nullptr, &shadowBlurTempTexture)); - Util::SetResourceName(shadowBlurTempTexture, "VolumetricShadows::ShadowBlurTemp"); - - // Create mip-specific SRVs for blur temp texture - srvDesc.Texture2D.MostDetailedMip = 0; - srvDesc.Texture2D.MipLevels = 1; - DX::ThrowIfFailed(device->CreateShaderResourceView(shadowBlurTempTexture, &srvDesc, &shadowBlurTempMip0SRV)); - Util::SetResourceName(shadowBlurTempMip0SRV, "VolumetricShadows::ShadowBlurTemp SRV mip0"); - - srvDesc.Texture2D.MostDetailedMip = 1; - srvDesc.Texture2D.MipLevels = 1; - DX::ThrowIfFailed(device->CreateShaderResourceView(shadowBlurTempTexture, &srvDesc, &shadowBlurTempMip1SRV)); - Util::SetResourceName(shadowBlurTempMip1SRV, "VolumetricShadows::ShadowBlurTemp SRV mip1"); - - uavDesc.Texture2D.MipSlice = 0; - DX::ThrowIfFailed(device->CreateUnorderedAccessView(shadowBlurTempTexture, &uavDesc, &shadowBlurTempMip0UAV)); - Util::SetResourceName(shadowBlurTempMip0UAV, "VolumetricShadows::ShadowBlurTemp UAV mip0"); - - uavDesc.Texture2D.MipSlice = 1; - DX::ThrowIfFailed(device->CreateUnorderedAccessView(shadowBlurTempTexture, &uavDesc, &shadowBlurTempMip1UAV)); - Util::SetResourceName(shadowBlurTempMip1UAV, "VolumetricShadows::ShadowBlurTemp UAV mip1"); - } - - // Get input dimensions for dispatch sizing - ID3D11Resource* shadowResource = nullptr; - shadowView->GetResource(&shadowResource); - - if (shadowResource) { - ID3D11Texture2D* shadowTexture = nullptr; - shadowResource->QueryInterface(__uuidof(ID3D11Texture2D), reinterpret_cast(&shadowTexture)); - - if (shadowTexture) { - D3D11_TEXTURE2D_DESC srcDesc; - shadowTexture->GetDesc(&srcDesc); - - // Dispatch downsample compute shader - auto renderer = globals::game::renderer; - auto& esramDepthStencil = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kVOLUMETRIC_LIGHTING_SHADOWMAPS_ESRAM]; - - ID3D11ShaderResourceView* csSrvs[2]{ shadowView, esramDepthStencil.depthSRV }; - context->CSSetShaderResources(0, 2, csSrvs); - - context->CSSetSamplers(0, 1, &linearSampler); - - // Dispatch covers full input: each thread gathers 2x2, 8 threads per group - auto dispatchSize = srcDesc.Width / 16; - - // Mip 0 (cascade 1) - ID3D11UnorderedAccessView* csUavs[1]{ shadowCopyMip0UAV }; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - context->CSSetShader(downsampleShadowMip0CS, nullptr, 0); - context->Dispatch(dispatchSize, dispatchSize, 1); - - // Mip 1 (cascade 0) - csUavs[0] = shadowCopyMip1UAV; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - context->CSSetShader(downsampleShadowMip1CS, nullptr, 0); - context->Dispatch(dispatchSize, dispatchSize, 1); - - // Unbind SRVs before blur passes - csSrvs[0] = nullptr; - csSrvs[1] = nullptr; - context->CSSetShaderResources(0, 2, csSrvs); - csUavs[0] = nullptr; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - - constexpr uint32_t mip0Size = SHADOW_COPY_SIZE; - constexpr uint32_t mip1Size = SHADOW_COPY_SIZE / 2; - - // 11x11 separable blur for Mip 0 - { - const uint32_t GROUP_SIZE = 128; - - // Horizontal pass: shadowCopy mip0 -> shadowBlurTemp mip0 - ID3D11ShaderResourceView* blurSrvs[1]{ shadowCopyMip0SRV }; - context->CSSetShaderResources(0, 1, blurSrvs); - csUavs[0] = shadowBlurTempMip0UAV; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - context->CSSetShader(blurShadowHorizontalCS, nullptr, 0); - context->Dispatch((mip0Size + GROUP_SIZE - 1) / GROUP_SIZE, mip0Size, 1); - - // Unbind for next pass - blurSrvs[0] = nullptr; - context->CSSetShaderResources(0, 1, blurSrvs); - csUavs[0] = nullptr; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - - // Vertical pass: shadowBlurTemp mip0 -> shadowCopy mip0 - blurSrvs[0] = shadowBlurTempMip0SRV; - context->CSSetShaderResources(0, 1, blurSrvs); - csUavs[0] = shadowCopyMip0UAV; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - context->CSSetShader(blurShadowVerticalCS, nullptr, 0); - context->Dispatch(mip0Size, (mip0Size + GROUP_SIZE - 1) / GROUP_SIZE, 1); - - // Unbind - blurSrvs[0] = nullptr; - context->CSSetShaderResources(0, 1, blurSrvs); - csUavs[0] = nullptr; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - } - - // 11x11 separable blur for Mip 1 - { - const uint32_t GROUP_SIZE = 128; - - // Horizontal pass: shadowCopy mip1 -> shadowBlurTemp mip1 - ID3D11ShaderResourceView* blurSrvs[1]{ shadowCopyMip1SRV }; - context->CSSetShaderResources(0, 1, blurSrvs); - csUavs[0] = shadowBlurTempMip1UAV; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - context->CSSetShader(blurShadowHorizontalCS, nullptr, 0); - context->Dispatch((mip1Size + GROUP_SIZE - 1) / GROUP_SIZE, mip1Size, 1); - - // Unbind for next pass - blurSrvs[0] = nullptr; - context->CSSetShaderResources(0, 1, blurSrvs); - csUavs[0] = nullptr; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - - // Vertical pass: shadowBlurTemp mip1 -> shadowCopy mip1 - blurSrvs[0] = shadowBlurTempMip1SRV; - context->CSSetShaderResources(0, 1, blurSrvs); - csUavs[0] = shadowCopyMip1UAV; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - context->CSSetShader(blurShadowVerticalCS, nullptr, 0); - context->Dispatch(mip1Size, (mip1Size + GROUP_SIZE - 1) / GROUP_SIZE, 1); - - // Unbind - blurSrvs[0] = nullptr; - context->CSSetShaderResources(0, 1, blurSrvs); - csUavs[0] = nullptr; - context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); - } - - // Cleanup CS state - ID3D11SamplerState* nullSampler = nullptr; - context->CSSetSamplers(0, 1, &nullSampler); - context->CSSetShader(nullptr, nullptr, 0); - - shadowTexture->Release(); - } - shadowResource->Release(); - } - } - - auto* srv = shadowView ? (shadowCopySRV ? shadowCopySRV : shadowView) : nullptr; - SetSharedShadowMapSRV(context, srv); - - if (shadowView) - shadowView->Release(); + if (!globals::state->HasDirectionalShadows()) { + SetSharedShadowMapSRV(context, nullptr); + return; + } + + // Capture Skyrim's bound directional shadow cascade array (Texture2DArray). + context->PSGetShaderResources(4, 1, &shadowView); + if (!shadowView) { + SetSharedShadowMapSRV(context, nullptr); + return; + } + + if (!shadowFroxelTexture) + CreateFroxelResources(); + + if (!buildShadowFroxelCS) { + shadowView->Release(); shadowView = nullptr; + SetSharedShadowMapSRV(context, nullptr); + return; } + + // Mirror Deferred::CopyShadowLightData here so the t98 SBO contains current-frame cascade + // matrices before our build dispatch reads from it. Deferred will refresh it again during + // EarlyPrepasses; calling twice is cheap and avoids ordering coupling. + globals::deferred->CopyShadowLightData(); + + // Configure the build pass. + VolumetricShadowsCB cbData{}; + cbData.GridSize[0] = froxelWidth; + cbData.GridSize[1] = froxelHeight; + cbData.GridSize[2] = froxelDepth; + cbData.HasShadows = 1; + cbData.NearZ = 16.0f; + cbData.FarZ = 1.0f; // The shader reads the actual far split distance from the SBO; this is a fallback. + cbData.ShadowBias = 0.0001f; + configCB->Update(cbData); + + ID3D11Buffer* perFrameCB = *globals::game::perFrame.get(); + ID3D11Buffer* vrPerFrameCB = nullptr; + if (REL::Module::IsVR()) { + static REL::Relocation VRValues{ REL::Offset(0x3180688) }; + vrPerFrameCB = *VRValues.get(); + } + + ID3D11Buffer* sharedDataBuf = globals::state->sharedDataCB->CB(); + ID3D11Buffer* configBuf = configCB->CB(); + + context->CSSetConstantBuffers(1, 1, &configBuf); + context->CSSetConstantBuffers(5, 1, &sharedDataBuf); + context->CSSetConstantBuffers(12, 1, &perFrameCB); + if (vrPerFrameCB) + context->CSSetConstantBuffers(13, 1, &vrPerFrameCB); + + ID3D11ShaderResourceView* directionalShadowLightsSRV = globals::deferred->directionalShadowLights->srv.get(); + ID3D11ShaderResourceView* csSrvs[2]{ shadowView, directionalShadowLightsSRV }; + context->CSSetShaderResources(0, 1, &csSrvs[0]); + context->CSSetShaderResources(98, 1, &csSrvs[1]); + + ID3D11SamplerState* samplers[2]{ comparisonSampler, linearSampler }; + context->CSSetSamplers(0, 2, samplers); + + ID3D11UnorderedAccessView* uavs[1]{ shadowFroxelUAV }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + context->CSSetShader(buildShadowFroxelCS, nullptr, 0); + + const uint32_t groupX = (froxelWidth + 7u) / 8u; + const uint32_t groupY = (froxelHeight + 7u) / 8u; + const uint32_t groupZ = (froxelDepth + 3u) / 4u; + context->Dispatch(groupX, groupY, groupZ); + + // Cleanup CS bindings. + ID3D11Buffer* nullCBs[1] = { nullptr }; + ID3D11ShaderResourceView* nullSRVs[1] = { nullptr }; + ID3D11SamplerState* nullSamplers[2] = { nullptr, nullptr }; + ID3D11UnorderedAccessView* nullUAVs[1] = { nullptr }; + context->CSSetUnorderedAccessViews(0, 1, nullUAVs, nullptr); + context->CSSetShaderResources(0, 1, nullSRVs); + context->CSSetShaderResources(98, 1, nullSRVs); + context->CSSetSamplers(0, 2, nullSamplers); + context->CSSetConstantBuffers(1, 1, nullCBs); + context->CSSetShader(nullptr, nullptr, 0); + + SetSharedShadowMapSRV(context, shadowFroxelSRV); + + shadowView->Release(); + shadowView = nullptr; } void VolumetricShadows::SetSharedShadowMapSRV(ID3D11DeviceContext* a_context, ID3D11ShaderResourceView* a_srv) @@ -304,29 +195,17 @@ void VolumetricShadows::SetSharedShadowMapSRV(ID3D11DeviceContext* a_context, ID void VolumetricShadows::DrawSettings() { - ImGui::SeparatorText("Debug"); - - if (ImGui::TreeNode("Buffer Viewer")) { - static float debugRescale = .3f; - ImGui::SliderFloat("View Resize", &debugRescale, 0.f, 1.f); - - auto DisplayRT = [&](const char* label, ID3D11Texture2D* tex, ID3D11ShaderResourceView* srv) { - if (srv && tex) { - D3D11_TEXTURE2D_DESC desc; - tex->GetDesc(&desc); - char buf[128]; - snprintf(buf, sizeof(buf), "%s (%ux%u)", label, desc.Width, desc.Height); - if (ImGui::TreeNode(buf)) { - ImGui::Image(srv, { desc.Width * debugRescale, desc.Height * debugRescale }); - ImGui::TreePop(); - } - } - }; - - DisplayRT("VSM Cascade 0", shadowCopyTexture, shadowCopyMip0SRV); - DisplayRT("VSM Cascade 1", shadowCopyTexture, shadowCopyMip1SRV); - - ImGui::TreePop(); + ImGui::TextWrapped( + "Builds a %ux%ux%u view-space froxel grid (per eye) of PCF-filtered directional shadow visibility.\n" + "Consumers sample the grid via the shared shadow map texture at slot t%u.", + kFroxelGridWidth, + kFroxelGridHeight, + kFroxelGridDepth, + kSharedShadowMapShaderSlot); + + if (shadowFroxelTexture) { + ImGui::SeparatorText("Resource"); + ImGui::Text("Shadow Froxel: %ux%ux%u (R8_UNORM)", froxelWidth, froxelHeight, froxelDepth); } } diff --git a/src/Features/VolumetricShadows.h b/src/Features/VolumetricShadows.h index fb596dad6d..dce8d62a41 100644 --- a/src/Features/VolumetricShadows.h +++ b/src/Features/VolumetricShadows.h @@ -14,53 +14,65 @@ struct VolumetricShadows : Feature static constexpr uint32_t kSharedShadowMapShaderSlot = 18; + // Froxel grid dimensions. The width doubles in VR to cover side-by-side stereo. + static constexpr uint32_t kFroxelGridWidth = 160; + static constexpr uint32_t kFroxelGridHeight = 96; + static constexpr uint32_t kFroxelGridDepth = 64; + virtual std::pair> GetFeatureSummary() override { return { - "Volumetric Shadows provides downsampled VSM shadow maps for use by effects like particles and decals.\n" - "This improves shadow quality on transparent objects with minimal performance impact.", - { "Downsampled VSM shadows", - "Gaussian blur filtering", - "Multi-cascade support", + "Volumetric Shadows pre-filters the directional shadow cascades into a view-space froxel grid.\n" + "Consumers (particles, decals, effects, transparent geometry) sample the grid directly,\n" + "which is cheaper than re-projecting into shadow space per pixel and gives smoother results than VSM.", + { "View-space froxel grid", + "PCF pre-filtering", + "Multi-cascade blending", "Optimized for effects rendering" } }; } bool HasShaderDefine(RE::BSShader::Type shaderType) override; - // Compute shaders - ID3D11ComputeShader* downsampleShadowMip0CS = nullptr; - ID3D11ComputeShader* downsampleShadowMip1CS = nullptr; - ID3D11ComputeShader* blurShadowHorizontalCS = nullptr; - ID3D11ComputeShader* blurShadowVerticalCS = nullptr; - - ID3D11ShaderResourceView* shadowView = nullptr; + // Pre-filtered shadow froxel grid (R8 visibility in view space). + ID3D11Texture3D* shadowFroxelTexture = nullptr; + ID3D11ShaderResourceView* shadowFroxelSRV = nullptr; + ID3D11UnorderedAccessView* shadowFroxelUAV = nullptr; + uint32_t froxelWidth = 0; + uint32_t froxelHeight = 0; + uint32_t froxelDepth = 0; - // Downsampled shadow texture with 2 mip levels - ID3D11Texture2D* shadowCopyTexture = nullptr; - ID3D11ShaderResourceView* shadowCopySRV = nullptr; - ID3D11ShaderResourceView* shadowCopyMip0SRV = nullptr; - ID3D11ShaderResourceView* shadowCopyMip1SRV = nullptr; - ID3D11UnorderedAccessView* shadowCopyMip0UAV = nullptr; - ID3D11UnorderedAccessView* shadowCopyMip1UAV = nullptr; - uint32_t shadowCopyWidth = 0; - uint32_t shadowCopyHeight = 0; - - // Temporary texture for blur intermediate result - ID3D11Texture2D* shadowBlurTempTexture = nullptr; - ID3D11ShaderResourceView* shadowBlurTempMip0SRV = nullptr; - ID3D11ShaderResourceView* shadowBlurTempMip1SRV = nullptr; - ID3D11UnorderedAccessView* shadowBlurTempMip0UAV = nullptr; - ID3D11UnorderedAccessView* shadowBlurTempMip1UAV = nullptr; + ID3D11ComputeShader* buildShadowFroxelCS = nullptr; // Samplers ID3D11SamplerState* linearSampler = nullptr; + ID3D11SamplerState* comparisonSampler = nullptr; + + // Build-time configuration uploaded each dispatch. + struct alignas(16) VolumetricShadowsCB + { + uint32_t GridSize[3]; + uint32_t HasShadows; + + float NearZ; + float FarZ; + float ShadowBias; + float pad0; + }; + static_assert(sizeof(VolumetricShadowsCB) % 16 == 0, "VolumetricShadowsCB must be 16-byte aligned"); + + ConstantBuffer* configCB = nullptr; + + // Transient pointer captured from the shadow rendering pass. + ID3D11ShaderResourceView* shadowView = nullptr; virtual void DrawSettings() override; virtual void SetupResources() override; virtual void ClearShaderCache() override; - void CopyShadowLightData(); + // Captured during Skyrim's shadow rendering pass (slot t4 holds the cascade SRV here). + // Triggered from State::Draw on the RenderShadowmask utility shader. + void BuildShadowFroxel(); virtual void LoadSettings(json& o_json) override; virtual void SaveSettings(json& o_json) override; @@ -71,5 +83,8 @@ struct VolumetricShadows : Feature virtual void PostPostLoad() override; private: + void CreateFroxelResources(); + void CompileShaders(); + static void SetSharedShadowMapSRV(ID3D11DeviceContext* a_context, ID3D11ShaderResourceView* a_srv); }; diff --git a/src/State.cpp b/src/State.cpp index 753f2ce18c..b304667d89 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -95,7 +95,7 @@ void State::Draw() if (currentShader->shaderType.get() == RE::BSShader::Type::Utility) { if (currentPixelDescriptor & static_cast(SIE::ShaderCache::UtilityShaderFlags::RenderShadowmask)) { if (volumetricShadows.loaded) - volumetricShadows.CopyShadowLightData(); + volumetricShadows.BuildShadowFroxel(); } } }