diff --git a/features/Volumetric Shadows/Shaders/VolumetricShadows/BlurShadowCS.hlsl b/features/Volumetric Shadows/Shaders/VolumetricShadows/BlurShadowCS.hlsl index 266cd01fcc..4bd950aa50 100644 --- a/features/Volumetric Shadows/Shaders/VolumetricShadows/BlurShadowCS.hlsl +++ b/features/Volumetric Shadows/Shaders/VolumetricShadows/BlurShadowCS.hlsl @@ -1,33 +1,29 @@ -// 11x11 separable Gaussian blur for VSM shadow map +// Separable Gaussian blur for EVSM shadow map moments // BLUR_HORIZONTAL - horizontal pass // BLUR_VERTICAL - vertical pass -Texture2D InputTexture : register(t0); -RWTexture2D OutputTexture : register(u0); - -// Gaussian weights for 11-tap kernel (sigma ~= 2.5) -static const float weights[6] = { - 0.198596, // center - 0.175713, // +/- 1 - 0.121703, // +/- 2 - 0.065984, // +/- 3 - 0.028002, // +/- 4 - 0.009302 // +/- 5 +Texture2D InputTexture : register(t0); +RWTexture2D OutputTexture : register(u0); + +cbuffer BlurCB : register(b0) +{ + uint BlurRadius; + uint _pad[3]; }; -#define KERNEL_RADIUS 5 +#define MAX_KERNEL_RADIUS 32 #define GROUP_SIZE 128 // Shared memory for efficient loading -// We need GROUP_SIZE + 2 * KERNEL_RADIUS elements -groupshared float2 g_cache[GROUP_SIZE + 2 * KERNEL_RADIUS]; +// We need GROUP_SIZE + 2 * MAX_KERNEL_RADIUS elements +groupshared float4 g_cache[GROUP_SIZE + 2 * MAX_KERNEL_RADIUS]; #if defined(BLUR_HORIZONTAL) [numthreads(GROUP_SIZE, 1, 1)] void main(uint3 groupID : SV_GroupID, uint3 groupThreadID : SV_GroupThreadID, uint3 dispatchThreadID : SV_DispatchThreadID) { uint width, height; InputTexture.GetDimensions(width, height); - int2 baseCoord = int2(groupID.x * GROUP_SIZE - KERNEL_RADIUS, groupID.y); + int2 baseCoord = int2(groupID.x * GROUP_SIZE - MAX_KERNEL_RADIUS, groupID.y); int localIdx = groupThreadID.x; // Load main data @@ -36,7 +32,7 @@ groupshared float2 g_cache[GROUP_SIZE + 2 * KERNEL_RADIUS]; g_cache[localIdx] = InputTexture[coord]; // Load extra data for kernel overlap - if (localIdx < 2 * KERNEL_RADIUS) { + if (localIdx < 2 * MAX_KERNEL_RADIUS) { coord = baseCoord + int2(GROUP_SIZE + localIdx, 0); coord.x = clamp(coord.x, 0, (int)width - 1); g_cache[GROUP_SIZE + localIdx] = InputTexture[coord]; @@ -48,16 +44,21 @@ groupshared float2 g_cache[GROUP_SIZE + 2 * KERNEL_RADIUS]; if (dispatchThreadID.x >= width || dispatchThreadID.y >= height) return; - // Apply horizontal blur - float2 result = g_cache[localIdx + KERNEL_RADIUS] * weights[0]; + // Apply horizontal blur with dynamic radius + uint radius = min(BlurRadius, (uint)MAX_KERNEL_RADIUS); + float sigma = max(float(radius) * 0.5, 0.5); + float rcpTwoSigma2 = rcp(2.0 * sigma * sigma); + + float4 result = g_cache[localIdx + MAX_KERNEL_RADIUS]; + float totalWeight = 1.0; - [unroll] for (int i = 1; i <= KERNEL_RADIUS; i++) - { - result += g_cache[localIdx + KERNEL_RADIUS - i] * weights[i]; - result += g_cache[localIdx + KERNEL_RADIUS + i] * weights[i]; + for (uint i = 1; i <= radius; i++) { + float w = exp(-float(i * i) * rcpTwoSigma2); + result += (g_cache[localIdx + MAX_KERNEL_RADIUS - i] + g_cache[localIdx + MAX_KERNEL_RADIUS + i]) * w; + totalWeight += 2.0 * w; } - OutputTexture[dispatchThreadID.xy] = result; + OutputTexture[dispatchThreadID.xy] = result * rcp(totalWeight); } #elif defined(BLUR_VERTICAL) @@ -65,7 +66,7 @@ groupshared float2 g_cache[GROUP_SIZE + 2 * KERNEL_RADIUS]; uint width, height; InputTexture.GetDimensions(width, height); - int2 baseCoord = int2(groupID.x, groupID.y * GROUP_SIZE - KERNEL_RADIUS); + int2 baseCoord = int2(groupID.x, groupID.y * GROUP_SIZE - MAX_KERNEL_RADIUS); int localIdx = groupThreadID.y; // Load main data @@ -74,7 +75,7 @@ groupshared float2 g_cache[GROUP_SIZE + 2 * KERNEL_RADIUS]; g_cache[localIdx] = InputTexture[coord]; // Load extra data for kernel overlap - if (localIdx < 2 * KERNEL_RADIUS) { + if (localIdx < 2 * MAX_KERNEL_RADIUS) { coord = baseCoord + int2(0, GROUP_SIZE + localIdx); coord.y = clamp(coord.y, 0, (int)height - 1); g_cache[GROUP_SIZE + localIdx] = InputTexture[coord]; @@ -86,15 +87,20 @@ groupshared float2 g_cache[GROUP_SIZE + 2 * KERNEL_RADIUS]; if (dispatchThreadID.x >= width || dispatchThreadID.y >= height) return; - // Apply vertical blur - float2 result = g_cache[localIdx + KERNEL_RADIUS] * weights[0]; + // Apply vertical blur with dynamic radius + uint radius = min(BlurRadius, (uint)MAX_KERNEL_RADIUS); + float sigma = max(float(radius) * 0.5, 0.5); + float rcpTwoSigma2 = rcp(2.0 * sigma * sigma); + + float4 result = g_cache[localIdx + MAX_KERNEL_RADIUS]; + float totalWeight = 1.0; - [unroll] for (int i = 1; i <= KERNEL_RADIUS; i++) - { - result += g_cache[localIdx + KERNEL_RADIUS - i] * weights[i]; - result += g_cache[localIdx + KERNEL_RADIUS + i] * weights[i]; + for (uint i = 1; i <= radius; i++) { + float w = exp(-float(i * i) * rcpTwoSigma2); + result += (g_cache[localIdx + MAX_KERNEL_RADIUS - i] + g_cache[localIdx + MAX_KERNEL_RADIUS + i]) * w; + totalWeight += 2.0 * w; } - OutputTexture[dispatchThreadID.xy] = result; + OutputTexture[dispatchThreadID.xy] = result * rcp(totalWeight); } #endif diff --git a/features/Volumetric Shadows/Shaders/VolumetricShadows/DownsampleShadowCS.hlsl b/features/Volumetric Shadows/Shaders/VolumetricShadows/DownsampleShadowCS.hlsl index b7f960281e..f9a324a844 100644 --- a/features/Volumetric Shadows/Shaders/VolumetricShadows/DownsampleShadowCS.hlsl +++ b/features/Volumetric Shadows/Shaders/VolumetricShadows/DownsampleShadowCS.hlsl @@ -1,19 +1,43 @@ Texture2DArray InputTexture : register(t0); Texture2DArray ESRAMShadow : register(t1); -RWTexture2D OutputTexture : register(u0); +RWTexture2D OutputTexture : register(u0); SamplerState LinearSampler : register(s0); -float2 GetVSMMoments(in float depth) +cbuffer EVSMLinearizeCB : register(b0) { - return float2(depth, depth * depth); + float CascadeNear; + float CascadeFar; + float GlobalNear; + float GlobalFar; + float ExponentPositive; + float ExponentNegative; +}; + +// Convert orthographic shadow map depth [0,1] to globally-normalized linear depth [0,1]. +// Shadow map depth is linear within each cascade: worldZ = near + depth * (far - near). +// We then remap to a global range shared by both cascades so exponents behave consistently. +float NormalizeDepth(float depth) +{ + float worldZ = CascadeNear + depth * (CascadeFar - CascadeNear); + return (worldZ - GlobalNear) / (GlobalFar - GlobalNear); +} + +// Warp depth into EVSM moments: (e^(c*d), e^(2c*d), e^(-c*d), e^(-2c*d)) +// Positive exponent detects front-face occlusion, negative detects back-face (light bleeding). +float4 WarpDepth(float depth) +{ + float d = NormalizeDepth(depth); + float posWarp = exp(ExponentPositive * d); + float negWarp = exp(-ExponentNegative * d); + return float4(posWarp, posWarp * posWarp, negWarp, negWarp * negWarp); } -float2 ReduceMoments(float2 a, float2 b, float2 c, float2 d) +float4 ReduceMoments(float4 a, float4 b, float4 c, float4 d) { return (a + b + c + d) * 0.25; } -groupshared float2 g_scratchDepths[8][8]; +groupshared float4 g_scratchDepths[8][8]; #if defined(DOWNSAMPLE_SHADOW_MIP0) static const uint CASCADE = 1; @@ -47,12 +71,12 @@ static const uint CASCADE = 0; float4 esramDepths = ESRAMShadow.GatherRed(LinearSampler, float3(uv, CASCADE)); depths = min(depths, esramDepths); - float2 vsmDepth = 0; + float4 evsmMoments = 0; for (uint i = 0; i < 4; i++) - vsmDepth += GetVSMMoments(depths[i]); - vsmDepth *= 0.25; + evsmMoments += WarpDepth(depths[i]); + evsmMoments *= 0.25; - g_scratchDepths[groupThreadID.x][groupThreadID.y] = vsmDepth; + g_scratchDepths[groupThreadID.x][groupThreadID.y] = evsmMoments; GroupMemoryBarrierWithGroupSync(); diff --git a/features/Volumetric Shadows/Shaders/VolumetricShadows/VolumetricShadows.hlsli b/features/Volumetric Shadows/Shaders/VolumetricShadows/VolumetricShadows.hlsli index bf703beedf..f036bed1b8 100644 --- a/features/Volumetric Shadows/Shaders/VolumetricShadows/VolumetricShadows.hlsli +++ b/features/Volumetric Shadows/Shaders/VolumetricShadows/VolumetricShadows.hlsli @@ -1,40 +1,70 @@ #ifndef __VOLUMETRIC_SHADOWS_HLSLI__ #define __VOLUMETRIC_SHADOWS_HLSLI__ -// Variance Shadow Maps (VSM) -// Chebyshev's inequality on filtered depth moments - namespace VolumetricShadows { - Texture2D SharedShadowMap : register(t18); - - static const float VSM_MIN_VARIANCE = 0.00001; - static const float VSM_BLEEDING_REDUCTION = 0.2; + Texture2D SharedShadowMap : register(t18); + + // EVSM exponents — must match values set in C++ (VolumetricShadows.h) + static const float EVSM_EXPONENT_POS = 40.0; + static const float EVSM_EXPONENT_NEG = 5.0; + static const float EVSM_VARIANCE_BIAS = 0.001; + static const float EVSM_LIGHT_BLEED_REDUCTION = 0.3; + + // Convert orthographic shadow projection depth to globally-normalized [0,1]. + // positionLS.z from mul(ShadowProj, worldPos) is orthographic [0,1] within the cascade. + // We remap through world space to the same global range used during moment generation. + float NormalizeDepth(float depth, float cascadeNear, float cascadeFar, float globalNear, float globalFar) + { + float worldZ = cascadeNear + depth * (cascadeFar - cascadeNear); + return (worldZ - globalNear) / (globalFar - globalNear); + } - // Chebyshev upper bound on P(X >= t) - // moments.x = mean(z), moments.y = mean(z^2) - float ComputeVSM(float2 moments, float depth) + // Chebyshev upper bound: P(x >= t) <= variance / (variance + (t - mean)^2) + // Returns visibility [0,1] where 1 = fully lit + float ChebyshevUpperBound(float mean, float meanSq, float testValue) { - float variance = max(moments.y - moments.x * moments.x, VSM_MIN_VARIANCE); - float d = depth - moments.x; + float variance = max(meanSq - mean * mean, EVSM_VARIANCE_BIAS); + + float d = testValue - mean; float pMax = variance / (variance + d * d); - return (depth <= moments.x) ? 1.0 : pMax; + + // Reduce light bleeding by remapping [bleedReduction..1] -> [0..1] + pMax = saturate((pMax - EVSM_LIGHT_BLEED_REDUCTION) / (1.0 - EVSM_LIGHT_BLEED_REDUCTION)); + + // If the test value is behind the mean, it's fully lit + return (testValue <= mean) ? 1.0 : pMax; } - // Reduces light bleeding by remapping shadow values below a threshold to zero - float ReduceBleeding(float shadow, float amount) + // Compute EVSM shadow from stored moments + // moments = (E[e^cz], E[e^2cz], E[e^-cz], E[e^-2cz]) + float ComputeEVSM(float4 moments, float depth, float cascadeNear, float cascadeFar, float globalNear, float globalFar) { - return saturate((shadow - amount) / (1.0 - amount)); + float d = NormalizeDepth(depth, cascadeNear, cascadeFar, globalNear, globalFar); + float posWarp = exp(EVSM_EXPONENT_POS * d); + float negWarp = exp(-EVSM_EXPONENT_NEG * d); + + // Positive exponent test (standard front-face shadow) + float posShadow = ChebyshevUpperBound(moments.x, moments.y, posWarp); + + // Negative exponent test (back-face light bleed suppression) + float negShadow = ChebyshevUpperBound(moments.z, moments.w, negWarp); + + return min(posShadow, negShadow); } - // Sample a single cascade for VSM shadow - float SampleVSMCascade3D( + // Sample a single cascade for EVSM shadow (3D ray march) + float SampleEVSMCascade3D( uint cascadeIndex, float noise, uint sampleCount, float rcpSampleCount, float3 startPositionLS, float3 endPositionLS, + float cascadeNear, + float cascadeFar, + float globalNear, + float globalFar, out float firstSample) { float shadow = 0.0; @@ -45,8 +75,8 @@ namespace VolumetricShadows float t = (float(k) + noise) * rcpSampleCount; float3 samplePosLS = lerp(endPositionLS, startPositionLS, t); - float2 moments = SharedShadowMap.SampleLevel(LinearSampler, samplePosLS.xy, 1u - cascadeIndex); - float lit = ComputeVSM(moments, samplePosLS.z); + float4 moments = SharedShadowMap.SampleLevel(LinearSampler, samplePosLS.xy, 1u - cascadeIndex); + float lit = ComputeEVSM(moments, samplePosLS.z, cascadeNear, cascadeFar, globalNear, globalFar); // Last to set firstSample is start position firstSample = lit; @@ -88,6 +118,11 @@ namespace VolumetricShadows uint primaryCascade = uint(cascadeSelect); bool needsBlending = (cascadeSelect > 0.0) && (cascadeSelect < 1.0); + float4 depthParams = directionalShadowLightData.CascadeDepthParams; + float4 globalParams = directionalShadowLightData.GlobalDepthParams; + float globalNear = globalParams.x; + float globalFar = globalParams.y; + // Transform ray to light space for primary cascade float4x4 shadowProj = directionalShadowLightData.ShadowProj[primaryCascade]; float3 startLS = mul(shadowProj, float4(startPosition, 1)).xyz; @@ -95,9 +130,12 @@ namespace VolumetricShadows startLS.xy = saturate(startLS.xy); endLS.xy = saturate(endLS.xy); + float primaryNear = primaryCascade == 0 ? depthParams.x : depthParams.z; + float primaryFar = primaryCascade == 0 ? depthParams.y : depthParams.w; + // Sample primary cascade float primaryFirstSample; - float shadow = SampleVSMCascade3D(primaryCascade, noise, sampleCount, rcpSampleCount, startLS, endLS, primaryFirstSample); + float shadow = SampleEVSMCascade3D(primaryCascade, noise, sampleCount, rcpSampleCount, startLS, endLS, primaryNear, primaryFar, globalNear, globalFar, primaryFirstSample); surfaceShadow = primaryFirstSample; // Blend with secondary cascade if needed @@ -111,8 +149,11 @@ namespace VolumetricShadows startLS.xy = saturate(startLS.xy); endLS.xy = saturate(endLS.xy); + float secondaryNear = secondaryCascade == 0 ? depthParams.x : depthParams.z; + float secondaryFar = secondaryCascade == 0 ? depthParams.y : depthParams.w; + float secondaryFirstSample; - float shadowBlend = SampleVSMCascade3D(secondaryCascade, noise, sampleCount, rcpSampleCount, startLS, endLS, secondaryFirstSample); + float shadowBlend = SampleEVSMCascade3D(secondaryCascade, noise, sampleCount, rcpSampleCount, startLS, endLS, secondaryNear, secondaryFar, globalNear, globalFar, secondaryFirstSample); shadow = lerp(shadow, shadowBlend, cascadeSelect); surfaceShadow = lerp(surfaceShadow, secondaryFirstSample, cascadeSelect); } @@ -123,11 +164,11 @@ namespace VolumetricShadows return lerp(1.0, shadow, fadeFactor); } - // Sample a single cascade for VSM shadow (2D point sample) - float SampleVSMCascade2D(uint cascadeIndex, float3 positionLS) + // Sample a single cascade for EVSM shadow (2D point sample) + float SampleEVSMCascade2D(uint cascadeIndex, float3 positionLS, float cascadeNear, float cascadeFar, float globalNear, float globalFar) { - float2 moments = SharedShadowMap.SampleLevel(LinearSampler, positionLS.xy, 1u - cascadeIndex); - return ComputeVSM(moments, positionLS.z); + float4 moments = SharedShadowMap.SampleLevel(LinearSampler, positionLS.xy, 1u - cascadeIndex); + return ComputeEVSM(moments, positionLS.z, cascadeNear, cascadeFar, globalNear, globalFar); } float GetVSMShadow2D(float3 position, out float detailedShadow) @@ -155,12 +196,20 @@ namespace VolumetricShadows uint primaryCascade = uint(cascadeSelect); bool needsBlending = (cascadeSelect > 0.0) && (cascadeSelect < 1.0); + float4 depthParams = directionalShadowLightData.CascadeDepthParams; + float4 globalParams = directionalShadowLightData.GlobalDepthParams; + float globalNear = globalParams.x; + float globalFar = globalParams.y; + // Transform position to light space for primary cascade float3 positionLS = mul(directionalShadowLightData.ShadowProj[primaryCascade], float4(positionWS, 1)).xyz; positionLS.xy = saturate(positionLS.xy); + float primaryNear = primaryCascade == 0 ? depthParams.x : depthParams.z; + float primaryFar = primaryCascade == 0 ? depthParams.y : depthParams.w; + // Sample primary cascade - float shadow = SampleVSMCascade2D(primaryCascade, positionLS); + float shadow = SampleEVSMCascade2D(primaryCascade, positionLS, primaryNear, primaryFar, globalNear, globalFar); // Blend with secondary cascade if needed [branch] if (needsBlending) @@ -170,14 +219,17 @@ namespace VolumetricShadows positionLS = mul(directionalShadowLightData.ShadowProj[secondaryCascade], float4(positionWS, 1)).xyz; positionLS.xy = saturate(positionLS.xy); - float shadowBlend = SampleVSMCascade2D(secondaryCascade, positionLS); + float secondaryNear = secondaryCascade == 0 ? depthParams.x : depthParams.z; + float secondaryFar = secondaryCascade == 0 ? depthParams.y : depthParams.w; + + float shadowBlend = SampleEVSMCascade2D(secondaryCascade, positionLS, secondaryNear, secondaryFar, globalNear, globalFar); shadow = lerp(shadow, shadowBlend, cascadeSelect); } // Apply distance fade float fadeFactor = 1.0 - pow(fade * fade, 8); - detailedShadow = lerp(1.0, ReduceBleeding(shadow, VSM_BLEEDING_REDUCTION), fadeFactor); - return lerp(1.0, shadow, fadeFactor); + detailedShadow = lerp(1.0, shadow, fadeFactor); + return detailedShadow; } } diff --git a/package/Shaders/Common/ShadowSampling.hlsli b/package/Shaders/Common/ShadowSampling.hlsli index 8397d5333b..5f638cbd3f 100644 --- a/package/Shaders/Common/ShadowSampling.hlsli +++ b/package/Shaders/Common/ShadowSampling.hlsli @@ -30,6 +30,8 @@ struct DirectionalShadowLightData column_major float4x4 InvShadowProj[2]; float2 EndSplitDistances; float2 StartSplitDistances; + float4 CascadeDepthParams; + float4 GlobalDepthParams; }; StructuredBuffer DirectionalShadowLights : register(t98); diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 9b87009bb4..6eab7a4633 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -8,6 +8,7 @@ #include "Features/DynamicCubemaps.h" #include "Features/IBL.h" +#include "Features/VolumetricShadows.h" #include "Features/ScreenSpaceGI.h" #include "Features/Skylighting.h" #include "Features/SubsurfaceScattering.h" @@ -558,6 +559,9 @@ void Deferred::CopyShadowLightData() SetShadowCascadeParameters(sunShadowLight->GetRuntimeData(), dd); + dd.CascadeDepthParams = globals::features::volumetricShadows.GetCascadeDepthParams(); + dd.GlobalDepthParams = globals::features::volumetricShadows.GetGlobalDepthParams(); + D3D11_MAPPED_SUBRESOURCE mapped{}; DX::ThrowIfFailed(context->Map(directionalShadowLights->resource.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); memcpy(mapped.pData, &dd, sizeof(DirectionalShadowLightData)); diff --git a/src/Deferred.h b/src/Deferred.h index b18002d4bd..9eeb4779cd 100644 --- a/src/Deferred.h +++ b/src/Deferred.h @@ -27,6 +27,8 @@ class Deferred float4x4 InvShadowProj[2]; float2 EndSplitDistances; float2 StartSplitDistances; + float4 CascadeDepthParams; + float4 GlobalDepthParams; // x=globalNear, y=globalFar, zw=unused }; STATIC_ASSERT_ALIGNAS_16(DirectionalShadowLightData); diff --git a/src/Features/VolumetricShadows.cpp b/src/Features/VolumetricShadows.cpp index b59bf36c94..d719d91583 100644 --- a/src/Features/VolumetricShadows.cpp +++ b/src/Features/VolumetricShadows.cpp @@ -4,6 +4,14 @@ #include "State.h" #include "Utils/D3D.h" +#include "RE/B/BSShadowDirectionalLight.h" + +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( + VolumetricShadows::Settings, + BlurRadius, + ExponentPositive, + ExponentNegative) + void VolumetricShadows::SetupResources() { auto device = globals::d3d::device; @@ -22,6 +30,28 @@ void VolumetricShadows::SetupResources() Util::SetResourceName(linearSampler, "VolumetricShadows::LinearSampler"); } + // Create linearization cbuffer + { + D3D11_BUFFER_DESC cbDesc{}; + cbDesc.ByteWidth = sizeof(EVSMLinearizeCB); + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(device->CreateBuffer(&cbDesc, nullptr, &linearizeCB)); + Util::SetResourceName(linearizeCB, "VolumetricShadows::LinearizeCB"); + } + + // Create blur cbuffer + { + D3D11_BUFFER_DESC cbDesc{}; + cbDesc.ByteWidth = sizeof(BlurCB); + cbDesc.Usage = D3D11_USAGE_DYNAMIC; + cbDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + DX::ThrowIfFailed(device->CreateBuffer(&cbDesc, nullptr, &blurCB)); + Util::SetResourceName(blurCB, "VolumetricShadows::BlurCB"); + } + // Compile compute shaders std::vector> defines; defines.push_back({ "DOWNSAMPLE_SHADOW_MIP0", nullptr }); @@ -72,6 +102,51 @@ void VolumetricShadows::ClearShaderCache() blurShadowVerticalCS = static_cast(Util::CompileShader(L"Data\\Shaders\\VolumetricShadows\\BlurShadowCS.hlsl", defines, "cs_5_0")); } +void VolumetricShadows::ExtractCascadeNearFar() +{ + auto* shadowSceneNode = globals::game::smState->shadowSceneNode[0]; + if (!shadowSceneNode) + return; + + auto* sunShadowLight = shadowSceneNode->GetRuntimeData().sunShadowDirLight; + if (!sunShadowLight) + return; + + auto extractCascade = [&](RE::NiCamera* camera, const REX::W32::XMFLOAT4X4& transform, uint32_t cascadeIdx) { + if (camera) { + auto& frustum = camera->GetRuntimeData2().viewFrustum; + cascadeNear[cascadeIdx] = frustum.fNear; + cascadeFar[cascadeIdx] = frustum.fFar; + } + // Extract world-to-UV scale from shadow projection matrix + // Column 0 of the effective HLSL matrix = row 0 cross rows of C++ row-major storage + // The UV-per-world-unit scale is the length of the first output component's gradient + float sx = transform.m[0][0]; + float sy = transform.m[1][0]; + float sz = transform.m[2][0]; + cascadeScale[cascadeIdx] = std::sqrt(sx * sx + sy * sy + sz * sz); + }; + + auto& lightData = sunShadowLight->GetRuntimeData(); + const auto count = std::min(lightData.shadowmapDescriptors.size(), 2u); + for (uint32_t i = 0; i < count; i++) + extractCascade(lightData.shadowmapDescriptors[i].camera.get(), lightData.shadowmapDescriptors[i].lightTransform, i); +} + +float4 VolumetricShadows::GetCascadeDepthParams() +{ + ExtractCascadeNearFar(); + return { cascadeNear[0], cascadeFar[0], cascadeNear[1], cascadeFar[1] }; +} + +float4 VolumetricShadows::GetGlobalDepthParams() +{ + ExtractCascadeNearFar(); + float globalNear = std::min(cascadeNear[0], cascadeNear[1]); + float globalFar = std::max(cascadeFar[0], cascadeFar[1]); + return { globalNear, globalFar, 0.f, 0.f }; +} + void VolumetricShadows::CopyShadowLightData() { ZoneScoped; @@ -87,11 +162,11 @@ void VolumetricShadows::CopyShadowLightData() context->PSGetShaderResources(4, 1, &shadowView); - // Downsample shadow texture array to fixed 512x512 (mip1: 256x256) + // Downsample shadow texture array to fixed size if (shadowView) { constexpr uint32_t SHADOW_COPY_SIZE = 512; - // Lazily create fixed-size output textures + // Lazily create fixed-size output textures (RGBA16F for EVSM exponential moments) if (!shadowCopyTexture) { shadowCopyWidth = SHADOW_COPY_SIZE; shadowCopyHeight = SHADOW_COPY_SIZE; @@ -101,7 +176,7 @@ void VolumetricShadows::CopyShadowLightData() copyDesc.Height = SHADOW_COPY_SIZE; copyDesc.MipLevels = 2; copyDesc.ArraySize = 1; - copyDesc.Format = DXGI_FORMAT_R16G16_UNORM; + copyDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; copyDesc.SampleDesc.Count = 1; copyDesc.SampleDesc.Quality = 0; copyDesc.Usage = D3D11_USAGE_DEFAULT; @@ -166,6 +241,17 @@ void VolumetricShadows::CopyShadowLightData() Util::SetResourceName(shadowBlurTempMip1UAV, "VolumetricShadows::ShadowBlurTemp UAV mip1"); } + // Extract cascade near/far and projection scale + ExtractCascadeNearFar(); + + // Compute per-cascade blur radii for consistent world-space softness + // Mip 0 (512x512) = cascade 1, Mip 1 (256x256) = cascade 0 + // pixelRadius = worldRadius * cascadeScale * textureSize + uint32_t blurRadiusMip0 = std::max(1u, std::min(32u, + static_cast(std::round(settings.BlurRadius * cascadeScale[1] * float(SHADOW_COPY_SIZE))))); + uint32_t blurRadiusMip1 = std::max(1u, std::min(32u, + static_cast(std::round(settings.BlurRadius * cascadeScale[0] * float(SHADOW_COPY_SIZE / 2))))); + // Get input dimensions for dispatch sizing ID3D11Resource* shadowResource = nullptr; shadowView->GetResource(&shadowResource); @@ -190,7 +276,25 @@ void VolumetricShadows::CopyShadowLightData() // Dispatch covers full input: each thread gathers 2x2, 8 threads per group auto dispatchSize = srcDesc.Width / 16; - // Mip 0 (cascade 1) + // Global near/far: consistent [0,1] mapping across both cascades + float globalNear = std::min(cascadeNear[0], cascadeNear[1]); + float globalFar = std::max(cascadeFar[0], cascadeFar[1]); + + // Mip 0 (cascade 1) - update cbuffer with cascade 1 near/far + exponents + { + D3D11_MAPPED_SUBRESOURCE mapped{}; + DX::ThrowIfFailed(context->Map(linearizeCB, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + auto* cb = static_cast(mapped.pData); + cb->CascadeNear = cascadeNear[1]; + cb->CascadeFar = cascadeFar[1]; + cb->GlobalNear = globalNear; + cb->GlobalFar = globalFar; + cb->ExponentPositive = settings.ExponentPositive; + cb->ExponentNegative = settings.ExponentNegative; + context->Unmap(linearizeCB, 0); + context->CSSetConstantBuffers(0, 1, &linearizeCB); + } + ID3D11UnorderedAccessView* csUavs[1]{ shadowCopyMip0UAV }; context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); context->CSSetShader(downsampleShadowMip0CS, nullptr, 0); @@ -198,7 +302,20 @@ void VolumetricShadows::CopyShadowLightData() context->Dispatch(dispatchSize, dispatchSize, 1); globals::profiler->EndPass(); - // Mip 1 (cascade 0) + // Mip 1 (cascade 0) - update cbuffer with cascade 0 near/far + exponents + { + D3D11_MAPPED_SUBRESOURCE mapped{}; + DX::ThrowIfFailed(context->Map(linearizeCB, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + auto* cb = static_cast(mapped.pData); + cb->CascadeNear = cascadeNear[0]; + cb->CascadeFar = cascadeFar[0]; + cb->GlobalNear = globalNear; + cb->GlobalFar = globalFar; + cb->ExponentPositive = settings.ExponentPositive; + cb->ExponentNegative = settings.ExponentNegative; + context->Unmap(linearizeCB, 0); + } + csUavs[0] = shadowCopyMip1UAV; context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); context->CSSetShader(downsampleShadowMip1CS, nullptr, 0); @@ -212,14 +329,26 @@ void VolumetricShadows::CopyShadowLightData() context->CSSetShaderResources(0, 2, csSrvs); csUavs[0] = nullptr; context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); + ID3D11Buffer* nullCB = nullptr; + context->CSSetConstantBuffers(0, 1, &nullCB); constexpr uint32_t mip0Size = SHADOW_COPY_SIZE; constexpr uint32_t mip1Size = SHADOW_COPY_SIZE / 2; - // 11x11 separable blur for Mip 0 + // Separable blur for Mip 0 { const uint32_t GROUP_SIZE = 128; + // Update blur cbuffer for mip 0 + { + D3D11_MAPPED_SUBRESOURCE mapped{}; + DX::ThrowIfFailed(context->Map(blurCB, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + auto* cb = static_cast(mapped.pData); + cb->BlurRadius = blurRadiusMip0; + context->Unmap(blurCB, 0); + context->CSSetConstantBuffers(0, 1, &blurCB); + } + // Horizontal pass: shadowCopy mip0 -> shadowBlurTemp mip0 ID3D11ShaderResourceView* blurSrvs[1]{ shadowCopyMip0SRV }; context->CSSetShaderResources(0, 1, blurSrvs); @@ -253,10 +382,19 @@ void VolumetricShadows::CopyShadowLightData() context->CSSetUnorderedAccessViews(0, 1, csUavs, nullptr); } - // 11x11 separable blur for Mip 1 + // Separable blur for Mip 1 { const uint32_t GROUP_SIZE = 128; + // Update blur cbuffer for mip 1 + { + D3D11_MAPPED_SUBRESOURCE mapped{}; + DX::ThrowIfFailed(context->Map(blurCB, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + auto* cb = static_cast(mapped.pData); + cb->BlurRadius = blurRadiusMip1; + context->Unmap(blurCB, 0); + } + // Horizontal pass: shadowCopy mip1 -> shadowBlurTemp mip1 ID3D11ShaderResourceView* blurSrvs[1]{ shadowCopyMip1SRV }; context->CSSetShaderResources(0, 1, blurSrvs); @@ -293,6 +431,8 @@ void VolumetricShadows::CopyShadowLightData() // Cleanup CS state ID3D11SamplerState* nullSampler = nullptr; context->CSSetSamplers(0, 1, &nullSampler); + ID3D11Buffer* nullCB2 = nullptr; + context->CSSetConstantBuffers(0, 1, &nullCB2); context->CSSetShader(nullptr, nullptr, 0); shadowTexture->Release(); @@ -317,8 +457,32 @@ void VolumetricShadows::SetSharedShadowMapSRV(ID3D11DeviceContext* a_context, ID void VolumetricShadows::DrawSettings() { + ImGui::SliderFloat("Blur Radius", &settings.BlurRadius, 0.0f, 500.0f, "%.0f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Blur radius in world units. Both cascades are scaled to match this world-space softness."); + + ImGui::SliderFloat("Positive Exponent", &settings.ExponentPositive, 1.0f, 80.0f, "%.1f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Controls shadow sharpness. Higher = sharper shadows but more numerical instability."); + + ImGui::SliderFloat("Negative Exponent", &settings.ExponentNegative, 1.0f, 40.0f, "%.1f"); + if (ImGui::IsItemHovered()) + ImGui::SetTooltip("Controls light bleed suppression. Higher = less light bleed but may cause artifacts."); + ImGui::SeparatorText("Debug"); + if (ImGui::TreeNode("Info")) { + ImGui::Text("Cascade 0: scale=%.6f near=%.1f far=%.1f", cascadeScale[0], cascadeNear[0], cascadeFar[0]); + ImGui::Text("Cascade 1: scale=%.6f near=%.1f far=%.1f", cascadeScale[1], cascadeNear[1], cascadeFar[1]); + + uint32_t blurMip0 = std::max(1u, std::min(32u, + static_cast(std::round(settings.BlurRadius * cascadeScale[1] * float(shadowCopyWidth))))); + uint32_t blurMip1 = std::max(1u, std::min(32u, + static_cast(std::round(settings.BlurRadius * cascadeScale[0] * float(shadowCopyWidth / 2))))); + ImGui::Text("Blur pixels: mip0=%u mip1=%u", blurMip0, blurMip1); + ImGui::TreePop(); + } + if (ImGui::TreeNode("Buffer Viewer")) { static float debugRescale = .3f; ImGui::SliderFloat("View Resize", &debugRescale, 0.f, 1.f); @@ -336,26 +500,26 @@ void VolumetricShadows::DrawSettings() } }; - DisplayRT("VSM Cascade 0", shadowCopyTexture, shadowCopyMip0SRV); - DisplayRT("VSM Cascade 1", shadowCopyTexture, shadowCopyMip1SRV); + DisplayRT("EVSM Cascade 0", shadowCopyTexture, shadowCopyMip0SRV); + DisplayRT("EVSM Cascade 1", shadowCopyTexture, shadowCopyMip1SRV); ImGui::TreePop(); } } -void VolumetricShadows::LoadSettings(json&) +void VolumetricShadows::LoadSettings(json& o_json) { - // No settings currently + settings = o_json; } -void VolumetricShadows::SaveSettings(json&) +void VolumetricShadows::SaveSettings(json& o_json) { - // No settings currently + o_json = settings; } void VolumetricShadows::RestoreDefaultSettings() { - // No settings currently + settings = {}; } struct CreateDepthStencil_VolumetricLighting @@ -363,8 +527,8 @@ struct CreateDepthStencil_VolumetricLighting static void thunk(RE::BSGraphics::Renderer* This, uint32_t a_target, RE::BSGraphics::DepthStencilTargetProperties* a_properties) { RE::BSGraphics::DepthStencilTargetProperties properties = *a_properties; - a_properties->height = 1024; - a_properties->width = 1024; + properties.height = 1024; + properties.width = 1024; func(This, a_target, &properties); } static inline REL::Relocation func; diff --git a/src/Features/VolumetricShadows.h b/src/Features/VolumetricShadows.h index 0830395070..7dc0c4d4f4 100644 --- a/src/Features/VolumetricShadows.h +++ b/src/Features/VolumetricShadows.h @@ -17,15 +17,43 @@ struct VolumetricShadows : Feature virtual std::pair> GetFeatureSummary() override { - return { T("feature.volumetric_shadows.description", "Volumetric Shadows provides downsampled VSM shadow maps for use by effects like particles and decals.\nThis improves shadow quality on transparent objects with minimal performance impact."), - { T("feature.volumetric_shadows.key_feature_1", "Downsampled VSM shadows"), + return { T("feature.volumetric_shadows.description", "Volumetric Shadows provides downsampled EVSM shadow maps for use by effects like particles and decals.\nThis improves shadow quality on transparent objects with minimal performance impact."), + { T("feature.volumetric_shadows.key_feature_1", "Downsampled EVSM shadows"), T("feature.volumetric_shadows.key_feature_2", "Gaussian blur filtering"), T("feature.volumetric_shadows.key_feature_3", "Multi-cascade support"), T("feature.volumetric_shadows.key_feature_4", "Optimized for effects rendering") } }; - }; + } bool HasShaderDefine(RE::BSShader::Type shaderType) override; + struct Settings + { + float BlurRadius = 100.0f; + float ExponentPositive = 40.0f; + float ExponentNegative = 5.0f; + }; + Settings settings; + + struct alignas(16) EVSMLinearizeCB + { + float CascadeNear; + float CascadeFar; + float GlobalNear; + float GlobalFar; + float ExponentPositive; + float ExponentNegative; + float _pad[2]; + }; + + struct alignas(16) BlurCB + { + uint32_t BlurRadius; + uint32_t _pad[3]; + }; + + float4 GetCascadeDepthParams(); + float4 GetGlobalDepthParams(); + // Compute shaders ID3D11ComputeShader* downsampleShadowMip0CS = nullptr; ID3D11ComputeShader* downsampleShadowMip1CS = nullptr; @@ -51,6 +79,15 @@ struct VolumetricShadows : Feature ID3D11UnorderedAccessView* shadowBlurTempMip0UAV = nullptr; ID3D11UnorderedAccessView* shadowBlurTempMip1UAV = nullptr; + // Cbuffers + ID3D11Buffer* linearizeCB = nullptr; + ID3D11Buffer* blurCB = nullptr; + + // Cached cascade near/far values and projection scale + float cascadeNear[2] = { 0.f, 0.f }; + float cascadeFar[2] = { 1.f, 1.f }; + float cascadeScale[2] = { 1.f, 1.f }; + // Samplers ID3D11SamplerState* linearSampler = nullptr; @@ -69,4 +106,5 @@ struct VolumetricShadows : Feature private: static void SetSharedShadowMapSRV(ID3D11DeviceContext* a_context, ID3D11ShaderResourceView* a_srv); + void ExtractCascadeNearFar(); };