diff --git a/.gitmodules b/.gitmodules index d4fb422d95..17f9a4254b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "extern/FidelityFX-SDK"] path = extern/FidelityFX-SDK url = https://github.com/MapleHinata/FidelityFX-SDK +[submodule "extern/NRD"] + path = extern/NRD + url = https://github.com/NVIDIA-RTX/NRD.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 558ac470ee..ee1bfdc90d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,8 +73,18 @@ find_package(unordered_dense CONFIG REQUIRED) find_package(efsw CONFIG REQUIRED) find_package(Tracy CONFIG REQUIRED) find_package(directx-headers CONFIG REQUIRED) +find_package(directx-dxc CONFIG REQUIRED) +find_package(D3D12MemoryAllocator CONFIG REQUIRED) add_subdirectory(${CMAKE_SOURCE_DIR}/cmake/Streamline) +set(NRD_USE_DX12 ON CACHE BOOL "" FORCE) +set(NRD_STATIC_LIBRARY ON CACHE BOOL "" FORCE) + +set(NRD_EMBEDS_SPIRV_SHADERS OFF CACHE BOOL "" FORCE) +set(NRD_EMBEDS_DXBC_SHADERS OFF CACHE BOOL "" FORCE) + +add_subdirectory(extern/NRD) + find_path(DETOURS_INCLUDE_DIRS "detours/detours.h") find_library(DETOURS_LIBRARY detours REQUIRED) include(FidelityFX-SDK) @@ -104,6 +114,7 @@ target_include_directories( ${CLIB_UTIL_INCLUDE_DIRS} "${CMAKE_SOURCE_DIR}/package/Shaders" ${DETOURS_INCLUDE_DIRS} + "extern/NRD/Include" ) target_link_libraries( @@ -124,6 +135,9 @@ target_link_libraries( Streamline d3d12.lib Microsoft::DirectX-Headers + Microsoft::DirectXShaderCompiler + GPUOpen::D3D12MemoryAllocator + NRD ${DETOURS_LIBRARY} ) diff --git a/extern/NRD b/extern/NRD new file mode 160000 index 0000000000..b705fd09c2 --- /dev/null +++ b/extern/NRD @@ -0,0 +1 @@ +Subproject commit b705fd09c286def8a2205afde858485513679ccf diff --git a/features/Raytracing/Shaders/Features/Raytracing.ini b/features/Raytracing/Shaders/Features/Raytracing.ini new file mode 100644 index 0000000000..19f01444dc --- /dev/null +++ b/features/Raytracing/Shaders/Features/Raytracing.ini @@ -0,0 +1,2 @@ +[Info] +Version = 1-0-0 \ No 
newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/CompositeCS.hlsl b/features/Raytracing/Shaders/Raytracing/CompositeCS.hlsl new file mode 100644 index 0000000000..848e9566d1 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/CompositeCS.hlsl @@ -0,0 +1,43 @@ +#include "Common/Color.hlsli" + +Texture2D MainInputTexture : register(t0); +Texture2D DiffuseAlbedoTexture : register(t1); +Texture2D DiffuseGITexture : register(t2); +Texture2D SpecularGITexture : register(t3); + +RWTexture2D MainOutputTexture : register(u0); + +cbuffer AccumulationCB : register(b2) +{ + uint AccumulatedFrames; + float3 _padding; +} + +[numthreads(8, 8, 1)] +void main(uint2 id : SV_DispatchThreadID) +{ +#if defined(ACCUMULATION) + float3 previousAccumulated = MainInputTexture[id].rgb; + float3 currentPathTraced = DiffuseAlbedoTexture[id].rgb; + + float3 outputColor = lerp(previousAccumulated, currentPathTraced, 1.0 / (AccumulatedFrames + 1)); +#elif defined(COMPOSITE) + float3 outputColor = Color::GammaToTrueLinear(MainInputTexture[id].rgb); + +# if defined(DIFFUSE) + outputColor += DiffuseAlbedoTexture[id].rgb * DiffuseGITexture[id].rgb; +# endif // DIFFUSE + +# if defined(SPECULAR) + outputColor += SpecularGITexture[id].rgb; +# endif // SPECULAR +#else + float3 outputColor = DiffuseGITexture[id].rgb; +#endif // COMPOSITE + +#if defined(GAMMA_OUTPUT) + outputColor = Color::TrueLinearToGamma(outputColor); +#endif // GAMMA_OUTPUT + + MainOutputTexture[id] = float4(outputColor, 1.0f); +} diff --git a/features/Raytracing/Shaders/Raytracing/ConvertTexturesCS.hlsl b/features/Raytracing/Shaders/Raytracing/ConvertTexturesCS.hlsl new file mode 100644 index 0000000000..c4ff2eb2ae --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/ConvertTexturesCS.hlsl @@ -0,0 +1,54 @@ +#include "Raytracing/Includes/Common.hlsli" +#include "Common/FrameBuffer.hlsli" +#include "Common/GBuffer.hlsli" +#include "Common/Color.hlsli" + +Texture2D NormalGlossiness : register(t0); 
+Texture2D Albedo : register(t1); +Texture2D GNMAO : register(t2); +Texture2D MotionVectors : register(t3); + +RWTexture2D NormalRoughness : register(u0); +RWTexture2D Diffuse : register(u1); +RWTexture2D MotionVectorsOut : register(u2); + +cbuffer RenderResCB : register(b0) +{ + uint2 RenderRes; + float2 RenderResRcp; +}; + +SamplerState Sampler : register(s0); + +[numthreads(8, 8, 1)] +void main(uint2 id : SV_DispatchThreadID) +{ + if (any(id >= RenderRes)) + return; + + const float2 uv = float2(id.xy + 0.5f) * RenderResRcp; +#ifndef PT + const unorm half3 normalGlossiness = NormalGlossiness.SampleLevel(Sampler, uv, 0).xyz; + const snorm half3 normalWS = normalize(ViewToWorldVector(GBuffer::DecodeNormal(normalGlossiness.xy), FrameBuffer::CameraViewInverse[0])); + NormalRoughness[id] = half4(normalWS, 1.0f - normalGlossiness.z); + + const float4 albedo = Albedo.SampleLevel(Sampler, uv, 0); + const float metallic = GNMAO.SampleLevel(Sampler, uv, 0).z; + Diffuse[id] = float4(Color::GammaToTrueLinear(albedo.rgb) * (1.0f - metallic), albedo.a); +#endif + MotionVectorsOut[id] = MotionVectors.SampleLevel(Sampler, uv, 0); +} + +[numthreads(8, 8, 1)] +void main2(uint2 id : SV_DispatchThreadID) +{ + const unorm half3 normalGlossiness = NormalGlossiness[id].xyz; + const snorm half3 normalWS = normalize(ViewToWorldVector(GBuffer::DecodeNormal(normalGlossiness.xy), FrameBuffer::CameraViewInverse[0])); + NormalRoughness[id] = half4(normalWS, 1.0f - normalGlossiness.z); + + float metallic, ao; + UnpackMAO(GNMAO[id].z, metallic, ao); + Diffuse[id] = Albedo[id] * (1.0f - metallic); + + MotionVectorsOut[id] = MotionVectors[id]; +} diff --git a/features/Raytracing/Shaders/Raytracing/CopyDepthCS.hlsl b/features/Raytracing/Shaders/Raytracing/CopyDepthCS.hlsl new file mode 100644 index 0000000000..ae40d6c5b5 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/CopyDepthCS.hlsl @@ -0,0 +1,22 @@ +#include "Common/SharedData.hlsli" +#include "Raytracing/Includes/Common.hlsli" + 
+Texture2D DepthIn : register(t0); +RWTexture2D DepthOut : register(u0); +RWTexture2D DepthViewOut : register(u1); + +[numthreads(8, 8, 1)] +void main(uint2 id : SV_DispatchThreadID) +{ + uint width, height; + DepthIn.GetDimensions(width, height); + + if (id.x >= width || id.y >= height) + return; + + const float depthScreen = DepthIn[id]; + DepthOut[id] = depthScreen; + + float depthLinear = ScreenToViewDepth(depthScreen, SharedData::CameraData); + DepthViewOut[id] = float2(depthLinear, 0.0f); +} \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/CubeToHemiCS.hlsl b/features/Raytracing/Shaders/Raytracing/CubeToHemiCS.hlsl new file mode 100644 index 0000000000..0bf10bd9d2 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/CubeToHemiCS.hlsl @@ -0,0 +1,31 @@ +TextureCube CubeMap : register(t0); +TextureCube OcclusionMap : register(t1); + +SamplerState Sampler : register(s0); + +RWTexture2D HemisphereOut : register(u0); + +[numthreads(8, 8, 1)] +void main(uint2 id : SV_DispatchThreadID) +{ + + if (id.x >= RESOLUTION || id.y >= RESOLUTION) + return; + + const float2 uv = float2(id.xy + 0.5f) / float2(RESOLUTION, RESOLUTION); + const float2 xy = uv * 2.0f - 1.0f; + + const float r = length(xy); + + const float phi = atan2(xy.y, xy.x); + + const float z = 1.0f - r*r; + const float k = sqrt(1.0f - z*z); + + const float3 dir = float3(k * cos(phi), k * sin(phi), z); + + const float3 color = CubeMap.SampleLevel(Sampler, dir, 0.0f).rgb; + const float occlusion = OcclusionMap.SampleLevel(Sampler, dir, 0.0f).r; + + HemisphereOut[id.xy] = float4(color, occlusion); +} \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Denoiser/NRD/Shared.hlsli b/features/Raytracing/Shaders/Raytracing/Denoiser/NRD/Shared.hlsli new file mode 100644 index 0000000000..e0ab302b4b --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Denoiser/NRD/Shared.hlsli @@ -0,0 +1,162 @@ +// © 2022 NVIDIA Corporation + 
+//============================================================================================= +// SETTINGS +//============================================================================================= + +// Fused or separate denoising selection +// 0 - DIFFUSE and SPECULAR +// 1 - DIFFUSE_SPECULAR +#define NRD_COMBINED 1 + +// NORMAL - common (non specialized) denoisers +// SH - SH (spherical harmonics or spherical gaussian) denoisers +// OCCLUSION - OCCLUSION (ambient or specular occlusion only) denoisers +// DIRECTIONAL_OCCLUSION - DIRECTIONAL_OCCLUSION (ambient occlusion in SH mode) denoisers +#define NRD_MODE NORMAL // NRD sample recompilation required +#define SIGMA_TRANSLUCENCY 0 + +// Default = 1 +#define USE_IMPORTANCE_SAMPLING 1 +#define USE_SHARC_DITHERING 1.5 // radius in voxels +#define USE_TRANSLUCENCY 1 // translucent foliage +#define USE_MOVING_EMISSION_FIX 1 // fixes a dark tail, left by an animated emissive object + +// Default = 0 +#define USE_SANITIZATION 0 // NRD sample is NAN/INF free +#define USE_SIMULATED_MATERIAL_ID_TEST 0 // "material ID" debugging +#define USE_SIMULATED_FIREFLY_TEST 0 // "anti-firefly" debugging +#define USE_CAMERA_ATTACHED_REFLECTION_TEST 0 // test special treatment for reflections of objects attached to the camera +#define USE_RUSSIAN_ROULETTE 0 // bad practice for real-time denoising +#define USE_DRS_STRESS_TEST 0 // NRD must not touch GARBAGE data outside of DRS rectangle +#define USE_INF_STRESS_TEST 0 // NRD must not touch GARBAGE data outside of denoising range +#define USE_ANOTHER_COBALT 0 // another cobalt variant +#define USE_PUDDLES 0 // add puddles +#define USE_RANDOMIZED_ROUGHNESS 0 // randomize roughness ( a common case in games ) +#define USE_STOCHASTIC_SAMPLING 0 // needed? 
+#define USE_LOAD 0 // Load vs SampleLevel +#define USE_SHARC_DEBUG 0 // 1 - show cache, 2 - show grid (NRD sample recompile required) +#define USE_TAA_DEBUG 0 // 1 - show weight +#define USE_BIAS_FIX 0 // fixes negligible hair and specular bias + +//============================================================================================= +// CONSTANTS +//============================================================================================= + +// NRD variant +#define NORMAL 0 +#define SH 1 // NORMAL + SH (SG) resolve +#define OCCLUSION 2 +#define DIRECTIONAL_OCCLUSION 3 // diffuse OCCLUSION + SH (SG) resolve + +// Denoiser +#define DENOISER_REBLUR 0 +#define DENOISER_RELAX 1 +#define DENOISER_REFERENCE 2 + +// Resolution +#define RESOLUTION_FULL 0 +#define RESOLUTION_FULL_PROBABILISTIC 1 +#define RESOLUTION_HALF 2 + +// What is on screen? +#define SHOW_FINAL 0 +#define SHOW_DENOISED_DIFFUSE 1 +#define SHOW_DENOISED_SPECULAR 2 +#define SHOW_AMBIENT_OCCLUSION 3 +#define SHOW_SPECULAR_OCCLUSION 4 +#define SHOW_SHADOW 5 +#define SHOW_BASE_COLOR 6 +#define SHOW_NORMAL 7 +#define SHOW_ROUGHNESS 8 +#define SHOW_METALNESS 9 +#define SHOW_MATERIAL_ID 10 +#define SHOW_PSR_THROUGHPUT 11 +#define SHOW_WORLD_UNITS 12 +#define SHOW_INSTANCE_INDEX 13 +#define SHOW_UV 14 +#define SHOW_CURVATURE 15 +#define SHOW_MIP_PRIMARY 16 +#define SHOW_MIP_SPECULAR 17 + +// Predefined material override +#define MATERIAL_GYPSUM 1 +#define MATERIAL_COBALT 2 + +// Material ID +#define MATERIAL_ID_DEFAULT 0.0f +#define MATERIAL_ID_METAL 1.0f +#define MATERIAL_ID_HAIR 2.0f +#define MATERIAL_ID_SELF_REFLECTION 3.0f + +// Mip mode +#define MIP_VISIBILITY 0 // for visibility: emission, shadow and alpha mask +#define MIP_LESS_SHARP 1 // for normal +#define MIP_SHARP 2 // for albedo and roughness + +// Register spaces ( sets ) +#define SET_OTHER 0 +#define SET_RAY_TRACING 1 +#define SET_SHARC 2 +#define SET_MORPH 3 +#define SET_ROOT 4 + +// Path tracing +#define PT_THROUGHPUT_THRESHOLD 0.001 
+#define PT_IMPORTANCE_SAMPLES_NUM 16 +#define PT_SPEC_LOBE_ENERGY 0.95 // trimmed to 95% +#define PT_SHADOW_RAY_OFFSET 1.0 // pixels +#define PT_BOUNCE_RAY_OFFSET 0.25 // pixels +#define PT_GLASS_RAY_OFFSET 0.05 // pixels +#define PT_MAX_FIREFLY_RELATIVE_INTENSITY 20.0 // no more than 20x energy increase in case of probabilistic sampling +#define PT_EVIL_TWIN_LOBE_TOLERANCE 0.005 // normalized % +#define PT_GLASS_MIN_F 0.05 // adds a bit of stability and bias +#define PT_DELTA_BOUNCES_NUM 8 +#define PT_PSR_BOUNCES_NUM 2 +#define PT_RAY_FLAGS 0 + +// Spatial HAsh-based Radiance Cache ( SHARC ) +#define SHARC_CAPACITY ( 1 << 22 ) +#define SHARC_SCENE_SCALE 45.0 +#define SHARC_DOWNSCALE 5 +#define SHARC_ANTI_FIREFLY false +#define SHARC_STALE_FRAME_NUM_MIN 32 // new version uses 8 by default, old value offers more stability in voxels with low number of samples ( critical for glass ) +#define SHARC_SEPARATE_EMISSIVE 1 +#define SHARC_MATERIAL_DEMODULATION 1 +#define SHARC_USE_FP16 0 + +// Blue noise +#define BLUE_NOISE_SPATIAL_DIM 128 // see StaticTexture::ScramblingRanking +#define BLUE_NOISE_TEMPORAL_DIM 4 // good values: 4-8 for shadows, 8-16 for occlusion, 8-32 for lighting + +// Other +#define FP16_MAX 65504.0 +#define INF 1e5 +#define LINEAR_BLOCK_SIZE 256 +#define FP16_VIEWZ_SCALE 0.125 // TODO: tuned for meters, needs to be scaled down for cm and mm +#define MAX_MIP_LEVEL 11.0 +#define LEAF_TRANSLUCENCY 0.25 +#define LEAF_THICKNESS 0.001 // TODO: viewZ dependent? 
+#define STRAND_THICKNESS 80e-6f +#define TAA_HISTORY_SHARPNESS 0.66 // sharper ( was 0.5 ) +#define TAA_SIGMA_SCALE 2.0 // allow nano ghosting ( was 1.0 ) // TODO: can negatively affect moving shadows +#define GARBAGE sqrt( -1.0 ) // sqrt( -1.0 ) or -log( 0.0 ) or 32768.0 + +#define MORPH_MAX_ACTIVE_TARGETS_NUM 8u +#define MORPH_ELEMENTS_PER_ROW_NUM 4 +#define MORPH_ROWS_NUM ( MORPH_MAX_ACTIVE_TARGETS_NUM / MORPH_ELEMENTS_PER_ROW_NUM ) + +// Instance flags +#define FLAG_FIRST_BIT 24 // this + number of flags must be <= 32 +#define NON_FLAG_MASK ( ( 1 << FLAG_FIRST_BIT ) - 1 ) + +#define FLAG_NON_TRANSPARENT 0x01 // geometry flag: non-transparent +#define FLAG_TRANSPARENT 0x02 // geometry flag: transparent +#define FLAG_FORCED_EMISSION 0x04 // animated emissive cube +#define FLAG_STATIC 0x08 // no velocity +#define FLAG_HAIR 0x10 // hair +#define FLAG_LEAF 0x20 // leaf +#define FLAG_SKIN 0x40 // skin +#define FLAG_MORPH 0x80 // morph + +#define GEOMETRY_ALL ( FLAG_NON_TRANSPARENT | FLAG_TRANSPARENT ) \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/Common.hlsli b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/Common.hlsli new file mode 100644 index 0000000000..35e2a9c82c --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/Common.hlsli @@ -0,0 +1,92 @@ +#ifndef SVGF_COMMON_HLSI +#define SVGF_COMMON_HLSI + +#include "Common/FrameBuffer.hlsli" +#include "Common/Color.hlsli" +#include "Raytracing/Denoiser/SVGF/SVGF.hlsli" +#include "Raytracing/Includes/Common.hlsli" + +cbuffer RenderResCB : register(b0) +{ + uint2 Resolution; + float2 ResolutionRcp; +}; + +cbuffer SVGFCB : register(b1) +{ + SVGF Frame; +}; + +Texture2D NormalRoughnessTexture : register(t2); + +SamplerState LinearSampler : register(s0); + +#define VAR_EPSILON (0.00001f) + +void GetNormalRoughness(uint2 dtid, out float3 normal, out float roughness) +{ + float4 normalRoughness = NormalRoughnessTexture[dtid]; + // Normal is in world 
space + normal = normalRoughness.xyz; + roughness = normalRoughness.w; +} + +void GetNormalRoughness(Texture2D NormalRoughness, uint2 dtid, out float3 normal, out float roughness) +{ + float4 normalRoughness = NormalRoughness[dtid]; + // Normal is in world space + normal = normalRoughness.xyz; + roughness = normalRoughness.w; +} + +float CalculateWeight(float depthCenter, float depthP, float phiD, float3 normalCenter, float3 normalP, float phiN, float luminanceCenter, float luminanceP, float phiL) +{ + // Depth weight + float weightDepth = exp(-abs(depthCenter - depthP) / max(phiD, VAR_EPSILON)); + + // Normal weight + float weightNormal = pow(max(0.0f, dot(normalCenter, normalP)), phiN); + + // Luminance weight + float weightLuminance = exp(-abs(luminanceCenter - luminanceP) / max(phiL, VAR_EPSILON)); + + return weightDepth * weightNormal * weightLuminance; +} + +float2 ReprojectUV(Texture2D MotionTexture, in float2 uv, in float depth, in uint eyeIndex) +{ + // Camera motion for pixel (in ScreenPos space). 
+ float2 thisScreen = (uv.xy - 0.5f) * float2(2.0f, -2.0f); + + float4 thisClip = float4(thisScreen, depth, 1); + + float4 thisView = mul(FrameBuffer::CameraProjUnjitteredInverse[eyeIndex], thisClip); + thisView.xyz = thisView.xyz / thisView.w; + + float4 thisWorld = mul(FrameBuffer::CameraViewInverse[eyeIndex], float4(thisView.xyz, 1.0f)); + thisWorld.xyz = (thisWorld.xyz / thisWorld.w) + FrameBuffer::CameraPosAdjust[eyeIndex].xyz; + + float4 prevClip = mul(FrameBuffer::CameraPreviousViewProjUnjittered[eyeIndex], float4(thisWorld.xyz, 1.0f)); + + float2 prevScreen = prevClip.xy / prevClip.w; + + float2 velocity = MotionTexture.SampleLevel(LinearSampler, uv.xy * FrameBuffer::DynamicResolutionParams1.xy, 0).xy; + + prevScreen = thisClip.xy + velocity * float2(2.f, -2.f); // NOTE(review): overwrites the matrix-reprojected prevScreen computed above, so thisView / thisWorld / prevClip currently do not influence the result + + return prevScreen.xy * float2(0.5f, -0.5f) + 0.5f; +} + +float2 ReprojectUV2(Texture2D MotionTexture, in float2 uv, in float viewDepth, in uint eyeIndex) +{ + float2 velocity = MotionTexture.SampleLevel(LinearSampler, uv, 0).xy; + return uv + velocity / viewDepth; +} + +float2 ReprojectUVSimple(Texture2D MotionTexture, in float2 uv) +{ + float2 velocity = MotionTexture.SampleLevel(LinearSampler, uv, 0).xy; + return uv + velocity; +} + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/SVGF.hlsli b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/SVGF.hlsli new file mode 100644 index 0000000000..a379598df8 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/SVGF.hlsli @@ -0,0 +1,26 @@ +#ifndef SVGF_HLSI +#define SVGF_HLSI + +struct +#ifdef __cplusplus +alignas(16) +#endif + SVGF // SVGF denoiser parameters; compiled by both HLSL and C++, so member order defines the constant-buffer byte layout +{ + uint AtrousIterations; + float Alpha; + float MomentsAlpha; + float ColorPhi; + float NormalPhi; + float DepthPhi; + float DepthThreshold; + float NormalThreshold; + uint HistoryThreshold; + uint3 Pad; // fills scalars 9..11 so the float4 below starts at byte 48; HLSL cbuffer packing never lets a vector straddle a 16-byte register, and with Pad placed first a tightly packed C++ struct agrees (assumes 4-byte scalar members on the C++ side - confirm) + float4 NDCToView; +}; +#ifdef __cplusplus +static_assert(sizeof(SVGF) % 16 == 0); +#endif + +#endif // SVGF_HLSI \ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/SpatialCS.hlsl b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/SpatialCS.hlsl new file mode 100644 index 0000000000..b7aa5658e9 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/SpatialCS.hlsl @@ -0,0 +1,128 @@ +#include "Raytracing/Denoiser/SVGF/Common.hlsli" + +Texture2D InputTexture : register(t0); +Texture2D DepthTexture : register(t4); // Viewspace Depth in R, Depth Width in G + +RWTexture2D FilteredOutput : register(u0); + +#define GAUSSIAN_RADIUS (1) +#define SPATIAL_RADIUS (2) + +float GaussianBlur(int2 id, uint2 screenSize) // 3x3 weighted average of InputTexture.w around id; taps outside the screen are skipped and the result is renormalised by kernelSum +{ + float sum = 0.f; + float kernelSum = 0.f; + + const float kernel[2][2] = + { + { 1.0 / 4.0, 1.0 / 8.0 }, + { 1.0 / 8.0, 1.0 / 16.0 } + }; + + for (int y = -GAUSSIAN_RADIUS; y <= GAUSSIAN_RADIUS; y++) + { + for (int x = -GAUSSIAN_RADIUS; x <= GAUSSIAN_RADIUS; x++) + { + const int2 p = id + int2(x, y); + + if (all(p >= 0) && all(p < screenSize)) + { + const float k = kernel[abs(x)][abs(y)]; + sum += InputTexture[p].w * k; + kernelSum += k; + } + } + } + + return sum / kernelSum; +} + +// Spatiotemporal Variance-Guided Filter: one edge-aware 5x5 A-Trous pass over InputTexture, written to FilteredOutput +[numthreads(8, 8, 1)] void main(uint2 DTid : SV_DispatchThreadID) +{ + const uint2 screenSize = Resolution; + if (DTid.x >= screenSize.x || DTid.y >= screenSize.y) + return; + + const float2 uv = float2(DTid.xy + 0.5) * ResolutionRcp; // NOTE(review): uv is not referenced again in this shader + + const float4 inputColor = InputTexture[DTid.xy]; + + const float2 depthWidthCenter = DepthTexture[DTid.xy].xy; + + /*if (depthCenter <= 0.0f || depthCenter >= 1.0f) + { + FilteredOutput[DTid.xy] = inputColor; + return; + }*/ + + const int2 sDTid = int2(DTid.xy); + + float3 normalWS; + float roughness; + GetNormalRoughness(DTid.xy, normalWS, roughness); + roughness = clamp(roughness, 0.001f, 1.0f); + + float luminanceCenter = Color::RGBToLuminance(inputColor.rgb); + float variance = GaussianBlur(sDTid.xy, screenSize); // blurred w channel of InputTexture; passed through to FilteredOutput.w at the end + + float phiLuminance = Frame.ColorPhi * sqrt(max(VAR_EPSILON,
variance)); + float phiNormal = Frame.NormalPhi; + float phiDepth = Frame.AtrousIterations * depthWidthCenter.y * Frame.DepthPhi; + +#if defined(SSRT_SPECULAR) + // Trying to reduce blurriness on glossy surfaces + phiLuminance *= roughness; + phiNormal /= max(roughness, 0.05f); +#endif + + float weightSum = 0.f; + float3 blendedColor = 0; + + const float kernelWeights[3] = { 1.0, 2.0 / 3.0, 1.0 / 6.0 }; + + for (int y = -SPATIAL_RADIUS; y <= SPATIAL_RADIUS; y++) + { + for (int x = -SPATIAL_RADIUS; x <= SPATIAL_RADIUS; x++) + { + if (x == 0 && y == 0) continue; // NOTE(review): the centre texel never enters the blend and is only used as the fallback below; reference SVGF a-trous passes seed the sums with the centre at weight 1 - confirm this is intended + + // A-Trous sampling: Frame.AtrousIterations scales the tap offset, i.e. it acts as the step size in pixels here + int2 samplePos = sDTid + int2(x, y) * Frame.AtrousIterations; + + if (all(samplePos >= 0) && all(samplePos < screenSize)) + { + float4 sampleColor = InputTexture[samplePos]; + float sampleDepth = DepthTexture[samplePos].x; + + if (sampleDepth > 0) + { + float3 sampleNormalWS; + float sampleRoughness; + GetNormalRoughness(samplePos, sampleNormalWS, sampleRoughness); + + float luminanceP = Color::RGBToLuminance(sampleColor.rgb); + + float weight = CalculateWeight(depthWidthCenter.x, sampleDepth, phiDepth, normalWS, sampleNormalWS, phiNormal, luminanceCenter, luminanceP, phiLuminance); + + float kernel = kernelWeights[abs(x)] * kernelWeights[abs(y)]; + weight *= kernel; + + blendedColor += sampleColor.rgb * weight; + weightSum += weight; + } + } + } + } + + if (weightSum > 0.f) + { + blendedColor /= weightSum; + } + else + { + blendedColor = inputColor.rgb; // no neighbour contributed any weight: keep the unfiltered centre colour + } + + FilteredOutput[DTid.xy] = float4(blendedColor, variance); +} \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/TemporalCS.hlsl b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/TemporalCS.hlsl new file mode 100644 index 0000000000..1f79ee7ebd --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/TemporalCS.hlsl @@ -0,0 +1,108 @@ +#include "Raytracing/Denoiser/SVGF/Common.hlsli" + +Texture2D HistoryTexture : register(t0); +Texture2D MotionVectorTexture :
register(t1); +Texture2D NoisyInputTexture : register(t3); +Texture2D HistoryMomentsTexture : register(t4); // moments in RG, frame count in B +Texture2D HistoryDepthTexture : register(t5); +Texture2D HistoryNormalsTexture : register(t6); +//Texture2D DepthTexture : register(t7); + +RWTexture2D FilteredOutput : register(u0); +RWTexture2D MomentsOutput : register(u1); +RWTexture2D DepthOutput : register(u2); // Depth in R (read by main and written back unchanged), depth width in G (written by main) + +bool IsValidHistory(in uint2 pixel, in float2 uv, in float currDepth, in float3 currNormalWS) // true when the reprojected history texel is on screen and passes the normal and relative-depth tests +{ + const uint2 screenSize = Resolution; + + if (any(uv < 0.0f) || any(uv > 1.0f)) + return false; + + if (any(pixel >= screenSize)) + return false; + + float3 prevNormalWS; + float roughness; + GetNormalRoughness(HistoryNormalsTexture, pixel, prevNormalWS, roughness); + + if (dot(currNormalWS, prevNormalWS) < Frame.NormalThreshold) // cos + return false; + + float prevDepth = HistoryDepthTexture[pixel].x; + float depthDiff = abs(currDepth - prevDepth) / currDepth; + + if (depthDiff > Frame.DepthThreshold) // difference % + return false; + + return true; +} + +[numthreads(8, 8, 1)] +void main(uint2 id : SV_DispatchThreadID) // temporal accumulation: blends the noisy input and luminance moments with reprojected history, then builds the per-pixel depth width +{ + const uint2 screenSize = Resolution; + if (any(id.xy >= screenSize)) + return; + + const float2 uv = float2(id.xy + 0.5) * ResolutionRcp; + + const float4 inputColor = NoisyInputTexture[id.xy]; + const float2 depth = DepthOutput[id.xy].xy; + + float depthCenter = depth.x; + //float depthCenter = DepthTexture[id.xy]; + + float3 normalWS; + float roughness; + GetNormalRoughness(id.xy, normalWS, roughness); + + // Reproject UVs using motion vectors + float2 prevUV = ReprojectUVSimple(MotionVectorTexture, uv); + //float2 prevUV = ReprojectUV(MotionVectorTexture, uv, depthCenter, 0u); + + float4 prevColor = 0.f; + float prevAccumFrames = 0.f; + float2 prevMoments = float2(0.f, 0.f); + uint2 prevPixel = uint2(prevUV * screenSize); + + bool valid = IsValidHistory(prevPixel, prevUV, depthCenter, normalWS);
+ + if (valid) + { + prevColor = HistoryTexture[prevPixel]; + + const float3 historyMoments = HistoryMomentsTexture[prevPixel].xyz; + prevAccumFrames = historyMoments.z; + prevMoments = historyMoments.xy; + } + + float curAccumFrames = min(64.0f, valid ? prevAccumFrames + 1.0f : 1.0f); + + float invPrevAccumFrames = 1.0f / curAccumFrames; // reciprocal of the current (already incremented) frame count + + float alpha = valid ? max(Frame.Alpha, invPrevAccumFrames) : 1.0f; + float momentAlpha = valid ? max(Frame.MomentsAlpha, invPrevAccumFrames) : 1.0f; + + float luminance = Color::RGBToLuminance(inputColor.rgb); + float2 curMoment = float2(luminance, luminance * luminance); + + float3 blendedColor = lerp(prevColor.rgb, inputColor.rgb, alpha); + float2 blendedMoment = lerp(prevMoments, curMoment, momentAlpha); + + float variance = max(0.0f, blendedMoment.y - blendedMoment.x * blendedMoment.x); + + FilteredOutput[id.xy] = float4(blendedColor, variance); + MomentsOutput[id.xy] = float4(blendedMoment, curAccumFrames, 0.f); + + // Build depth width: sum of absolute depth differences to the left, right, up and down neighbours + float depthL = DepthOutput[id.xy + int2(-1, 0)].x; // NOTE(review): id is unsigned, so the -1 offsets wrap at x == 0 / y == 0; presumably relies on out-of-bounds UAV loads being harmless - confirm + float depthR = DepthOutput[id.xy + int2(1, 0)].x; + + float depthU = DepthOutput[id.xy + int2(0, 1)].x; + float depthD = DepthOutput[id.xy + int2(0, -1)].x; + + float depthW = abs(depthCenter - depthL) + abs(depthCenter - depthR) + abs(depthCenter - depthU) + abs(depthCenter - depthD); + + DepthOutput[id.xy] = float2(depthCenter, depthW * 0.5f); +} \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/VarianceCS.hlsl b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/VarianceCS.hlsl new file mode 100644 index 0000000000..c942c28761 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Denoiser/SVGF/VarianceCS.hlsl @@ -0,0 +1,92 @@ +#include "Raytracing/Denoiser/SVGF/Common.hlsli" + +Texture2D HistoryTexture : register(t0); +Texture2D MomentsTexture : register(t1); +Texture2D TemporalTexture : register(t3); +Texture2D DepthTexture : register(t4); // Viewspace Depth in R, Depth Width in G
+ +RWTexture2D VarianceOutput : register(u0); + +#define RADIUS (3) + +[numthreads(8, 8, 1)] void main(uint2 DTid : SV_DispatchThreadID) // for pixels with short history, replaces colour and variance with an edge-aware 7x7 spatial estimate; otherwise passes the temporal result through +{ + const uint2 screenSize = Resolution; + + if (any(DTid.xy >= screenSize)) + return; + + float2 uv = float2(DTid.xy + 0.5) * ResolutionRcp; + + const float4 temporalColor = TemporalTexture[DTid.xy]; + const float2 depthWidthCenter = DepthTexture[DTid.xy].xy; + + /*if (depthCenter <= FP_Z || depthCenter > SKY_Z) + { + VarianceOutput[DTid.xy] = temporalColor; + return; + }*/ + + const float3 moments = MomentsTexture[DTid.xy].xyz; + const float history = moments.z; + + const float historyThreshold = float(Frame.HistoryThreshold); + + if (history <= historyThreshold) { + float3 normalWS; + float roughness; + GetNormalRoughness(DTid.xy, normalWS, roughness); + + float luminanceCenter = Color::RGBToLuminance(temporalColor.xyz); + + float weightSum = 1.f; // the centre texel is seeded into the two sums below with weight 1, so its weight must be counted here as well + float3 colorSum = temporalColor.xyz; + float2 momentsSum = moments.xy; + + const float normalPhi = Frame.NormalPhi; + const float colorPhi = Frame.ColorPhi; + const float phiDepth = RADIUS * depthWidthCenter.y * Frame.DepthPhi; + + for (int y = -RADIUS; y <= RADIUS; y++) + { + for (int x = -RADIUS; x <= RADIUS; x++) + { + if (x == 0 && y == 0) // centre is already in the sums; visiting it again would add it a second time + continue; + + const int2 samplePos = int2(DTid.xy) + int2(x, y); + + if (all(samplePos >= 0) && all(samplePos < screenSize)) + { + float4 neighborTemporalColor = TemporalTexture[samplePos]; + + float3 neighborNormalWS; + float neighborRoughness; + GetNormalRoughness(samplePos, neighborNormalWS, neighborRoughness); + float neighborLuminance = Color::RGBToLuminance(neighborTemporalColor.xyz); + float depthNeighbor = DepthTexture[samplePos].x; + + float weight = CalculateWeight(depthWidthCenter.x, depthNeighbor, phiDepth, normalWS, neighborNormalWS, normalPhi, luminanceCenter, neighborLuminance, colorPhi); + + weightSum += weight; + colorSum += neighborTemporalColor.xyz * weight; + momentsSum += MomentsTexture[samplePos].xy * weight; + } +
} + } + + weightSum = max(weightSum, VAR_EPSILON); // weightSum is at least 1 because of the centre seed; guard kept as a cheap safety net + + colorSum /= weightSum; + momentsSum /= weightSum; + + float variance = max(0.0f, momentsSum.y - momentsSum.x * momentsSum.x); + variance *= historyThreshold / max(history, 1.0f); + + VarianceOutput[DTid.xy] = float4(colorSum, variance); + } + else + { + VarianceOutput[DTid.xy] = temporalColor; + } +} \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/GI/AnyHit.hlsl b/features/Raytracing/Shaders/Raytracing/GI/AnyHit.hlsl new file mode 100644 index 0000000000..a828036a80 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/GI/AnyHit.hlsl @@ -0,0 +1,49 @@ +#include "Raytracing/Includes/Types.hlsli" +#include "Raytracing/Includes/Registers.hlsli" +#include "Raytracing/Includes/RT/CommonRT.hlsli" +#include "Raytracing/Includes/RT/Geometry.hlsli" +#include "Raytracing/Includes/PBR.hlsli" + +#include "Common/Color.hlsli" + +[shader("anyhit")] +void main(inout Payload payload, in BuiltInTriangleIntersectionAttributes attribs) +{ + Shape shape = GetShape(InstanceIndex(), GeometryIndex()); + + Vertex v0, v1, v2; + GetVertices(shape.GeometryIdx, PrimitiveIndex(), v0, v1, v2); + + float3 uvw = GetBary(attribs.barycentrics); + + Material material = shape.Material; + + float2 texCoord = material.TexCoord(Interpolate(v0.Texcoord0, v1.Texcoord0, v2.Texcoord0, uvw)); + + float alpha = Textures[NonUniformResourceIndex(material.BaseTexture())].SampleLevel(BaseSampler, texCoord, 0).a; + + alpha *= material.BaseColor().a; + + if ((material.ShaderFlags & ShaderFlags::kVertexAlpha) && !(material.ShaderFlags & ShaderFlags::kTreeAnim)) + { + alpha *= Interpolate(v0.Color.unpack().a, v1.Color.unpack().a, v2.Color.unpack().a, uvw); + } + + [branch] + if (material.AlphaFlags & AlphaFlags::kAlphaTest) + { + if (alpha < material.AlphaThreshold()) + { + IgnoreHit(); + } + } + if ((material.AlphaFlags & AlphaFlags::kAlphaBlend) && (material.Feature == Feature::kHairTint || material.Feature ==
Feature::kFaceGen || material.Feature == Feature::kFaceGenRGBTint || material.Feature == Feature::kEye || material.ShaderFlags & ShaderFlags::kTwoSided)) + { + float rnd = Random(payload.randomSeed); + if (rnd > alpha) + { + IgnoreHit(); + } + } +} + diff --git a/features/Raytracing/Shaders/Raytracing/GI/ClosestHit.hlsl b/features/Raytracing/Shaders/Raytracing/GI/ClosestHit.hlsl new file mode 100644 index 0000000000..b48ace733d --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/GI/ClosestHit.hlsl @@ -0,0 +1,12 @@ +#include "Raytracing/Includes/Types.hlsli" + +#include "Raytracing/Includes/Materials/TexLODHelpers.hlsli" + +[shader("closesthit")] +void main(inout Payload payload, in BuiltInTriangleIntersectionAttributes attribs) +{ + payload.hitDistance = RayTCurrent(); + payload.primitiveIndex = PrimitiveIndex(); + payload.PackBarycentrics(attribs.barycentrics); + payload.PackInstanceGeometryIndex(InstanceIndex(), GeometryIndex()); +} \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/GI/Miss.hlsl b/features/Raytracing/Shaders/Raytracing/GI/Miss.hlsl new file mode 100644 index 0000000000..e2702b2377 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/GI/Miss.hlsl @@ -0,0 +1,7 @@ +#include "Raytracing/Includes/Types.hlsli" + +[shader("miss")] +void main(inout Payload payload) +{ + +} diff --git a/features/Raytracing/Shaders/Raytracing/GI/RayGeneration.hlsl b/features/Raytracing/Shaders/Raytracing/GI/RayGeneration.hlsl new file mode 100644 index 0000000000..35cd6a3bf2 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/GI/RayGeneration.hlsl @@ -0,0 +1,597 @@ +#include "Raytracing/Includes/Types.hlsli" + +#include "Raytracing/Includes/RT/SHaRC.hlsli" +#include "Raytracing/Includes/Registers.hlsli" +#include "Raytracing/Includes/RT/SHaRCHelper.hlsli" + +#include "Raytracing/Includes/Common.hlsli" +#include "Raytracing/Includes/ColorConversions.hlsli" +#include "Raytracing/Includes/RT/CommonRT.hlsli" +#include 
"Raytracing/Includes/RT/Shading.hlsli" +#include "Raytracing/Includes/RT/Geometry.hlsli" +#include "Raytracing/Includes/RT/SubsurfaceShading.hlsli" + +#include "Common/Color.hlsli" +#include "Common/BRDF.hlsli" + +#include "Raytracing/Includes/Surface.hlsli" + +#include "Raytracing/Includes/MonteCarlo.hlsli" +#include "Raytracing/Includes/PBR.hlsli" + +#include "Raytracing/Includes/Materials/BSDF.hlsli" +#include "Raytracing/Includes/Materials/TexLODHelpers.hlsli" + +[shader("raygeneration")] +void main() +{ + uint2 idx = DispatchRaysIndex().xy; + uint2 size = DispatchRaysDimensions().xy; + +#if defined(CHECKERBOARD) + if ((idx.x + idx.y) & 1) +#elif defined(TEMPORAL_CHECKERBOARD) + if ((idx.x + idx.y + Frame.FrameCount) & 1) +#endif +#if defined(CHECKERBOARD) || defined(TEMPORAL_CHECKERBOARD) + { + OutputTexture[idx] = float4(0.0f, 0.0f, 0.0f, 0.0f); + DiffuseAlbedoPathTracing[idx] = float4(0.0f, 0.0f, 0.0f, 1.0f); + NormalRoughnessPathTracing[idx] = float4(0.0f, 0.0f, 0.0f, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + + return; + } +#endif + + uint randomSeed = InitRandomSeed(idx, size, Frame.FrameCount); + bool isSssPath = false; + +#if defined(SHARC) + SharcParameters sharcParameters = GetSharcParameters(); + +# if defined(SHARC_UPDATE) + [branch] + if (Frame.SHaRC.UpdatePass) { + uint startIndex = Hash(idx) % 25; + + uint2 blockOrigin = idx * 5; + + uint pixelIndex = (startIndex + Frame.FrameCount) % 25; + + idx = blockOrigin + uint2(pixelIndex % 5, pixelIndex / 5); + + if (any(idx >= Frame.DispatchSize)) + return; + + size = Frame.DispatchSize; + } +# endif + +#endif + +#if defined(PATH_TRACING) + const float2 uv = float2(idx + 0.5f) / size; + + float2 screenPos = uv * 2.0f - 1.0f; + screenPos.y = -screenPos.y; + + const float4 clip = float4(screenPos, 1.0f, 1.0f); + float4 view = mul(Frame.ProjInverse, clip); + view /= view.w; + + float3 sourceDirection = normalize(mul((float3x3)Frame.ViewInverse, 
view.xyz)); + + RayDesc sourceRay; + sourceRay.Origin = Frame.Position.xyz; + sourceRay.Direction = sourceDirection; + sourceRay.TMin = 0.1f; + sourceRay.TMax = 1e30; + + Payload sourcePayload; + sourcePayload.hitDistance = -1.0f; + sourcePayload.primitiveIndex = 0; + sourcePayload.PackBarycentrics(float2(0.0f, 0.0f)); + sourcePayload.PackInstanceGeometryIndex(0, 0); + sourcePayload.randomSeed = randomSeed; + + TraceRay(Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, 0xFF, DIFFUSE_RAY_HITGROUP_IDX, 0, DIFFUSE_RAY_MISS_IDX, sourceRay, sourcePayload); + randomSeed = sourcePayload.randomSeed; + + RayCone sourceRayCone = RayCone::make(Frame.PixelConeSpreadAngle * sourcePayload.hitDistance, Frame.PixelConeSpreadAngle); + + if (!sourcePayload.Hit()) + { +#if defined(SHARC) && defined(SHARC_UPDATE) + [branch] + if (Frame.SHaRC.UpdatePass) + return; +#endif + + const float4 mainColor = MainTexture.SampleLevel(BaseSampler, uv, 0); + + OutputTexture[idx] = float4(LLGammaToTrueLinear(mainColor.rgb), 0.0f); + DiffuseAlbedoPathTracing[idx] = float4(0.0f, 0.0f, 0.0f, 1.0f); + NormalRoughnessPathTracing[idx] = float4(0.0f, 0.0f, 0.0f, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; + } + + float3 sourcePosition = Frame.Position.xyz + sourceDirection * sourcePayload.hitDistance; + + Instance sourceInstance; + Material sourceMaterial; + + Surface sourceSurface = Surface(sourcePosition, sourcePayload, sourceDirection, sourceRayCone, sourceInstance, sourceMaterial); + BRDFContext sourceBRDFContext = BRDFContext(sourceSurface, -sourceDirection); + if (dot(sourceSurface.FaceNormal, sourceBRDFContext.ViewDirection) < 0.0f) sourceSurface.FlipNormal(); + + StandardBSDF sourceBSDF = StandardBSDF::make(sourceSurface, true); + + AdjustShadingNormal(sourceSurface, sourceBRDFContext, true, false); + + // Direct Light for PT + float3 direct = sourceSurface.Emissive; +#ifdef SUBSURFACE_SCATTERING + if 
(sourceSurface.SubsurfaceData.HasSubsurface != 0) { + direct += EvaluateSubsurfaceNEE(sourceSurface, sourceBRDFContext, sourceMaterial, sourceInstance, sourcePayload, sourceRayCone, randomSeed); + isSssPath = true; + } + else +#endif + direct += EvaluateDirectRadiance(sourceMaterial, sourceSurface, sourceBRDFContext, sourceInstance, sourceBSDF, randomSeed); +#else + const float2 uv = float2(idx + 0.5f) / size; + + const float depth = DepthTexture.SampleLevel(BaseSampler, uv, 0) * 0.99998; + + const float depthView = ScreenToViewDepth(depth, Frame.CameraData); + + const float4 mainColor = MainTexture.SampleLevel(BaseSampler, uv, 0); + + [branch] + if (depthView < FP_VIEW_Z || depth >= SKY_Z) + { +#if defined(SHARC) && defined(SHARC_UPDATE) + [branch] + if (Frame.SHaRC.UpdatePass) + return; +#endif + +#if defined(RAW_RADIANCE) + OutputTexture[idx] = float4(0.0f, 0.0f, 0.0f, 0.0f); + SpecularAlbedo[idx] = float4(0.0f, 0.0f, 0.0f, 0.0f); +#else + OutputTexture[idx] = float4(LLGammaToTrueLinear(mainColor.rgb), mainColor.a); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; +#endif + return; + } + + // Normal is pre-transformed into World-Space and Smoothness becomes Roughness when we copy the RT to DX12 + const snorm half4 normalRoughness = (half4) NormalRoughnessTexture[idx]; + + // We should also scale the GBuffer for DLSSRR + const unorm float linearRoughness = normalRoughness.w; + + const unorm float4 normalMetalnessAO = GNMAOTexture.SampleLevel(BaseSampler, uv, 0); + + const half3 geometryNormalVS = DecodeNormal((half2)normalMetalnessAO.xy); + const float3 geometryNormalWS = normalize(ViewToWorldVector(geometryNormalVS, Frame.ViewInverse)); + +#if defined(DEBUG_GEOMNORMALOUT) + OutputTexture[idx] = float4(geometryNormalWS * 0.5f + 0.5f, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + +#if defined(DEBUG_DEPTHOUT) + OutputTexture[idx] = float4(depth, 0.0f, 
0.0f, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + +#if defined(DEBUG_VIEWDEPTHOUT) + OutputTexture[idx] = float4(depthView, 0.0f, 0.0f, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + + const float metalness = normalMetalnessAO.z; + const float ao = 1.0f; + +#if defined(DEBUG_ROUGHNESSOUT) + OutputTexture[idx] = float4(linearRoughness, 0.0f, 0.0f, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + +#if defined(DEBUG_METALLICOUT) + OutputTexture[idx] = float4(metalness, 0.0f, 0.0f, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + +#if defined(DEBUG_AOOUT) + OutputTexture[idx] = float4(ao, 0.0f, 0.0f, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + + const float3 positionVS = ScreenToViewPosition(uv, depthView, Frame.NDCToView); + const float3 positionCS = ViewToWorldPosition(positionVS, Frame.ViewInverse); + const float3 positionWS = positionCS + Frame.Position.xyz; + + const float hitDistance = length(positionCS); + + const snorm half3 normalWS = normalRoughness.xyz; + + float3 tangentWS, bitangentWS; + CreateOrthonormalBasis(normalWS, tangentWS, bitangentWS); + + float3 albedo = LLGammaToTrueLinear(AlbedoTexture.SampleLevel(BaseSampler, uv, 0).rgb); + + RayCone sourceRayCone = RayCone::make(Frame.PixelConeSpreadAngle * hitDistance, Frame.PixelConeSpreadAngle); + + Surface sourceSurface = Surface(positionWS, geometryNormalWS, normalWS, tangentWS, bitangentWS, albedo, linearRoughness, metalness, 0, ao); + BRDFContext sourceBRDFContext = BRDFContext(sourceSurface, -positionCS / hitDistance); + + StandardBSDF sourceBSDF = StandardBSDF::make(sourceSurface, true); + + AdjustShadingNormal(sourceSurface, sourceBRDFContext, true, 
false); +#endif + +#if defined(DEBUG_MODELSPACE) + [branch] + if (sourceMaterial.ShaderFlags & ShaderFlags::kModelSpaceNormals) { + OutputTexture[idx] = float4(1.0f, 0.0f, 0.0f, 1.0f); + } else { + OutputTexture[idx] = float4(0.0f, 0.0f, 0.5f, 1.0f); + } + + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + +#if defined(DEBUG_NORMALOUT) || defined(DEBUG_TANGENTOUT) || defined(DEBUG_BITANGENTOUT) + +#if defined(DEBUG_NORMALOUT) + float3 output = sourceSurface.Normal; +#elif defined(DEBUG_TANGENTOUT) + float3 output = sourceSurface.Tangent; +#else + float3 output = sourceSurface.Bitangent; +#endif + + OutputTexture[idx] = float4(output * 0.5f + 0.5f, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + +#if defined(DEBUG_TRANSOUT) + OutputTexture[idx] = float4(sourceSurface.TransmissionColor, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + +#if defined(DEBUG_MIPLEVEL) + float3 output = TurboColormap(saturate(sourceSurface.MipLevel / 12.0f)); + OutputTexture[idx] = float4(output, 1.0f); + SpecularAlbedo[idx] = float4(0.5f, 0.5f, 0.5f, 0.0f); + SpecularHitDist[idx] = RAY_TMAX; + return; +#endif + +#if defined(SHARC) && defined(SHARC_DEBUG) + HashGridParameters gridParameters = GetSharcGridParameters(); + + OutputTexture[idx] = float4(HashGridDebugColoredHash(positionWS, geometryNormalWS, gridParameters), 1); + return; +#endif + + float3 direction; + MonteCarlo::BRDFWeight brdfWeight; + + float3 radiance = 0; + bool isSpecular = false; + float specHitDist = 0; + + RayDesc ray; + Payload payload; + + Instance instance; + Material material; + + Surface surface; + BRDFContext brdfContext; + + StandardBSDF bsdf; + + RayCone rayCone; + +#if defined(SHARC) + SharcState sharcState; + SharcHitData sharcHitData; +#endif + + [loop] + for (uint i = 0; i < MAX_SAMPLES; i++) + { +#if 
defined(SHARC) && defined(SHARC_UPDATE) + [branch] + if (Frame.SHaRC.UpdatePass) + { + SharcInit(sharcState); + } +#endif + + surface = sourceSurface; + brdfContext = sourceBRDFContext; + bsdf = sourceBSDF; + rayCone = sourceRayCone; +#if defined(PATH_TRACING) + material = sourceMaterial; + instance = sourceInstance; + payload = sourcePayload; +#endif + + float3 sampleRadiance = float3(0.0f, 0.0f, 0.0f); + float3 throughput = float3(1.0f, 1.0f, 1.0f); + float materialRoughnessPrev = 0.0f; + bool isEnter = true; + +#if defined(RAW_RADIANCE) + float3 throughputDelta = float3(1.0f, 1.0f, 1.0f); +#endif + + [loop] + for (uint j = 0; j < MAX_BOUNCES; j++) + { + BSDFSample bsdfSample; +#if LIGHTING_MODE == LIGHTING_MODE_DIFFUSE + direction = surface.Mul(SampleCosineHemisphere(randomSeed)); + + float NdotD = saturate(dot(surface.Normal, direction)); + + throughput *= surface.AO; + throughput *= surface.Albedo; +#else + bool isValid = bsdf.SampleBSDF(brdfContext, material, surface, bsdfSample, randomSeed); + isSpecular = bsdfSample.isLobe(LobeType::Specular); + bool hasTransmission = bsdfSample.isLobe(LobeType::Transmission); + + float3 faceNormalOriented = dot(brdfContext.ViewDirection, surface.FaceNormal) >= 0.0f ? surface.FaceNormal : -surface.FaceNormal; + + if (isValid) + direction = bsdfSample.wo; + else + break; + + throughput *= bsdfSample.isLobe(LobeType::Transmission) ? 1.f : surface.AO; + + // Update isEnter state when transmission occurs + if (hasTransmission) { + isEnter = !isEnter; + } else { + isEnter = dot(direction, faceNormalOriented) >= 0.0f; + } + + brdfWeight.diffuse = bsdfSample.isLobe(LobeType::DiffuseReflection) ? bsdfSample.weight : float3(0.f, 0.f, 0.f); +# if defined(RAW_RADIANCE) + brdfWeight.diffuse /= max(surface.DiffuseAlbedo, 1e-4f); +# endif + brdfWeight.specular = bsdfSample.isLobe(LobeType::SpecularReflection) ? bsdfSample.weight : float3(0.f, 0.f, 0.f); + brdfWeight.transmission = bsdfSample.isLobe(LobeType::Transmission) ? 
bsdfSample.weight : float3(0.f, 0.f, 0.f); + +# if defined(RAW_RADIANCE) + float3 brdfWeightOriginal = brdfWeight.diffuse * surface.DiffuseAlbedo + brdfWeight.specular + brdfWeight.transmission; + +#if defined(SHARC) && defined(SHARC_UPDATE) + const bool sharcUpdatePass = Frame.SHaRC.UpdatePass; +#else + const bool sharcUpdatePass = false; +#endif + + if (j > 0 || sharcUpdatePass) { + throughput *= brdfWeightOriginal; + } else { + float3 brdfWeightRaw = bsdfSample.weight; + + throughputDelta = brdfWeightOriginal / brdfWeightRaw; + + throughput *= brdfWeightRaw; + } +# else + throughput *= bsdfSample.weight; +# endif +#endif + +#if defined(SHARC) && defined(SHARC_UPDATE) + [branch] + if (Frame.SHaRC.UpdatePass) + { + SharcSetThroughput(sharcState, throughput); + } else +#endif + if (Frame.RussianRoulette) + { + float3 throughputColor; + +#if defined(RAW_RADIANCE) + throughputColor = throughput * throughputDelta; +#else + throughputColor = throughput; +#endif + const float rrVal = sqrt(Color::RGBToLuminance(throughputColor)); + float rrProb = saturate(0.85 - rrVal); + rrProb *= rrProb; + + rrProb = saturate(rrProb + max(0, ((float)j / (float)MAX_BOUNCES - 0.4f))); + + if (Random(randomSeed) < rrProb) + break; + + throughput /= (1.0f - rrProb); + } + +#if defined(SHARC) + materialRoughnessPrev += bsdfSample.isLobe(LobeType::Diffuse) ? 
1.0f : surface.Roughness;
+#endif
+
+            // Spawn the next path segment from the current surface along the sampled direction.
+            ray.Origin = OffsetRay(surface.Position, faceNormalOriented, hasTransmission);
+            ray.Direction = direction;
+            ray.TMin = 0.0f; // OffsetRay already handles precision, no additional offset needed
+            ray.TMax = RAY_TMAX;
+
+            // Reset the payload before tracing; the closest-hit shader overwrites these fields on a hit.
+            payload.hitDistance = -1.0f;
+            payload.primitiveIndex = 0;
+            payload.PackBarycentrics(float2(0.0f, 0.0f));
+            payload.PackInstanceGeometryIndex(0, 0);
+            payload.randomSeed = randomSeed;
+
+            // Non-delta scattering widens the ray cone (capped at 2*pi) before it is propagated to the hit.
+            if (!bsdfSample.isLobe(LobeType::Delta))
+                rayCone = RayCone::make(rayCone.getWidth(), min(rayCone.getSpreadAngle() + ComputeRayConeSpreadAngleExpansionByScatterPDF(bsdfSample.pdf), 2.0 * K_PI));
+
+            TraceRay(Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, 0xFF, DIFFUSE_RAY_HITGROUP_IDX, 0, DIFFUSE_RAY_MISS_IDX, ray, payload);
+            randomSeed = payload.randomSeed;
+            rayCone = rayCone.propagateDistance(payload.hitDistance);
+
+            if (isSpecular)
+                specHitDist += payload.hitDistance;
+
+            if (!payload.Hit())
+            {
+                float3 skyIrradiance = SampleSky(direction) * Frame.Sky;
+
+#if defined(SHARC) && defined(SHARC_UPDATE)
+                [branch]
+                if (Frame.SHaRC.UpdatePass)
+                {
+                    SharcUpdateMiss(sharcParameters, sharcState, skyIrradiance);
+                    break;
+                }
+#endif
+
+                sampleRadiance += skyIrradiance * throughput;
+                break;
+            }
+
+            float3 localPosition = ray.Origin + direction * payload.hitDistance;
+
+            surface = Surface(localPosition, payload, direction, rayCone, instance, material);
+
+#if defined(SHARC)
+            sharcHitData.positionWorld = surface.Position;
+            // The cache entry is keyed on the hit point, so pair its position with the face normal of
+            // the surface that was just hit (oriented against the incoming ray). faceNormalOriented
+            // was computed before TraceRay and still describes the surface the ray left.
+            sharcHitData.normalWorld = dot(surface.FaceNormal, direction) <= 0.0f ? surface.FaceNormal : -surface.FaceNormal;
+
+# if SHARC_SEPARATE_EMISSIVE
+            sharcHitData.emissive = surface.Emissive;
+# endif // SHARC_SEPARATE_EMISSIVE
+
+            [branch]
+            if (!Frame.SHaRC.UpdatePass)
+            {
+                uint gridLevel = HashGridGetLevel(surface.Position, sharcParameters.gridParameters);
+                float voxelSize = HashGridGetVoxelSize(gridLevel, sharcParameters.gridParameters);
+                // Only query the cache when the segment is longer than a voxel diagonal.
+                bool isValidHit = payload.hitDistance > voxelSize * sqrt(3.0f);
+
+                if (isValidHit) {
+                    materialRoughnessPrev =
min(materialRoughnessPrev, 0.99f); + float a2 = materialRoughnessPrev * materialRoughnessPrev * materialRoughnessPrev * materialRoughnessPrev; + float footprint = payload.hitDistance * sqrt(0.5f * a2 / max(1.0f - a2, DIV_EPSILON)); + isValidHit &= footprint > voxelSize; + } + + float3 sharcRadiance; + if (isValidHit && SharcGetCachedRadiance(sharcParameters, sharcHitData, sharcRadiance, false)) + { + sampleRadiance += sharcRadiance * throughput; + break; + } + + } +#endif + + brdfContext = BRDFContext(surface, -direction); + if (dot(surface.FaceNormal, brdfContext.ViewDirection) < 0.0f) surface.FlipNormal(); + + AdjustShadingNormal(surface, brdfContext, true, false); // Adjusts the normal of the supplied shading frame to reduce black pixels due to back-facing view direction. + bsdf = StandardBSDF::make(surface, isEnter); + + float3 directRadiance = 0.0f; +#ifdef SUBSURFACE_SCATTERING + if (surface.SubsurfaceData.HasSubsurface != 0 && !isSssPath) { + directRadiance += EvaluateSubsurfaceNEE(surface, brdfContext, material, instance, payload, rayCone, randomSeed); + isSssPath = true; + } + else +#endif + directRadiance += EvaluateDirectRadiance(material, surface, brdfContext, instance, bsdf, randomSeed); + sampleRadiance += directRadiance * throughput; + +#if defined(SHARC) && defined(SHARC_UPDATE) + [branch] + if (Frame.SHaRC.UpdatePass) + { + if (!SharcUpdateHit(sharcParameters, sharcState, sharcHitData, directRadiance, Random(randomSeed))) + return; + + throughput = float3(1.0f, 1.0f, 1.0f); + } else +#endif + { + sampleRadiance += surface.Emissive * throughput; + } + } + + radiance += sampleRadiance; + +#if defined(SHARC) && defined(SHARC_UPDATE) + // SHaRC is single sample only and does not write to texture outputs + [branch] + if (Frame.SHaRC.UpdatePass) + { + return; + } +#endif + } + + radiance /= MAX_SAMPLES; + + const float2 envBRDF = BRDF::EnvBRDFApproxHirvonen(sourceSurface.Roughness, sourceBRDFContext.NdotV); + const float3 specularAlbedo = 
float3(sourceSurface.F0 * envBRDF.x + envBRDF.y); + +#if defined(PATH_TRACING) + OutputTexture[idx] = float4(direct + radiance, 0.0f); + DiffuseAlbedoPathTracing[idx] = float4(sourceSurface.DiffuseAlbedo, 1.0f); + NormalRoughnessPathTracing[idx] = float4(sourceSurface.Normal, sourceSurface.Roughness); +#else +# if defined(RAW_RADIANCE) + // Diffuse Output + OutputTexture[idx] = float4(isSpecular ? 0.0f : radiance, 1.0f); + + // Specular Output (Reused texture from DLSS RR) + SpecularAlbedo[idx] = float4(isSpecular ? radiance * specularAlbedo : 0.0f, specHitDist); +# else + OutputTexture[idx] = float4(LLGammaToTrueLinear(mainColor.rgb) + radiance, 1.0f); +# endif +#endif + +#if !defined(RAW_RADIANCE) + SpecularAlbedo[idx] = float4(specularAlbedo, 0.0f); + + SpecularHitDist[idx] = specHitDist; +#endif +} diff --git a/features/Raytracing/Shaders/Raytracing/GI/ShadowAnyHit.hlsl b/features/Raytracing/Shaders/Raytracing/GI/ShadowAnyHit.hlsl new file mode 100644 index 0000000000..609a3d0fa7 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/GI/ShadowAnyHit.hlsl @@ -0,0 +1,119 @@ +#include "Raytracing/Includes/Types.hlsli" +#include "Raytracing/Includes/Registers.hlsli" +#include "Raytracing/Includes/RT/CommonRT.hlsli" +#include "Raytracing/Includes/RT/Geometry.hlsli" +#include "Raytracing/Includes/PBR.hlsli" + +#include "Common/Color.hlsli" + +[shader("anyhit")] +void main(inout ShadowPayload payload, in BuiltInTriangleIntersectionAttributes attribs) +{ + Shape shape = GetShape(InstanceIndex(), GeometryIndex()); + + Vertex v0, v1, v2; + GetVertices(shape.GeometryIdx, PrimitiveIndex(), v0, v1, v2); + + float3 uvw = GetBary(attribs.barycentrics); + + Material material = shape.Material; + + float2 texCoord = material.TexCoord(Interpolate(v0.Texcoord0, v1.Texcoord0, v2.Texcoord0, uvw)); + + float alpha = Textures[NonUniformResourceIndex(material.BaseTexture())].SampleLevel(BaseSampler, texCoord, 0).a; + + alpha *= material.BaseColor().a; + + if ((material.ShaderFlags 
& ShaderFlags::kVertexAlpha) && !(material.ShaderFlags & ShaderFlags::kTreeAnim)) + { + alpha *= Interpolate(v0.Color.unpack().a, v1.Color.unpack().a, v2.Color.unpack().a, uvw); + } + + [branch] + if (material.AlphaFlags & AlphaFlags::kAlphaTest) + { + if (alpha < material.AlphaThreshold()) + { + IgnoreHit(); + } + } + + if (material.AlphaFlags & AlphaFlags::kAlphaBlend) + { + float rnd = Random(payload.randomSeed); + if (rnd > alpha) + { + IgnoreHit(); + } + } + + if (material.AlphaFlags & (AlphaFlags::kAlphaTest | AlphaFlags::kAlphaBlend)) + { + AcceptHitAndEndSearch(); + } + else if ((material.Feature == Feature::kGlowMap || material.PBRFlags & PBR::Flags::HasEmissive) && material.ShaderFlags & ShaderFlags::kAssumeShadowmask) // only window for now + { + float3 transmittance = 0.0f; + float3 F0 = 0.04f; + [branch] + if (material.Feature == Feature::kGlowMap) + { + transmittance = Textures[NonUniformResourceIndex(material.GlowTexture())].SampleLevel(BaseSampler, texCoord, 0).rgb; + [branch] + if (material.ShaderFlags & ShaderFlags::kSpecular) { + float3 specularColor = 0.0f; + + [branch] + if (material.ShaderFlags & ShaderFlags::kModelSpaceNormals) { + Texture2D specularTexture = Textures[NonUniformResourceIndex(material.SpecularTexture())]; + specularColor = specularTexture.SampleLevel(BaseSampler, texCoord, 0).r * material.SpecularColor().rgb * material.SpecularColor().a; + } else { + Texture2D normalTexture = Textures[NonUniformResourceIndex(material.NormalTexture())]; + specularColor = normalTexture.SampleLevel(BaseSampler, texCoord, 0).a * material.SpecularColor().rgb * material.SpecularColor().a; + } + F0 = clamp(0.08f * specularColor, 0.02f, 0.08f); + } + } + else + { + Texture2D rmaosTexture = Textures[NonUniformResourceIndex(material.RMAOSTexture())]; + Texture2D emissiveTexture = Textures[NonUniformResourceIndex(material.EmissiveTexture())]; + float specular = rmaosTexture.SampleLevel(BaseSampler, texCoord, 0).a; + float3 emissive = 
emissiveTexture.SampleLevel(BaseSampler, texCoord, 0).rgb; + transmittance = emissive; + F0 = material.SpecularLevel() * specular; + } + + Instance instance = GetInstance(InstanceIndex()); + float3x3 objectToWorld3x3 = mul((float3x3) instance.Transform, (float3x3) shape.Transform); + + float3 normalWS = normalize(mul(objectToWorld3x3, Interpolate(v0.Normal, v1.Normal, v2.Normal, uvw))); + float3 tangentWS = normalize(mul(objectToWorld3x3, Interpolate(v0.Tangent, v1.Tangent, v2.Tangent, uvw))); + float3 bitangentWS = normalize(mul(objectToWorld3x3, Interpolate(v0.Bitangent, v1.Bitangent, v2.Bitangent, uvw))); + + Texture2D normalTexture = Textures[NonUniformResourceIndex(material.NormalTexture())]; + float3 normal = normalTexture.SampleLevel(BaseSampler, texCoord, 0).xyz; + + float handedness = (dot(cross(normalWS, tangentWS), bitangentWS) < 0.0f) ? -1.0f : 1.0f; + + float3 Normal, Tangent, Bitangent; + + NormalMap( + normal, + handedness, + normalWS, tangentWS, bitangentWS, + Normal, Tangent, Bitangent + ); + + float3 viewDir = -normalize(WorldRayDirection()); + + float NdotV = abs(dot(Normal, viewDir)); + + float3 F = BRDF::F_Schlick(F0, NdotV); + transmittance *= (1.0f - F) / (1.0f + F); + + payload.transmission *= transmittance; + IgnoreHit(); + } +} + diff --git a/features/Raytracing/Shaders/Raytracing/GI/ShadowMiss.hlsl b/features/Raytracing/Shaders/Raytracing/GI/ShadowMiss.hlsl new file mode 100644 index 0000000000..20682fe1e1 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/GI/ShadowMiss.hlsl @@ -0,0 +1,7 @@ +#include "Raytracing/Includes/Types.hlsli" + +[shader("miss")] +void main(inout ShadowPayload payload) +{ + payload.missed = 1.0f; +} diff --git a/features/Raytracing/Shaders/Raytracing/Includes/AdvancedSettings.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/AdvancedSettings.hlsli new file mode 100644 index 0000000000..1ba7c9ebb5 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/AdvancedSettings.hlsli @@ -0,0 +1,20 
@@ +#ifndef ADVANCED_SETTINGS_HLSL +#define ADVANCED_SETTINGS_HLSL + +#define DIFFUSE_MODE_LAMBERT 0 +#define DIFFUSE_MODE_BURLEY 1 +#define DIFFUSE_MODE_ORENNAYAR 2 +#define DIFFUSE_MODE_GOTANDA 3 +#define DIFFUSE_MODE_CHAN 4 + +#define LIGHTEVAL_MODE_DIFFUSE 0 +#define LIGHTEVAL_MODE_BRDF 1 + +#define LIGHTING_MODE_DIFFUSE 0 +#define LIGHTING_MODE_PBR 1 + +#ifndef RIS_MAX_CANDIDATES +#define RIS_MAX_CANDIDATES (4) +#endif + +#endif // ADVANCED_SETTINGS_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/ColorConversions.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/ColorConversions.hlsli new file mode 100644 index 0000000000..f201de147f --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/ColorConversions.hlsli @@ -0,0 +1,64 @@ +#ifndef COLOR_CONVERSIONS_COMMON_HLSLI +#define COLOR_CONVERSIONS_COMMON_HLSLI + +#include "Raytracing/Includes/Registers.hlsli" +#include "Raytracing/Includes/SharedData.hlsli" + +#define LLSETTINGS Frame.Features.LinearLighting +#define LLON LLSETTINGS.enableLinearLighting + +float3 ColorToLinear(float3 color) +{ + return pow(abs(color), (LLON ? LLSETTINGS.colorGamma : 2.2f)); +} + +float3 EffectToLinear(float3 color) +{ + return pow(abs(color), (LLON ? LLSETTINGS.effectGamma : 2.2f)) * (LLON ? LLSETTINGS.effectLightingMult : 1.0); +} + +float3 LightToLinear(float3 color) +{ + return pow(abs(color), LLSETTINGS.lightGamma); +} + +float3 PointLightToLinear(float3 color, bool isLinear) +{ + return (isLinear && LLON) ? color : LightToLinear(color) * LLSETTINGS.pointLightMult; +} + +float3 DirLightToLinear(float3 color) +{ + return (LLSETTINGS.isDirLightLinear && LLON) ? color : LightToLinear(color) * LLSETTINGS.directionalLightMult * LLSETTINGS.dirLightMult; +} + +float3 GlowToLinear(float3 color) +{ + return LLON ? 
pow(abs(color), LLSETTINGS.glowmapGamma) * LLSETTINGS.glowmapMult : color; +} + +float3 VanillaDiffuseColor(float3 color) +{ + return saturate(ColorToLinear(color) * LLSETTINGS.vanillaDiffuseColorMult); +} + +float3 LLGammaToTrueLinear(float3 color) +{ + return LLON ? color : pow(abs(color), 2.2f); +} + +float3 LLTrueLinearToGamma(float3 color) +{ + return LLON ? color : pow(abs(color), 1.0f / 2.2f); +} + +float3 EmitColorToLinear(float3 color) +{ + return LLON ? (pow(abs(color), LLSETTINGS.emitColorGamma)) : color; +} + +float EmitColorMult() +{ + return LLON ? LLSETTINGS.emitColorMult : 1.0f; +} +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Common.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Common.hlsli new file mode 100644 index 0000000000..f362ad1166 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Common.hlsli @@ -0,0 +1,194 @@ +#ifndef COMMON_HLSL +#define COMMON_HLSL + +#include "Common/Game.hlsli" + +#define DEPTH_SCALE (0.99920h) + +#define FP_Z (0.001f) +#define SKY_Z (0.9999f) + +#define FP_VIEW_Z (16.5f) + +#define M_TO_GAME_UNIT (1.0f / (GAME_UNIT_TO_M)) + +float ScreenToViewDepth(const float screenDepth, float4 cameraData) +{ + return (cameraData.w / (-screenDepth * cameraData.z + cameraData.x)); +} + +float3 ScreenToViewPosition(const float2 screenPos, const float viewspaceDepth, const float4 ndcToView) +{ + float3 ret; + ret.xy = (ndcToView.xy * screenPos.xy + ndcToView.zw) * viewspaceDepth; + ret.z = viewspaceDepth; + return ret; +} + +float3 ViewToWorldPosition(const float3 pos, const float4x4 invView) +{ + float4 worldpos = mul(invView, float4(pos, 1)); + return worldpos.xyz / worldpos.w; +} + +float3 ViewToWorldVector(const float3 vec, const float4x4 invView) +{ + return mul((float3x3)invView, vec); +} + +float Remap(float x, float min, float max) +{ + return clamp(min + saturate(x) * (max - min), min, max); +} + +inline float Square(float value) +{ + return value 
* value;
+}
+
+half3 DecodeNormal(half2 f)
+{
+    f = f * 2.0 - 1.0;
+    // https://twitter.com/Stubbesaurus/status/937994790553227264
+    half3 n = half3(f.x, f.y, 1.0 - abs(f.x) - abs(f.y));
+    half t = saturate(-n.z);
+    #if !defined(DX11)
+    n.xy += select(n.xy >= 0.0, -t, t);
+    #else
+    n.xy += n.xy >= 0.0 ? -t : t;
+    #endif
+    return -normalize(n);
+}
+
+void NormalMap(float3 normalMap, float handedness, float3 geomNormalWS, float3 geomTangentWS, float3 geomBitangentWS, out float3 normalWS, out float3 tangentWS, out float3 bitangentWS)
+{
+    normalMap = normalMap * 2.0f - 1.0f;
+
+    normalWS = normalMap.x * geomTangentWS + normalMap.y * geomBitangentWS + normalMap.z * geomNormalWS;
+
+    float normalLengthSq = dot(normalWS, normalWS);
+    normalWS = (normalLengthSq > 1e-6f) ? (normalWS * rsqrt(normalLengthSq)) : geomNormalWS;
+
+    tangentWS = normalize(geomTangentWS - normalWS * dot(geomTangentWS, normalWS));
+    bitangentWS = cross(normalWS, tangentWS) * handedness;
+}
+
+uint StrongIntegerHash(uint x)
+{
+    // From https://github.com/skeeto/hash-prospector
+    // Current best hash in this form: https://github.com/skeeto/hash-prospector/issues/19#issuecomment-1105792898
+    // bias = 0.10734781817103507
+    x ^= x >> 16;
+    x *= 0x21f0aaad;
+    x ^= x >> 15;
+    x *= 0xf35a2d97;
+    x ^= x >> 15;
+    return x;
+}
+
+// Hashes four consecutive seed values into four 32-bit words and advances the seed by 4.
+uint4 SamplerCore(inout uint seed)
+{
+    uint4 result = uint4(StrongIntegerHash(seed + 0),
+        StrongIntegerHash(seed + 1),
+        StrongIntegerHash(seed + 2),
+        StrongIntegerHash(seed + 3));
+    seed += 4;
+    return result;
+}
+
+// Returns two uniform random floats in [0, 1) and advances the seed (by 4, via SamplerCore).
+float2 Get2D(inout uint seed)
+{
+    // 5.96046447754e-08 is 2^-24, so the hash must first be reduced to its top 24 bits.
+    // Scaling the full 32-bit word would produce values in [0, 256) instead of [0, 1).
+    return (SamplerCore(seed).xy >> 8) * 5.96046447754e-08;
+}
+
+// I keep it here because it is also used by DX11 to make the Diffuse Albedo texture from 'True Albedo'
+void UnpackMAO(float packed, out float metalness, out float ao)
+{
+    uint metalnessAO = packed * 65535.0;
+
+    metalness = saturate((metalnessAO & 0xFF) / 255.0f);
+    ao = saturate(((metalnessAO >> 8) & 0xFF) / 255.0f);
+}
+
+float ShadowTerminatorTerm(float3 L, float3 N,
float3 Ns)
+{
+    // Disney terminator softening:
+    // "Taming the Shadow Terminator"
+    // Matt Jen-Yuan Chiang, Yining Karl Li, and Brent Burley
+    // SIGGRAPH 2019 Talks
+    // https://www.yiningkarlli.com/projects/shadowterminator.html
+    const float NoL = saturate(dot(N, L));
+    const float NgoL = saturate(dot(Ns, L));
+    const float NgoN = saturate(dot(Ns, N));
+    const float G = saturate(NgoL / (NoL * NgoN + 1e-6));
+    return G + G * (G - G * G); // smooth
+}
+
+// Converts normal-incidence reflectance F0 to an index of refraction using the
+// largest colour channel: n = (1 + sqrt(F0)) / (1 - sqrt(F0)).
+float F0toIOR(float3 F0)
+{
+    // Clamp to [0, 1] so sqrt never sees a negative value.
+    const float sqrtF0 = sqrt(saturate(max(max(F0.r, F0.g), F0.b)));
+    // F0 == 1 would divide by zero; keep the denominator strictly positive so the
+    // result stays finite. Results for ordinary F0 values are unchanged.
+    return (1.0 + sqrtF0) / max(1.0 - sqrtF0, 1e-4);
+}
+
+// Compute the cosine of the angle of refraction with respect to the surface
+// normal, given the cosine of the angle of incidence with respect to the
+// surface normal and the relative index of refraction at the interface
+// (IOR of incident medium over that of the refracting medium). Both angles
+// are measured with respect to the same surface normal. In case of total
+// internal reflection, the return value is zero.
+float ComputeCosThetaRefracted(float eta, float cosTheta)
+{
+    float cos2ThetaRefracted = 1 - eta * eta * (1 - cosTheta * cosTheta);
+    return -sign(cosTheta) * sqrt(max(cos2ThetaRefracted, 0.0));
+}
+
+// Compute surface reflectance using the Fresnel equations given a relative
+// index of refraction and cosines of the angles of the incident and refracted
+// rays with respect to the surface normal. Both angles are measured with
+// respect to the same surface normal.
+float FresnelDielectric(float eta, float cosTheta1, float cosTheta2)
+{
+    float ks = eta * cosTheta1;
+    float sqrtRs = (ks + cosTheta2) / (ks - cosTheta2);
+    float kp = eta * cosTheta2;
+    float sqrtRp = (kp + cosTheta1) / (kp - cosTheta1);
+    return 0.5 * (sqrtRs * sqrtRs + sqrtRp * sqrtRp);
+}
+
+float FresnelDielectric(float eta, float cosTheta1)
+{
+    float cosTheta2 = ComputeCosThetaRefracted(eta, cosTheta1);
+    return FresnelDielectric(eta, cosTheta1, cosTheta2);
+}
+
+// Copyright 2019 Google LLC.
+// SPDX-License-Identifier: Apache-2.0 + +// Polynomial approximation in GLSL for the Turbo colormap +// Original LUT: https://gist.github.com/mikhailov-work/ee72ba4191942acecc03fe6da94fc73f + +// Authors: +// Colormap Design: Anton Mikhailov (mikhailov@google.com) +// GLSL Approximation: Ruofei Du (ruofei@google.com) + +// See also: https://ai.googleblog.com/2019/08/turbo-improved-rainbow-colormap-for.html + +float3 TurboColormap(float x) +{ + const float4 kRedVec4 = float4(0.13572138, 4.61539260, -42.66032258, 132.13108234); + const float4 kGreenVec4 = float4(0.09140261, 2.19418839, 4.84296658, -14.18503333); + const float4 kBlueVec4 = float4(0.10667330, 12.64194608, -60.58204836, 110.36276771); + const float2 kRedVec2 = float2(-152.94239396, 59.28637943); + const float2 kGreenVec2 = float2(4.27729857, 2.82956604); + const float2 kBlueVec2 = float2(-89.90310912, 27.34824973); + + x = saturate(x); + float4 v4 = float4(1.0, x, x * x, x * x * x); + float2 v2 = v4.zw * v4.z; + return float3( + dot(v4, kRedVec4) + dot(v2, kRedVec2), + dot(v4, kGreenVec4) + dot(v2, kGreenVec2), + dot(v4, kBlueVec4) + dot(v2, kBlueVec2)); +} + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/BSDF.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/BSDF.hlsli new file mode 100644 index 0000000000..f062e68c38 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/BSDF.hlsli @@ -0,0 +1,836 @@ +// Based on Falcor's BSDF implementation + +#ifndef __BSDF_HLSLI__ +#define __BSDF_HLSLI__ + +#include "Common/BRDF.hlsli" +#include "Raytracing/Includes/MathHelpers.hlsli" +#include "Raytracing/Includes/Surface.hlsli" + +#include "Raytracing/Includes/Materials/Fresnel.hlsli" +#include "Raytracing/Includes/Materials/LobeType.hlsli" +#include "Raytracing/Includes/Materials/Microfacet.hlsli" + +#include "Raytracing/Includes/Materials/HairChiangBSDF.hlsli" +#include 
"Raytracing/Includes/Materials/HairFarFieldBCSDF.hlsli" + +#define HAIR_MODE_CHIANG_BSDF 1 +#define HAIR_MODE_FARFIELD_BCSDF 2 + +// Minimum cos(theta) for the incident and outgoing vectors. +// Some BSDF functions are not robust for cos(theta) == 0.0, +// so using a small epsilon for consistency. +static const float kMinCosTheta = 1e-6f; + +// We clamp the GGX width parameter to avoid numerical instability. +// In some computations, we can avoid clamps etc. if 1.0 - alpha^2 != 1.0, so the epsilon should be 1.72666361e-4 or larger in fp32. +// The the value below is sufficient to avoid visible artifacts. +static const float kMinGGXAlpha = 0.0064f; + +static const uint cMaxDeltaLobes = 3; // 3 should be enough (reflection, transmission, clearcoat reflection?) - there's a bit of a register use cost allowing for more than needed +// This represents delta lobe properties with respect to the surface Wi and surface properties (material settings, texture, normal map, etc.) +struct DeltaLobe +{ + float3 thp; // how much light goes through the lobe with respect to the surface Wi and this->Wo; will be 0.xxx if probability == 0 + float probability; // chance this lobe is sampled with current BSDF importance sampling; will be 0 if disabled; + float3 dir; // refracted or reflected direction in world space when returned from StandardBSDF (tangent space when returned from FalcorBSDF); will be 0.xxx if probability == 0; this is where the ray "will go" in unidirectional path tracing + int transmission; // 1 when transmission lobe, 0 when reflection; even though it can be inferred from Wo, this avoids testing Wo vs triangle normal and potential precision issues + + static DeltaLobe make() { DeltaLobe ret; ret.thp = 0.xxx; ret.dir = 0.xxx; ret.transmission = false; ret.probability = 0; return ret; } +}; + +/** Describes a BSDF sample. +*/ +struct BSDFSample +{ + float3 wo; ///< Sampled direction in world space (normalized). 
// BSDFSample members (continued).
    float pdf;              ///< pdf with respect to solid angle for the sampled direction (wo).
    float3 weight;          ///< Sample weight f(wi, wo) * dot(wo, n) / pdf(wo).
    uint lobe;              ///< Sampled lobe. This is a combination of LobeType flags (see LobeType.hlsli).
    float lobeP;            ///< Probability that this lobe sample was picked (including each split between reflection/refraction).

    /// Returns true when the sampled lobe shares at least one flag bit with `type`.
    bool isLobe(LobeType type)
    {
        return (lobe & ((uint)type)) != 0;
    }

    // If delta lobe, returns an unique 2-bit delta lobe identifier (0...3); if not delta lobe returns 0xFFFFFFFF
    // NOTE: this ID must match delta lobe index used in IBSDF::evalDeltaLobes
    uint getDeltaLobeIndex()
    {
        if ((lobe & (uint)LobeType::Delta) == 0u)
            return 0xFFFFFFFF;
        return (lobe & (uint)LobeType::Transmission) == 0u; // if transmission return 0, if reflection return 1; TODO: when clearcoat gets added, use 2 for clearcoat reflection
    }
};

// Helper functions for BSDFs

/// Dispatches to the diffuse BRDF model selected at compile time through DIFFUSE_MODE.
/// Falls back to Lambert when no known mode is selected.
float3 Diffuse(float roughness, float3 N, float3 V, float3 L, float NdotV, float NdotL, float VdotH, float VdotL, float NdotH)
{
#if DIFFUSE_MODE == DIFFUSE_MODE_BURLEY
    return BRDF::Diffuse_Burley(roughness, NdotV, NdotL, VdotH);
#elif DIFFUSE_MODE == DIFFUSE_MODE_ORENNAYAR
    return BRDF::Diffuse_OrenNayar(roughness, N, V, L, NdotV, NdotL);
#elif DIFFUSE_MODE == DIFFUSE_MODE_GOTANDA
    return BRDF::Diffuse_Gotanda(roughness, NdotV, NdotL, VdotL);
#elif DIFFUSE_MODE == DIFFUSE_MODE_CHAN
    return BRDF::Diffuse_Chan(roughness, NdotV, NdotL, VdotH, NdotH);
#else
    return BRDF::Diffuse_Lambert();
#endif
}

/// Diffuse reflection lobe. All directions are in the local frame where the normal is +Z;
/// `wi` is the incident (view) direction and `wo` the outgoing (sampled) direction.
struct DiffuseReflection
{
    float3 albedo;
    float roughness;

    /// Returns f(wi, wo) * cos(theta_o), or zero when either direction is at or below the horizon.
    float3 Eval(const float3 wi, const float3 wo)
    {
        if (min(wi.z, wo.z) <= kMinCosTheta)
            return float3(0.0f, 0.0f, 0.0f);

        // Pass (view, light) in the same order as SampleBSDF() does. The previous
        // (wo, wi) order swapped the two roles, so evaluation and sampling disagreed
        // for diffuse models that are not symmetric in NdotV / NdotL.
        return EvalWeight(wi, wo) * wo.z * K_1_PI;
    }

    /// Cosine-weighted hemisphere sampling driven by preGeneratedSample.xy.
    /// Returns false (zero weight, lobeP = 0) when either direction is at or below the horizon.
    bool SampleBSDF(const float3 wi, out float3 wo, out float pdf, out float3 weight, out uint lobe, out float lobeP, float4 preGeneratedSample)
    {
        wo = sample_cosine_hemisphere_concentric(preGeneratedSample.xy, pdf);
        lobe = (uint)LobeType::DiffuseReflection;

        if (min(wo.z, wi.z) <= kMinCosTheta)
        {
            weight = float3(0.0f, 0.0f, 0.0f);
            lobeP = 0.0f;
            return false;
        }

        weight = EvalWeight(wi, wo);
        lobeP = 1.0f;
        return true;
    }

    /// Solid-angle pdf of the cosine-weighted sampling above: cos(theta_o) / pi.
    float EvalPdf(const float3 wi, const float3 wo)
    {
        if (min(wi.z, wo.z) < kMinCosTheta) return 0.f;

        return K_1_PI * wo.z;
    }

    /// Sample weight f * cos / pdf for cosine-weighted sampling, i.e. albedo * Diffuse(...) * pi.
    /// NOTE: despite the parameter names, the FIRST argument is used as the view vector
    /// (NdotV = wo.z) and the SECOND as the light vector (NdotL = wi.z). Callers in this
    /// struct therefore pass their own (wi, wo) pair in that order.
    float3 EvalWeight(float3 wo, float3 wi)
    {
        const float3 N = float3(0.0f, 0.0f, 1.0f);
        const float NdotV = saturate(wo.z);
        const float NdotL = saturate(wi.z);
        const float3 H = normalize(wo + wi);
        const float VdotH = max(saturate(dot(wo, H)), kMinCosTheta);
        const float VdotL = saturate(dot(wo, wi));
        const float NdotH = saturate(H.z);

        return albedo * Diffuse(roughness, N, wo, wi, NdotV, NdotL, VdotH, VdotL, NdotH) * K_PI;
    }
};

/// Lambertian diffuse transmission lobe: same as a Lambert reflection, but `wo` lies in
/// the lower hemisphere (wo.z < 0).
struct DiffuseTransmissionLambert
{
    float3 albedo;

    /// Returns f(wi, wo) * |cos(theta_o)|, or zero when wi is not above / wo is not below the surface.
    float3 Eval(const float3 wi, const float3 wo)
    {
        if (min(wi.z, -wo.z) < kMinCosTheta)
            return float3(0,0,0);

        return K_1_PI * albedo * -wo.z;
    }

    /// Cosine-weighted sampling of the lower hemisphere driven by preGeneratedSample.xy.
    bool SampleBSDF(const float3 wi, out float3 wo, out float pdf, out float3 weight, out uint lobe, out float lobeP, const float4 preGeneratedSample)
    {
        wo = sample_cosine_hemisphere_concentric(preGeneratedSample.xy, pdf);
        wo.z = -wo.z; // mirror the sample into the transmission hemisphere
        lobe = (uint)LobeType::DiffuseTransmission;

        if (min(wi.z, -wo.z) < kMinCosTheta)
        {
            weight = float3(0,0,0);
            lobeP = 0.0;
            return false;
        }

        weight = albedo;
        lobeP = 1.0;
        return true;
    }

    /// Solid-angle pdf of the sampling above: |cos(theta_o)| / pi.
    float EvalPdf(const float3 wi, const float3 wo)
    {
        if (min(wi.z, -wo.z) < kMinCosTheta) return 0.f;

        return K_1_PI * -wo.z;
    }
};

struct SpecularReflectionMicrofacet // : IBxDF
{
    float3 albedo;      ///< Specular albedo.
    float alpha;        ///< GGX width parameter.
    uint activeLobes;   ///< BSDF lobes to include for sampling and evaluation. See LobeType.hlsli.
// Lobe query, evaluation, sampling and pdf of the GGX specular reflection lobe.

    /// Returns true when `lobe` is enabled in activeLobes.
    bool hasLobe(LobeType lobe) { return (activeLobes & (uint)lobe) != 0; }

    /// Returns f(wi, wo) * cos(theta_o) for the rough (non-delta) reflection lobe.
    /// Zero when either direction is below the horizon, when the lobe is a delta
    /// lobe (alpha == 0) or when SpecularReflection is disabled.
    float3 Eval(const float3 wi, const float3 wo)
    {
        if (min(wi.z, wo.z) < kMinCosTheta) return float3(0,0,0);

        // Handle delta reflection.
        if (alpha == 0.f) return float3(0,0,0);

        if (!hasLobe(LobeType::SpecularReflection)) return float3(0,0,0);

        float3 h = normalize(wi + wo);
        float wiDotH = dot(wi, h);

        float D = evalNdfGGX(alpha, h.z);
        float G = evalMaskingSmithGGXCorrelated(alpha, wi.z, wo.z);
        float3 F = evalFresnelSchlick(albedo, 1.f, wiDotH);
        return F * D * G * 0.25f / wi.z;
    }

    /// Samples an outgoing direction.
    /// alpha == 0 produces a perfect mirror direction (DeltaReflection, pdf = 0);
    /// otherwise a half vector is drawn from the GGX VNDF using preGeneratedSample.xy.
    /// Returns false when no valid sample could be generated.
    bool SampleBSDF(const float3 wi, out float3 wo, out float pdf, out float3 weight, out uint lobe, out float lobeP, const float4 preGeneratedSample)
    {
        wo = float3(0,0,0);
        weight = float3(0,0,0);
        pdf = 0.f;
        lobe = (uint)LobeType::SpecularReflection;
        lobeP = 1.0;

        if (wi.z < kMinCosTheta) return false;

        // Handle delta reflection.
        if (alpha == 0.f)
        {
            if (!hasLobe(LobeType::DeltaReflection)) return false;

            wo = float3(-wi.x, -wi.y, wi.z);
            pdf = 0.f;
            weight = evalFresnelSchlick(albedo, 1.f, wi.z);
            lobe = (uint)LobeType::DeltaReflection;
            return true;
        }

        if (!hasLobe(LobeType::SpecularReflection)) return false;

        // SampleBSDF the GGX distribution to find a microfacet normal (half vector).
        float3 h = sampleGGX_VNDF(alpha, wi, preGeneratedSample.xy);    // pdf = G1(wi) * D(h) * max(0,dot(wi,h)) / wi.z

        float wiDotH = dot(wi, h);
        wo = 2.f * wiDotH * h - wi;
        if (wo.z < kMinCosTheta) return false;

        pdf = EvalPdf(wi, wo); // We used to have pdf returned as part of the sampleGGX_XXX functions but this made it easier to add bugs when changing due to code duplication in refraction cases
        // Guard the division: a pdf that underflows to zero would otherwise yield
        // NaN/Inf weights. Same handling as SpecularReflectionTransmissionMicrofacet.
        weight = pdf > 0.f ? Eval(wi, wo) / pdf : float3(0, 0, 0);
        lobe = (uint)LobeType::SpecularReflection;
        return true;
    }

    /// Solid-angle pdf of sampling `wo` with SampleBSDF(); zero for invalid
    /// directions, for the delta lobe and when SpecularReflection is disabled.
    float EvalPdf(const float3 wi, const float3 wo)
    {
        if (min(wi.z, wo.z) < kMinCosTheta) return 0.f;

        // Handle delta reflection.
// A delta lobe (alpha == 0) has no finite solid-angle pdf.
        if (alpha == 0.f) return 0.f;

        if (!hasLobe(LobeType::SpecularReflection)) return 0.f;

        float3 h = normalize(wi + wo);
        float pdf = evalPdfGGX_VNDF(alpha, wi, h);

        return pdf;
    }
};

/// Dielectric interface with both specular reflection and specular transmission,
/// rough (GGX microfacet) when alpha > 0 and perfectly smooth (delta) when alpha == 0.
/// Directions are in the local frame (normal = +Z); wi is assumed to be in the upper hemisphere.
struct SpecularReflectionTransmissionMicrofacet
{
    float3 transmissionAlbedo;  ///< Transmission albedo.
    float alpha;                ///< GGX width parameter.
    float eta;                  ///< Relative index of refraction (etaI / etaT).
    uint activeLobes;           ///< BSDF lobes to include for sampling and evaluation. See LobeType.hlsli.
    bool isThinSurface;         ///< Hack refraction (but not reflection) eta to 1

    /// Returns true when `lobe` is enabled in activeLobes.
    bool hasLobe(LobeType lobe) { return (activeLobes & (uint)lobe) != 0; }

    /// Returns f(wi, wo) * |cos(theta_o)| for the rough lobes. wo.z > 0 selects
    /// reflection, wo.z < 0 selects transmission. Zero for delta lobes, invalid
    /// directions and disabled lobes.
    float3 Eval(const float3 wi, const float3 wo)
    {
        if (min(wi.z, abs(wo.z)) < kMinCosTheta) return float3(0,0,0);

        // Handle delta transmission.
        if (alpha == 0.f) return float3(0,0,0);

        const bool hasReflection = hasLobe(LobeType::SpecularReflection);
        const bool hasTransmission = hasLobe(LobeType::SpecularTransmission);
        const bool isReflection = wo.z > 0.f;
        if ((isReflection && !hasReflection) || (!isReflection && !hasTransmission)) return float3(0,0,0);

        // hack refraction for isThinSurface as the flag means we've entered and left the really thin volume
        float actualEta = (isThinSurface && !isReflection)?(1.0f):(eta);

        // Compute half-vector and make sure it's in the upper hemisphere.
        float3 h = normalize(wo + wi * (isReflection ? 1.f : actualEta));
        h *= float(sign(h.z));

        float wiDotH = dot(wi, h);
        float woDotH = dot(wo, h);

        float D = evalNdfGGX(alpha, h.z);
        float G = evalMaskingSmithGGXCorrelated(alpha, wi.z, abs(wo.z));
        float F = evalFresnelDielectric(actualEta, wiDotH);

        if (isReflection)
        {
            return F * D * G * 0.25f / wi.z;
        }
        else
        {
            float sqrtDenom = woDotH + actualEta * wiDotH;
            float t = actualEta * actualEta * wiDotH * woDotH / (wi.z * sqrtDenom * sqrtDenom);
            return transmissionAlbedo * (1.f - F) * D * G * abs(t);
        }
    }

    /// Samples an outgoing direction.
    ///   preGeneratedSample.xy : GGX VNDF half-vector sample (rough case only).
    ///   preGeneratedSample.w  : reflection-vs-transmission choice.
    /// lobeP receives the probability of the reflection/transmission split when both
    /// lobes are enabled (1 otherwise). Returns false when no valid sample exists.
    bool SampleBSDF(const float3 wi, out float3 wo, out float pdf, out float3 weight, out uint lobe, out float lobeP, const float4 preGeneratedSample)
    {
        wo = float3(0,0,0);
        weight = float3(0,0,0);
        pdf = 0.f;
        lobe = (uint)LobeType::SpecularReflection;
        lobeP = 1;

        if (wi.z < kMinCosTheta) return false;

        // Use .w for the reflect/refract decision. DefaultBSDF::SampleBSDF already consumes
        // .z to select this BxDF, so reusing .z here restricts it to the sub-interval that
        // selected this lobe; the decision is then no longer taken with probability F,
        // which is what the weights and pdfs assume.
        float lobeSample = preGeneratedSample.w;

        // Handle delta reflection/transmission.
        [branch]
        if (alpha == 0.f)
        {
            const bool hasReflection = hasLobe(LobeType::DeltaReflection);
            const bool hasTransmission = hasLobe(LobeType::DeltaTransmission);
            if (!(hasReflection || hasTransmission)) return false;

            float cosThetaT;
            float F = evalFresnelDielectric(eta, wi.z, cosThetaT);
            // TODO: adjust F for thin surface hack

            bool isReflection = hasReflection;
            if (hasReflection && hasTransmission)
            {
                isReflection = lobeSample < F;
                lobeP = (isReflection)?(F):(1-F);
            }
            else if (hasTransmission && F == 1.f)
            {
                return false;
            }

            // hack refraction for isThinSurface as the flag means we've entered and left the really thin volume
            float actualEta = eta;
            if (isThinSurface && !isReflection)
            {
                actualEta = 1.0;
                F = evalFresnelDielectric(actualEta, wi.z, cosThetaT);
            }

            pdf = 0.f;
            weight = isReflection ? float3(1,1,1) : transmissionAlbedo;
            // With a single lobe enabled there is no stochastic split, so Fresnel must stay in the weight.
            if (!(hasReflection && hasTransmission)) weight *= float3( (isReflection ? F : 1.f - F).xxx );
            wo = isReflection ? float3(-wi.x, -wi.y, wi.z) : float3(-wi.x * actualEta, -wi.y * actualEta, -cosThetaT);
            lobe = isReflection ? (uint)LobeType::DeltaReflection : (uint)LobeType::DeltaTransmission;

            if (abs(wo.z) < kMinCosTheta || (wo.z > 0.f != isReflection)) return false;

            return true;
        }

        const bool hasReflection = hasLobe(LobeType::SpecularReflection);
        const bool hasTransmission = hasLobe(LobeType::SpecularTransmission);
        if (!(hasReflection || hasTransmission)) return false;

        float3 h = sampleGGX_VNDF(alpha, wi, preGeneratedSample.xy);    // pdf = G1(wi) * D(h) * max(0,dot(wi,h)) / wi.z

        // Reflect/refract the incident direction to find the outgoing direction.
        float wiDotH = dot(wi, h);

        float cosThetaT;
        float F = evalFresnelDielectric(eta, wiDotH, cosThetaT);

        bool isReflection = hasReflection;
        if (hasReflection && hasTransmission)
        {
            isReflection = lobeSample < F;
            // Report the split probability, exactly as the delta branch above does.
            lobeP = isReflection ? F : 1.f - F;
        }
        else if (hasTransmission && F == 1.f)
        {
            return false;
        }

        // hack refraction for isThinSurface as the flag means we've entered and left the really thin volume
        float actualEta = eta;
        if (isThinSurface && !isReflection)
        {
            actualEta = 1.0;
            // Refraction happens about the microfacet normal h, so cosThetaT must be derived
            // from wiDotH (wi.z is only correct for the delta case where h == N).
            F = evalFresnelDielectric(actualEta, wiDotH, cosThetaT);
        }

        wo = isReflection ?
            (2.f * wiDotH * h - wi) :
            ((actualEta * wiDotH - cosThetaT) * h - actualEta * wi);

        if (abs(wo.z) < kMinCosTheta || (wo.z > 0.f != isReflection)) return false;

        lobe = isReflection ? (uint)LobeType::SpecularReflection : (uint)LobeType::SpecularTransmission;

        pdf = EvalPdf(wi, wo);  // <- this will have the correct Jacobian applied (for correct refraction pdf); We used to have pdf returned as part of the sampleGGX_XXX functions but this made it easier to add bugs when changing due to code duplication in refraction cases
        weight = pdf > 0.f ? Eval(wi, wo) / pdf : float3(0, 0, 0);
        return true;
    }

    /// Solid-angle pdf of sampling `wo` with SampleBSDF(), including the Jacobian of the
    /// reflection/refraction mapping and, when both lobes are enabled, the Fresnel split.
    /// Zero for delta lobes, invalid directions and disabled lobes.
    float EvalPdf(const float3 wi, const float3 wo)
    {
        if (min(wi.z, abs(wo.z)) < kMinCosTheta) return 0.f;

        // Handle delta reflection/transmission.
        if (alpha == 0.f) return 0.f;

        bool isReflection = wo.z > 0.f;
        const bool hasReflection = hasLobe(LobeType::SpecularReflection);
        const bool hasTransmission = hasLobe(LobeType::SpecularTransmission);
        if ((isReflection && !hasReflection) || (!isReflection && !hasTransmission)) return 0.f;

        // hack refraction for isThinSurface as the flag means we've entered and left the really thin volume
        float actualEta = (isThinSurface && !isReflection)?(1.0f):(eta);

        // Compute half-vector and make sure it's in the upper hemisphere.
        float3 h = normalize(wo + wi * (isReflection ? 1.f : actualEta));
        h *= float(sign(h.z));

        float wiDotH = dot(wi, h);
        float woDotH = dot(wo, h);

        float F = evalFresnelDielectric(actualEta, wiDotH);

        float pdf = evalPdfGGX_VNDF(alpha, wi, h);

        if (isReflection)
        {   // Jacobian of the reflection operator.
            if (woDotH <= 0.f) return 0.f;
            pdf *= wiDotH / woDotH;
        }
        else
        {   // Jacobian of the refraction operator.
            if (woDotH > 0.f) return 0.f;
            pdf *= wiDotH * 4.0f;
            float sqrtDenom = woDotH + actualEta * wiDotH;
            float denom = sqrtDenom * sqrtDenom;
            pdf *= abs(woDotH) / denom;
        }

        if (hasReflection && hasTransmission)
        {
            pdf *= isReflection ? F : 1.f - F;
        }

        return clamp(pdf, 0, FLT_MAX);
    }
};

struct DefaultBSDF
{
    DiffuseReflection diffuseReflection;
    DiffuseTransmissionLambert diffuseTransmission;
    SpecularReflectionMicrofacet specularReflection;
    SpecularReflectionTransmissionMicrofacet specularReflectionTransmission;

    float diffTrans;                        ///< Mix between diffuse BRDF and diffuse BTDF.
    float specTrans;                        ///< Mix between dielectric BRDF and specular BSDF.

    float pDiffuseReflection;               ///< Probability for sampling the diffuse BRDF.
+ float pDiffuseTransmission; ///< Probability for sampling the diffuse BTDF. + float pSpecularReflection; ///< Probability for sampling the specular BRDF. + float pSpecularReflectionTransmission; ///< Probability for sampling the specular BSDF. + + void __init(float3 N, float3 V, Surface surface, bool isEnter = true) + { + bool isThinSurface = false; // Not used currently + + float3 transmissionAlbedo = surface.TransmissionColor; + float surfaceRoughness = saturate(surface.Roughness); + + diffuseReflection.albedo = surface.DiffuseAlbedo; + diffuseReflection.roughness = surfaceRoughness; + diffuseTransmission.albedo = transmissionAlbedo; + + float alpha = surfaceRoughness * surfaceRoughness; + if (alpha < kMinGGXAlpha) alpha = 0.f; + + uint activeLobes = (uint)LobeType::DiffuseReflection | (uint)LobeType::SpecularReflection; + if (transmissionAlbedo.r > 0.f || transmissionAlbedo.g > 0.f || transmissionAlbedo.b > 0.f) + { + activeLobes |= (uint)LobeType::DiffuseTransmission | (uint)LobeType::SpecularTransmission | (uint)LobeType::DeltaTransmission; + } + + float3 surfaceSpecular = surface.F0; + float surfaceIoR = surface.IOR; + float surfaceEta = isEnter ? (1.f / surfaceIoR) : surfaceIoR; + + specularReflection.albedo = surfaceSpecular; + specularReflection.alpha = alpha; + specularReflection.activeLobes = activeLobes; + + specularReflectionTransmission.transmissionAlbedo = transmissionAlbedo; + specularReflectionTransmission.alpha = surfaceEta == 1.f ? 
0.f : alpha; + specularReflectionTransmission.eta = surfaceEta; + specularReflectionTransmission.activeLobes = activeLobes; + specularReflectionTransmission.isThinSurface = isThinSurface; + + diffTrans = surface.DiffTrans; + specTrans = surface.SpecTrans; + + float surfaceMetallic = surface.Metallic; + float metallicBRDF = surfaceMetallic * (1.f - specTrans); + float dielectricBSDF = (1.f - surfaceMetallic) * (1.f - specTrans); + float specularBSDF = specTrans; + + float diffuseWeight = Luminance(surface.DiffuseAlbedo); + float specularWeight = Luminance(evalFresnelSchlick(surfaceSpecular, 1.f, dot(V, N))); + + pDiffuseReflection = (activeLobes & (uint)LobeType::DiffuseReflection) ? diffuseWeight * dielectricBSDF * (1.f - diffTrans) : 0.f; + pDiffuseTransmission = (activeLobes & (uint)LobeType::DiffuseTransmission) ? diffuseWeight * dielectricBSDF * diffTrans : 0.f; + pSpecularReflection = (activeLobes & ((uint)LobeType::SpecularReflection | (uint)LobeType::DeltaReflection)) ? specularWeight * (metallicBRDF + dielectricBSDF) : 0.f; + pSpecularReflectionTransmission = (activeLobes & ((uint)LobeType::SpecularReflection | (uint)LobeType::DeltaReflection | (uint)LobeType::SpecularTransmission | (uint)LobeType::DeltaTransmission)) ? 
specularBSDF : 0.f; + + float normFactor = pDiffuseReflection + pDiffuseTransmission + pSpecularReflection + pSpecularReflectionTransmission; + if (normFactor > 0.f) + { + normFactor = 1.f / normFactor; + pDiffuseReflection *= normFactor; + pDiffuseTransmission *= normFactor; + pSpecularReflection *= normFactor; + pSpecularReflectionTransmission *= normFactor; + } + } + + static DefaultBSDF make(float3 N, float3 V, Surface surface, bool isEnter = true) + { + DefaultBSDF bsdf; + bsdf.__init(N, V, surface, isEnter); + return bsdf; + } + + static uint getLobes(Surface surface) + { + float surfaceRoughness = saturate(surface.Roughness); + float alpha = surfaceRoughness * surfaceRoughness; + bool isDelta = alpha < kMinGGXAlpha; + + float diffTrans = surface.DiffTrans; + float specTrans = surface.SpecTrans; + + uint lobes = isDelta ? (uint)LobeType::DeltaReflection : (uint)LobeType::SpecularReflection; + if (any(surface.DiffuseAlbedo > 0.f) && specTrans < 1.f) + { + if (diffTrans < 1.f) lobes |= (uint)LobeType::DiffuseReflection; + if (diffTrans > 0.f) lobes |= (uint)LobeType::DiffuseTransmission; + } + if (specTrans > 0.f) lobes |= isDelta ? (uint)LobeType::DeltaTransmission : (uint)LobeType::SpecularTransmission; + + return lobes; + } + + float4 Eval(const float3 wi, const float3 wo) + { + float3 diffuse = 0.f; float3 specular = 0.f; + if (pDiffuseReflection > 0.f) diffuse += (1.f - specTrans) * (1.f - diffTrans) * diffuseReflection.Eval(wi, wo); // <- this isn't correct; diffuse has a specular component that should be considered + if (pDiffuseTransmission > 0.f) diffuse += (1.f - specTrans) * diffTrans * diffuseTransmission.Eval(wi, wo); + if (pSpecularReflection > 0.f) specular += (1.f - specTrans) * specularReflection.Eval(wi, wo); + if (pSpecularReflectionTransmission > 0.f) specular += specTrans * (specularReflectionTransmission.Eval(wi, wo)); // <- do we want to consider transmission as specular? 
this depends entirely on denoiser - should ask RR folks + + return float4(diffuse+specular, Average(specular)); // use average instead of sum to avoid hitting fp16 ceiling early + } + + bool SampleBSDF(const float3 wi, out float3 wo, out float pdf, out float3 weight, out uint lobe, out float lobeP, const float4 preGeneratedSample) + { + wo = float3(0,0,0); + weight = float3(0,0,0); + pdf = 0.f; + lobe = (uint)LobeType::DiffuseReflection; + lobeP = 0.0; + + bool valid = false; + + float uSelect = preGeneratedSample.z; + + if (uSelect < pDiffuseReflection) + { + valid = diffuseReflection.SampleBSDF(wi, wo, pdf, weight, lobe, lobeP, preGeneratedSample); + weight /= pDiffuseReflection; + weight *= (1.f - specTrans) * (1.f - diffTrans); + pdf *= pDiffuseReflection; + lobeP *= pDiffuseReflection; + if (pSpecularReflection > 0.f) pdf += pSpecularReflection * specularReflection.EvalPdf(wi, wo); + if (pSpecularReflectionTransmission > 0.f) pdf += pSpecularReflectionTransmission * specularReflectionTransmission.EvalPdf(wi, wo); + } + else if (uSelect < pDiffuseReflection + pDiffuseTransmission) + { + valid = diffuseTransmission.SampleBSDF(wi, wo, pdf, weight, lobe, lobeP, preGeneratedSample); + weight /= pDiffuseTransmission; + weight *= (1.f - specTrans) * diffTrans; + pdf *= pDiffuseTransmission; + lobeP *= pDiffuseTransmission; + if (pSpecularReflectionTransmission > 0.f) pdf += pSpecularReflectionTransmission * specularReflectionTransmission.EvalPdf(wi, wo); + } + else if (uSelect < pDiffuseReflection + pDiffuseTransmission + pSpecularReflection) + { + valid = specularReflection.SampleBSDF(wi, wo, pdf, weight, lobe, lobeP, preGeneratedSample); + weight /= pSpecularReflection; + weight *= (1.f - specTrans); + pdf *= pSpecularReflection; + lobeP *= pSpecularReflection; + if (pDiffuseReflection > 0.f) pdf += pDiffuseReflection * diffuseReflection.EvalPdf(wi, wo); + if (pSpecularReflectionTransmission > 0.f) pdf += pSpecularReflectionTransmission * 
specularReflectionTransmission.EvalPdf(wi, wo); + } + else if (pSpecularReflectionTransmission > 0.f) + { + valid = specularReflectionTransmission.SampleBSDF(wi, wo, pdf, weight, lobe, lobeP, preGeneratedSample); + weight /= pSpecularReflectionTransmission; + weight *= specTrans; + pdf *= pSpecularReflectionTransmission; + lobeP *= pSpecularReflectionTransmission; + if (pDiffuseReflection > 0.f) pdf += pDiffuseReflection * diffuseReflection.EvalPdf(wi, wo); + if (pDiffuseTransmission > 0.f) pdf += pDiffuseTransmission * diffuseTransmission.EvalPdf(wi, wo); + if (pSpecularReflection > 0.f) pdf += pSpecularReflection * specularReflection.EvalPdf(wi, wo); + } + + if( !valid || (lobe & (uint)LobeType::Delta) != 0 ) + pdf = 0.0; + + return valid; + } + + float EvalPdf(const float3 wi, const float3 wo) + { + float pdf = 0.f; + if (pDiffuseReflection > 0.f) pdf += pDiffuseReflection * diffuseReflection.EvalPdf(wi, wo); + if (pDiffuseTransmission > 0.f) pdf += pDiffuseTransmission * diffuseTransmission.EvalPdf(wi, wo); + if (pSpecularReflection > 0.f) pdf += pSpecularReflection * specularReflection.EvalPdf(wi, wo); + if (pSpecularReflectionTransmission > 0.f) pdf += pSpecularReflectionTransmission * specularReflectionTransmission.EvalPdf(wi, wo); + return pdf; + } + + void EvalDeltaLobes(const float3 wi, out DeltaLobe deltaLobes[cMaxDeltaLobes], out int deltaLobeCount, out float nonDeltaPart) // wi is in local space + { + deltaLobeCount = 2; // currently - will be 1 more if we add clear coat :) + for (int i = 0; i < deltaLobeCount; i++) + deltaLobes[i] = DeltaLobe::make(); // init to zero + + nonDeltaPart = pDiffuseReflection+pDiffuseTransmission; + if ( specularReflection.alpha > 0 ) // if roughness > 0, lobe is not delta + nonDeltaPart += pSpecularReflection; + if ( specularReflectionTransmission.alpha > 0 ) // if roughness > 0, lobe is not delta + nonDeltaPart += pSpecularReflectionTransmission; + + // no spec reflection or transmission? 
delta lobes are zero (we can just return, already initialized to 0)! + if ( (pSpecularReflection+pSpecularReflectionTransmission) == 0 ) + return; + + // note, deltaReflection here represents both this.specularReflection and this.specularReflectionTransmission's + DeltaLobe deltaReflection, deltaTransmission; + deltaReflection = deltaTransmission = DeltaLobe::make(); // init to zero + deltaReflection.transmission = false; + deltaTransmission.transmission = true; + + deltaReflection.dir = float3(-wi.x, -wi.y, wi.z); + + if (specularReflection.alpha == 0 && specularReflection.hasLobe(LobeType::DeltaReflection)) + { + deltaReflection.probability = pSpecularReflection; + + // re-compute correct thp for all channels (using float3 version of evalFresnelSchlick!) but then take out the portion that is handled by specularReflectionTransmission below! + deltaReflection.thp = (1-pSpecularReflectionTransmission)*evalFresnelSchlick(specularReflection.albedo, 1.f, wi.z); + } + + // Handle delta reflection/transmission. 
+ if (specularReflectionTransmission.alpha == 0.f) + { + const bool hasReflection = specularReflectionTransmission.hasLobe(LobeType::DeltaReflection); + const bool hasTransmission = specularReflectionTransmission.hasLobe(LobeType::DeltaTransmission); + if (hasReflection || hasTransmission) + { + float cosThetaT; + float F = evalFresnelDielectric(specularReflectionTransmission.eta, wi.z, cosThetaT); + + if (hasReflection) + { + float localProbability = pSpecularReflectionTransmission * F; + float3 weight = float3(1,1,1) * localProbability; + deltaReflection.thp += weight; + deltaReflection.probability += localProbability; + } + + if (hasTransmission) + { + // hack refraction for isThinSurface as the flag means we've entered and left the really thin volume + // not sure probability is valid - I think it is + float actualEta = specularReflectionTransmission.eta; + if (specularReflectionTransmission.isThinSurface) + { + actualEta = 1.0; + F = evalFresnelDielectric(actualEta, wi.z, cosThetaT); + } + + float localProbability = pSpecularReflectionTransmission * (1.0-F); + float3 weight = specularReflectionTransmission.transmissionAlbedo * localProbability; + deltaTransmission.dir = float3(-wi.x * actualEta, -wi.y * actualEta, -cosThetaT); + deltaTransmission.thp = weight; + deltaTransmission.probability = localProbability; + } + + // + // if (abs(wo.z) < kMinCosTheta || (wo.z > 0.f != isReflection)) return false; + } + } + + // Lobes are by convention in this order, and the index must match BSDFSample::getDeltaLobeIndex() as well as the UI. + // When we add clearcoat it goes after deltaReflection and so on. 
+ deltaLobes[0] = deltaTransmission; + deltaLobes[1] = deltaReflection; + } +}; + +struct StandardBSDF +{ + float3 emission; + bool isEnter; + + static StandardBSDF make(Surface surface, bool isEnter = true) + { + StandardBSDF bsdf; + bsdf.emission = surface.Emissive; + bsdf.isEnter = isEnter; + return bsdf; + } + + float4 Eval(const BRDFContext brdfContext, const Material material, const Surface surface, const float3 wo) + { + float3 wi = brdfContext.ViewDirection; + float3 N = surface.Normal; + + float3 wiLocal = surface.ToLocal(wi); + float3 woLocal = surface.ToLocal(wo); +#if defined(PATH_TRACING) +# if HAIR_MODE == HAIR_MODE_CHIANG_BSDF + if (material.Feature == Feature::kHairTint) + { + HairChiangBSDF bsdf = HairChiangBSDF::make(wi, surface); + return bsdf.Eval(wiLocal, woLocal); + } else +# elif HAIR_MODE == HAIR_MODE_FARFIELD_BCSDF + if (material.Feature == Feature::kHairTint) + { + HairFarFieldBCSDF bsdf = HairFarFieldBCSDF::make(wi, surface); + return bsdf.Eval(wiLocal, woLocal); + } else +# endif +#endif + { + DefaultBSDF bsdf = DefaultBSDF::make(N, wi, surface, isEnter); + return bsdf.Eval(wiLocal, woLocal); + } + } + + bool SampleBSDF(const BRDFContext brdfContext, const Material material, const Surface surface, out BSDFSample result, inout uint randomSeed) + { + float4 preGeneratedSamples = float4( + Random(randomSeed), + Random(randomSeed), + Random(randomSeed), + Random(randomSeed) + ); + float3 wi = brdfContext.ViewDirection; + float3 N = surface.Normal; + + float3 wiLocal = surface.ToLocal(wi); + +#if defined(PATH_TRACING) +# if HAIR_MODE == HAIR_MODE_CHIANG_BSDF + if (material.Feature == Feature::kHairTint) + { + HairChiangBSDF bsdf = HairChiangBSDF::make(wi, surface); + + float3 woLocal; + bool valid = bsdf.SampleBSDF(wiLocal, woLocal, result.pdf, result.weight, result.lobe, result.lobeP, preGeneratedSamples); + + result.wo = surface.FromLocal(woLocal); + return valid; + } else +# elif HAIR_MODE == HAIR_MODE_FARFIELD_BCSDF + if (material.Feature 
== Feature::kHairTint) + { + HairFarFieldBCSDF bsdf = HairFarFieldBCSDF::make(wi, surface); + const float h = 2.0f * Random(randomSeed) - 1.0f; + float lobeRandom = Random(randomSeed); + + float3 woLocal; + bool valid = bsdf.SampleBSDF(wiLocal, h, woLocal, result.pdf, result.weight, result.lobe, result.lobeP, lobeRandom, preGeneratedSamples); + + result.wo = surface.FromLocal(woLocal); + return valid; + } else +# endif +#endif + { + DefaultBSDF bsdf = DefaultBSDF::make(N, wi, surface, isEnter); + + float3 woLocal; + bool valid = bsdf.SampleBSDF(wiLocal, woLocal, result.pdf, result.weight, result.lobe, result.lobeP, preGeneratedSamples); + + result.wo = surface.FromLocal(woLocal); + return valid; + } + } + + float EvalPdf(const BRDFContext brdfContext, const Surface surface, const float3 wo) + { + float3 wi = brdfContext.ViewDirection; + float3 N = surface.Normal; + + float3 wiLocal = surface.ToLocal(wi); + float3 woLocal = surface.ToLocal(wo); + + DefaultBSDF bsdf = DefaultBSDF::make(N, wi, surface, isEnter); + return bsdf.EvalPdf(wiLocal, woLocal); + } + + uint GetLobes(const Surface surface) + { + return DefaultBSDF::getLobes(surface); + } + + void EvalDeltaLobes(const BRDFContext brdfContext, const Surface surface, out DeltaLobe deltaLobes[cMaxDeltaLobes], out int deltaLobeCount, out float nonDeltaPart) + { + float3 wi = brdfContext.ViewDirection; + float3 N = surface.Normal; + + float3 wiLocal = surface.ToLocal(wi); + + DefaultBSDF bsdf = DefaultBSDF::make(N, wi, surface, isEnter); + bsdf.EvalDeltaLobes(wiLocal, deltaLobes, deltaLobeCount, nonDeltaPart); + + for (int i = 0; i < deltaLobeCount; i++) + { + deltaLobes[i].dir = surface.FromLocal(deltaLobes[i].dir); + } + } +}; + +#endif // __BSDF_HLSLI__ \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/Fresnel.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/Fresnel.hlsli new file mode 100644 index 0000000000..ac0e139c1f --- /dev/null +++ 
b/features/Raytracing/Shaders/Raytracing/Includes/Materials/Fresnel.hlsli @@ -0,0 +1,141 @@ +/* +* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ +#ifndef __FRESNEL_HLSLI__ +#define __FRESNEL_HLSLI__ + +float CalculateBaseReflectivity(const float incidentIoR, const float transmittedIoR) +{ + const float tmp = (incidentIoR - transmittedIoR) / (incidentIoR + transmittedIoR); + return tmp * tmp; +} + +/** Evaluates the Fresnel term using Schlick's approximation. + Introduced in http://www.cs.virginia.edu/~jdl/bib/appearance/analytic%20models/schlick94b.pdf + + The Fresnel term equals f0 at normal incidence, and approaches f90=1.0 at 90 degrees. + The formulation below is generalized to allow both f0 and f90 to be specified. + + \param[in] f0 Specular reflectance at normal incidence (0 degrees). + \param[in] f90 Reflectance at orthogonal incidence (90 degrees), which should be 1.0 for specular surface reflection. + \param[in] cosTheta Cosine of angle between microfacet normal and incident direction (LdotH). + \return Fresnel term. 
*/
float3 evalFresnelSchlick(float3 f0, float3 f90, float cosTheta)
{
    // max() keeps the base non-negative so pow() cannot return NaN when cosTheta = 1+epsilon.
    const float grazingWeight = pow(max(1 - cosTheta, 0), 5);
    return f0 + (f90 - f0) * grazingWeight;
}

/** Scalar variant of Schlick's approximation; see the float3 overload for the parameters.
*/
float evalFresnelSchlick(float f0, float f90, float cosTheta)
{
    // max() keeps the base non-negative so pow() cannot return NaN when cosTheta = 1+epsilon.
    const float grazingWeight = pow(max(1 - cosTheta, 0), 5);
    return f0 + (f90 - f0) * grazingWeight;
}

/** Schlick's approximation with the grazing-angle reflectance f90 fixed to 1.
*/
float3 evalFresnelSchlick(float3 f0, float cosTheta)
{
    const float3 whiteF90 = float3(1.0f, 1.0f, 1.0f);
    return evalFresnelSchlick(f0, whiteF90, cosTheta);
}

/** Schlick's approximation with a caller-supplied exponent in place of the fixed 5.
*/
float3 evalFresnelGeneralizedSchlick(float3 f0, float3 f90, float exponent, float cosTheta)
{
    // max() keeps the base non-negative so pow() cannot return NaN when cosTheta = 1+epsilon.
    const float grazingWeight = pow(max(1 - cosTheta, 0), exponent);
    return f0 + (f90 - f0) * grazingWeight;
}

/** Evaluates the Fresnel term using the dielectric Fresnel equations.
    Based on http://www.pbr-book.org/3ed-2018/Reflection_Models/Specular_Reflection_and_Transmission.html

    \param[in] eta Relative index of refraction (etaI / etaT).
    \param[in] cosThetaI Cosine of angle between normal and incident direction.
    \param[out] cosThetaT Cosine of angle between negative normal and transmitted direction (0 for total internal reflection).
    \return Returns Fr(eta, cosThetaI).
*/
float evalFresnelDielectric(float eta, float cosThetaI, out float cosThetaT)
{
    // A negative cosine means the direction is on the other side of the interface:
    // invert the relative IOR and work with the positive cosine.
    if (cosThetaI < 0)
    {
        eta = 1 / eta;
        cosThetaI = -cosThetaI;
    }

    // Snell's law: sin^2(thetaT) = eta^2 * sin^2(thetaI).
    const float sin2ThetaT = eta * eta * (1 - cosThetaI * cosThetaI);

    // Total internal reflection: nothing is transmitted.
    if (sin2ThetaT > 1)
    {
        cosThetaT = 0;
        return 1;
    }

    cosThetaT = sqrt(1 - sin2ThetaT); // argument is within [0, 1] here, no clamp needed

    // Note that at eta=1 and cosThetaI=0, we get cosThetaT=0 and NaN below.
    // It's important the framework clamps |cosThetaI| or eta to small epsilon.
    const float rPerpendicular = (eta * cosThetaI - cosThetaT) / (eta * cosThetaI + cosThetaT);
    const float rParallel = (eta * cosThetaT - cosThetaI) / (eta * cosThetaT + cosThetaI);

    // Unpolarized light: average of the two polarization reflectances.
    return 0.5 * (rPerpendicular * rPerpendicular + rParallel * rParallel);
}

/** Evaluates the Fresnel term using the dielectric Fresnel equations.
    Based on http://www.pbr-book.org/3ed-2018/Reflection_Models/Specular_Reflection_and_Transmission.html

    \param[in] eta Relative index of refraction (etaI / etaT).
    \param[in] cosThetaI Cosine of angle between normal and incident direction.
    \return Returns Fr(eta, cosThetaI).
*/
float evalFresnelDielectric(float eta, float cosThetaI)
{
    float unusedCosThetaT; // the transmitted cosine is computed but not needed by this overload
    return evalFresnelDielectric(eta, cosThetaI, unusedCosThetaT);
}

/** Evaluates the Fresnel term using the conductor Fresnel equations, assuming unpolarized light.
    Based on "PHYSICALLY BASED LIGHTING CALCULATIONS FOR COMPUTER GRAPHICS" by Peter Shirley
    http://www.cs.virginia.edu/~jdl/bib/globillum/shirley_thesis.pdf

    \param[in] eta Real part of complex index of refraction
    \param[in] k Imaginary part of complex index of refraction (the "absorption coefficient")
    \param[in] cosThetaI Cosine of angle between normal and incident direction.
    \return Returns conductor reflectance.
*/
float evalFresnelConductor(float eta, float k, float cosThetaI)
{
    const float cos2 = cosThetaI * cosThetaI;
    const float sin2 = max(1.0f - cos2, 0.0f);
    const float sin4 = sin2 * sin2;

    const float innerTerm = eta * eta - k * k - sin2;
    const float aSqPlusBSq = sqrt(max(innerTerm*innerTerm + 4.0f * eta * eta * k * k, 0.0f));
    const float a = sqrt(max((aSqPlusBSq + innerTerm) * 0.5f, 0.0f));

    // Each ratio has the form (base - cross) / (base + cross).
    const float baseS = aSqPlusBSq + cos2;
    const float crossS = 2.0f * a * cosThetaI;
    const float Rs = (baseS - crossS) / (baseS + crossS);

    const float baseP = cos2 * aSqPlusBSq + sin4;
    const float crossP = 2.0f * a * cosThetaI * sin2;
    const float Rp = (baseP - crossP) / (baseP + crossP);

    // Rp above is the ratio Rp/Rs, so the parallel reflectance is Rs * Rp.
    return 0.5f * (Rs + Rs * Rp);
}

/** Evaluates the Fresnel term using conductor fresnel equations, assuming unpolarized light.
    Convenience function that takes coefficients at 3 wavelengths.
+ + \param[in] eta Real part of complex index of refraction + \param[in] k Imaginary part of complex index of refraction (the "absorption coefficient") + \param[in] cosThetaI Cosine of angle between normal and incident direction. + \return Returns conductor reflectance. +*/ +float3 evalFresnelConductor(float3 eta, float3 k, float cosThetaI) +{ + return float3( + evalFresnelConductor(eta.x, k.x, cosThetaI), + evalFresnelConductor(eta.y, k.y, cosThetaI), + evalFresnelConductor(eta.z, k.z, cosThetaI) + ); +} + +#endif // __FRESNEL_HLSLI__ \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairBsdfHelper.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairBsdfHelper.hlsli new file mode 100644 index 0000000000..afc5049ade --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairBsdfHelper.hlsli @@ -0,0 +1,116 @@ +/* +* Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef __HAIRBSDFHELPER_HLSLI__
+#define __HAIRBSDFHELPER_HLSLI__
+
+#include "Raytracing/Includes/Common.hlsli"
+#include "Raytracing/Includes/Materials/Fresnel.hlsli"
+// Mp term evaluated for variance v; presumably the longitudinal scattering function of the hair model (companion to Ap/Np below).
+float MP(const float cosThetaI, const float cosThetaO, const float sinThetaI, const float sinThetaO, const float v)
+{
+    const float a = cosThetaI * cosThetaO / v;
+    const float b = sinThetaI * sinThetaO / v;
+    const float mp = (v <= 0.1f) ? exp(LogI0(a) - b - 1.0f / v + 0.6931f + log(0.5f / v)) : (exp(-b) * I0(a)) / (sinh(1.0f / v) * 2.0f * v); // v <= 0.1 is evaluated in log space (presumably to avoid overflow in I0/sinh)
+    return mp;
+}
+
+// Attenuation function Ap. Fills ap[0..Hair_Max_Scattering_Events]; the last entry is the closed-form sum of the remaining orders.
+void AP(const HairMaterialInteraction hairMaterialInteraction, const float cosThetaI, const float3 T, out float3 ap[Hair_Max_Scattering_Events + 1])
+{
+    const float cosGammaI = Sqrt01(1.0f - hairMaterialInteraction.h * hairMaterialInteraction.h);
+    const float cosTheta = cosThetaI * cosGammaI;
+    const float3 f = hairMaterialInteraction.fresnelApproximation ?
+        evalFresnelSchlick(CalculateBaseReflectivity(1.0f, hairMaterialInteraction.ior), cosTheta).rrr :
+        evalFresnelDielectric(hairMaterialInteraction.ior, cosTheta).rrr; // NOTE(review): evalFresnelDielectric documents eta as etaI / etaT, yet ior (not .eta) is passed - confirm intended
+
+    ap[0] = f;
+    ap[1] = T * (float3(1.0f, 1.0f, 1.0f) - f) * (float3(1.0f, 1.0f, 1.0f) - f);
+    [unroll]
+    for (uint p = 2; p < Hair_Max_Scattering_Events; ++p)
+    {
+        ap[p] = ap[p - 1] * T * f;
+    }
+
+    // Compute attenuation term accounting for remaining orders of scattering.
+    ap[Hair_Max_Scattering_Events] = ap[Hair_Max_Scattering_Events - 1] * T * f / (float3(1.0f, 1.0f, 1.0f) - T * f);
+}
+
+// Azimuthal scattering function Np: trimmed logistic with scale s, evaluated at phi minus PhiFunction(p, gammaI, gammaT).
+float NP(const float phi, const int p, const float s, const float gammaI, const float gammaT)
+{
+    float dphi = phi - PhiFunction(p, gammaI, gammaT);
+
+    // Remap dphi to [-pi, pi].
+    dphi = fmod(dphi, K_2PI);
+    if (dphi > K_PI)
+    {
+        dphi -= K_2PI;
+    }
+    if (dphi < -K_PI)
+    {
+        dphi += K_2PI;
+    }
+
+    return TrimmedLogistic(dphi, s, -K_PI, K_PI);
+}
+
+// Compute a discrete pdf for sampling Ap (Lobe selection): luminance of each ap term, normalised to sum to 1.
+void ComputeApPdf(const HairMaterialInteraction hairMaterialInteraction, const float cosThetaO, out float apPdf[Hair_Max_Scattering_Events + 1])
+{
+    const float sinThetaO = Sqrt01(1.0f - cosThetaO * cosThetaO);
+
+    // Compute refracted ray.
+    const float sinThetaT = sinThetaO / hairMaterialInteraction.ior;
+    const float cosThetaT = Sqrt01(1.0f - sinThetaT * sinThetaT);
+
+    const float etap = Sqrt0(hairMaterialInteraction.ior * hairMaterialInteraction.ior - sinThetaO * sinThetaO) / cosThetaO;
+    const float sinGammaT = hairMaterialInteraction.h / etap;
+    const float cosGammaT = Sqrt01(1.0f - sinGammaT * sinGammaT);
+
+    // Compute the transmittance T of a single path through the cylinder.
+    const float tmp = -2.0f * cosGammaT / cosThetaT;
+    const float3 T = exp(hairMaterialInteraction.absorptionCoefficient * tmp);
+
+    float3 ap[Hair_Max_Scattering_Events + 1];
+    AP(hairMaterialInteraction, cosThetaO, T, ap);
+
+    // Compute apPdf from individual ap terms.
+    float sumY = 0.0f;
+    [unroll]
+    for (uint p = 0; p < Hair_Max_Scattering_Events; ++p)
+    {
+        apPdf[p] = Luminance(ap[p]);
+        sumY += apPdf[p];
+    }
+
+    const float invSumY = 1.0f / sumY; // no guard here: a zero sumY would yield inf/NaN weights
+    [unroll]
+    for (uint p2 = 0; p2 < Hair_Max_Scattering_Events; ++p2)
+    {
+        apPdf[p2] *= invSumY;
+    }
+
+    apPdf[Hair_Max_Scattering_Events] = 0.0f; // the residual term is excluded from the normalisation and given zero probability
+}
+
+#endif // __HAIRBSDFHELPER_HLSLI__
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairChiangBSDF.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairChiangBSDF.hlsli
new file mode 100644
index 0000000000..009a4ff325
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairChiangBSDF.hlsli
@@ -0,0 +1,294 @@
+/*
+* Copyright (c) 2024-2025, NVIDIA CORPORATION.
All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*/
+
+// Chiang16 Hair model
+// Reference Paper: https://benedikt-bitterli.me/pchfm/
+// Reference Article: https://www.pbrt.org/hair.pdf
+
+#ifndef __HAIRCHIANGBSDF_HLSLI__
+#define __HAIRCHIANGBSDF_HLSLI__
+
+#include "Raytracing/Includes/Materials/HairMaterial.hlsli"
+#include "Raytracing/Includes/Materials/LobeType.hlsli"
+
+#include "Raytracing/Includes/Materials/HairBsdfHelper.hlsli"
+
+#include "Raytracing/Includes/MathHelpers.hlsli"
+
+struct HairChiangBSDF
+{
+    HairMaterialData hairMaterialData;
+    HairInteractionSurface hairInteractionSurface;
+    HairMaterialInteraction hairMaterialInteraction;
+    // Fills the material from the surface (colour and roughness) plus fixed hair constants, then derives the interaction data.
+    void __init(float3 wi, Surface surface)
+    {
+        hairMaterialData.baseColor = surface.DiffuseAlbedo * surface.DiffuseAlbedo; // NOTE(review): albedo is squared here but used unsquared in HairFarFieldBCSDF::__init - confirm intentional
+        hairMaterialData.longitudinalRoughness = surface.Roughness;
+        hairMaterialData.azimuthalRoughness = surface.Roughness;
+
+        hairMaterialData.ior = 1.55f; // Typical value for human hair
+        hairMaterialData.eta = 1.0f / 1.55f;
+
+        hairMaterialData.fresnelApproximation = 0; // Dielectric
+        hairMaterialData.absorptionModel = HairAbsorptionModel_Color; // We don't have melanin data in skyrim
+        hairMaterialData.melanin = 0.3f;
+        hairMaterialData.melaninRedness = 0.5f;
+        hairMaterialData.cuticleAngleInDegrees = 3.0f;
+
+        hairInteractionSurface = CreateHairInteractionSurface(wi, surface.Tangent, surface.Bitangent, surface.Normal);
+        hairMaterialInteraction = CreateHairMaterialInteraction(hairMaterialData, hairInteractionSurface);
+    }
+    // Factory: returns a BSDF initialised through __init(wi, surface).
+    static HairChiangBSDF make(float3 wi, Surface surface)
+    {
+        HairChiangBSDF bsdf;
+        bsdf.__init(wi, surface);
+        return bsdf;
+    }
+    // Lobe mask reported for this BSDF: diffuse reflection | diffuse transmission (the surface argument is not read).
+    static uint getLobes(Surface surface)
+    {
+        uint lobes = (uint)LobeType::DiffuseReflection | (uint)LobeType::DiffuseTransmission;
+
+        return lobes;
+    }
+    // Returns float4(max(result, 0), Average(result)), where result sums Mp * Ap * Np over the lobes plus a residual term.
+    float4 Eval(const float3 wi, const float3 wo) // for hair, wi = light dir, wo = view dir
+    {
+        const float sinThetaO = wo.x;
+        const float cosThetaO = Sqrt01(1.0f - sinThetaO * sinThetaO);
+        const float phiO = Atan2safe(wo.z, wo.y);
+
+        const float sinThetaI = wi.x;
+        const float cosThetaI = Sqrt01(1.0f - sinThetaI * sinThetaI);
+        const float phiI = Atan2safe(wi.z, wi.y);
+
+        // Compute refracted ray.
+        const float sinThetaT = sinThetaO / hairMaterialInteraction.ior;
+        const float cosThetaT = Sqrt01(1.0f - sinThetaT * sinThetaT);
+
+        const float etap = Sqrt0(hairMaterialInteraction.ior * hairMaterialInteraction.ior - sinThetaO * sinThetaO) / cosThetaO;
+        const float sinGammaT = hairMaterialInteraction.h / etap;
+        const float cosGammaT = Sqrt01(1.0f - sinGammaT * sinGammaT);
+        const float gammaT = asin(clamp(sinGammaT, -1.0f, 1.0f));
+
+        // Compute the transmittance T of a single path through the cylinder
+        const float tmp = -2.0f * cosGammaT / cosThetaT;
+        const float3 T = exp(hairMaterialInteraction.absorptionCoefficient * tmp);
+
+        // Evaluate hair BCSDF for each lobe
+        const float phi = phiI - phiO;
+        float3 ap[Hair_Max_Scattering_Events + 1];
+        AP(hairMaterialInteraction, cosThetaO, T, ap);
+        float3 result = 0.0f;
+
+        [unroll]
+        for (uint p = 0; p < Hair_Max_Scattering_Events; ++p)
+        {
+            float sinThetaOp, cosThetaOp;
+            if (p == 0)
+            {
+                sinThetaOp = sinThetaO * hairMaterialInteraction.cos2kAlpha[1] - cosThetaO * hairMaterialInteraction.sin2kAlpha[1];
+                cosThetaOp = cosThetaO * hairMaterialInteraction.cos2kAlpha[1] + sinThetaO * hairMaterialInteraction.sin2kAlpha[1];
+            }
+            else if (p == 1)
+            {
+                sinThetaOp = sinThetaO * hairMaterialInteraction.cos2kAlpha[0] + cosThetaO * hairMaterialInteraction.sin2kAlpha[0];
+                cosThetaOp = cosThetaO * hairMaterialInteraction.cos2kAlpha[0] - sinThetaO * hairMaterialInteraction.sin2kAlpha[0];
+            }
+            else if (p == 2)
+            {
+                sinThetaOp = sinThetaO * hairMaterialInteraction.cos2kAlpha[2] + cosThetaO * hairMaterialInteraction.sin2kAlpha[2];
+                cosThetaOp = cosThetaO * hairMaterialInteraction.cos2kAlpha[2] - sinThetaO * hairMaterialInteraction.sin2kAlpha[2];
+            }
+            else
+            {
+                sinThetaOp = sinThetaO;
+                cosThetaOp = cosThetaO;
+            }
+
+            cosThetaOp = abs(cosThetaOp);
+            result += MP(cosThetaOp, cosThetaI, sinThetaOp, sinThetaI, hairMaterialInteraction.v[p]) *
+                      ap[p] *
+                      NP(phi, p, hairMaterialInteraction.logisticDistributionScalar, hairMaterialInteraction.gammaI, gammaT);
+        }
+
+        // Compute contribution of remaining terms after Hair_Max_Scattering_Events
+        result += MP(cosThetaO, cosThetaI, sinThetaO, sinThetaI, hairMaterialInteraction.v[Hair_Max_Scattering_Events]) *
+                  ap[Hair_Max_Scattering_Events] *
+                  K_1_2PI;
+
+        // We omit this computation in the BSDF because cosThetaI_N cancels out anyway when the scattered radiance is evaluated
+        // const float cosThetaI_N = wi.z; // The angle between wi and normal, which is (0, 0, 1) on local space
+        // result = abs(cosThetaI_N) > 0.0f ? result / abs(cosThetaI_N) : 0.0f;
+
+        return float4(max(result, 0.0f), Average(result));
+    }
+    // Picks a scattering term p from apPdf (using u[0].x), samples wi for it, and returns false with zero weight unless pdf > 1e-3.
+    bool SampleBSDF(const float3 wo, out float3 wi, out float pdf, out float3 weight, out uint lobe, out float lobeP, const float4 preGeneratedSample)
+    {
+        float2 u[2];
+        u[0] = preGeneratedSample.xy;
+        u[1] = preGeneratedSample.zw;
+
+        lobe = (uint)LobeType::DiffuseReflection;
+        lobeP = 1.0f;
+        uint lobeType;
+
+        const float sinThetaO = wo.x;
+        const float cosThetaO = Sqrt01(1.0f - sinThetaO * sinThetaO);
+        const float phiO = Atan2safe(wo.z, wo.y);
+
+        // Determine which term p to sample for hair scattering.
+        float apPdf[Hair_Max_Scattering_Events + 1];
+        ComputeApPdf(hairMaterialInteraction, cosThetaO, apPdf);
+
+        uint p = 0;
+        float vp = hairMaterialInteraction.v[0];
+        {
+            [unroll]
+            for (uint i = 0; i < Hair_Max_Scattering_Events; ++i)
+            {
+                if (u[0].x >= apPdf[i])
+                {
+                    u[0].x -= apPdf[i];
+                    p = i + 1;
+                    vp = hairMaterialInteraction.v[i + 1];
+                }
+                else
+                {
+                    break;
+                }
+            }
+        }
+
+        float sinThetaOp = sinThetaO;
+        float cosThetaOp = cosThetaO;
+        if (p == 0)
+        {
+            sinThetaOp = sinThetaO * hairMaterialInteraction.cos2kAlpha[1] - cosThetaO * hairMaterialInteraction.sin2kAlpha[1];
+            cosThetaOp = cosThetaO * hairMaterialInteraction.cos2kAlpha[1] + sinThetaO * hairMaterialInteraction.sin2kAlpha[1];
+            lobeType = HairLobeType_R;
+        }
+        else if (p == 1)
+        {
+            sinThetaOp = sinThetaO * hairMaterialInteraction.cos2kAlpha[0] + cosThetaO * hairMaterialInteraction.sin2kAlpha[0];
+            cosThetaOp = cosThetaO * hairMaterialInteraction.cos2kAlpha[0] - sinThetaO * hairMaterialInteraction.sin2kAlpha[0];
+
+            lobeType = HairLobeType_TT;
+        }
+        else if (p == 2)
+        {
+            sinThetaOp = sinThetaO * hairMaterialInteraction.cos2kAlpha[2] + cosThetaO * hairMaterialInteraction.sin2kAlpha[2];
+            cosThetaOp = cosThetaO * hairMaterialInteraction.cos2kAlpha[2] - sinThetaO * hairMaterialInteraction.sin2kAlpha[2];
+
+            lobeType = HairLobeType_TRT;
+        }
+        else
+        {
+            lobeType = HairLobeType_TRT;
+        }
+
+        u[1].x = max(u[1].x, 1e-5f);
+        const float cosTheta = 1.0f + vp * log(u[1].x + (1.0f - u[1].x) * exp(-2.0f / vp));
+        const float sinTheta = Sqrt01(1.0f - cosTheta * cosTheta);
+        const float cosPhi = cos(u[1].y * K_2PI);
+        const float sinThetaI = -cosTheta * sinThetaOp + sinTheta * cosPhi * cosThetaOp;
+        const float cosThetaI = Sqrt01(1.0f - sinThetaI * sinThetaI);
+
+        // Sample Np to compute dphi
+        const float etap = Sqrt0(hairMaterialInteraction.ior * hairMaterialInteraction.ior - sinThetaO * sinThetaO) / cosThetaO;
+        const float sinGammaT = hairMaterialInteraction.h / etap;
+        const float gammaT = asin(clamp(sinGammaT, -1.0f, 1.0f));
+        float dphi;
+        if (p < Hair_Max_Scattering_Events)
+        {
+            dphi = PhiFunction(p, hairMaterialInteraction.gammaI, gammaT) +
+                   SampleTrimmedLogistic(u[0].y, hairMaterialInteraction.logisticDistributionScalar, -K_PI, K_PI);
+        }
+        else
+        {
+            dphi = u[0].y * K_2PI;
+        }
+
+        const float phiI = phiO + dphi;
+        wi = float3(sinThetaI, cosThetaI * cos(phiI), cosThetaI * sin(phiI));
+
+        pdf = 0.0f;
+        [unroll]
+        for (uint i = 0; i < Hair_Max_Scattering_Events; ++i)
+        {
+            float sinThetaIp, cosThetaIp;
+            if (i == 0)
+            {
+                sinThetaIp = sinThetaI * hairMaterialInteraction.cos2kAlpha[1] - cosThetaI * hairMaterialInteraction.sin2kAlpha[1];
+                cosThetaIp = cosThetaI * hairMaterialInteraction.cos2kAlpha[1] + sinThetaI * hairMaterialInteraction.sin2kAlpha[1];
+            }
+            else if (i == 1)
+            {
+                sinThetaIp = sinThetaI * hairMaterialInteraction.cos2kAlpha[0] + cosThetaI * hairMaterialInteraction.sin2kAlpha[0];
+                cosThetaIp = cosThetaI * hairMaterialInteraction.cos2kAlpha[0] - sinThetaI * hairMaterialInteraction.sin2kAlpha[0];
+            }
+            else if (i == 2)
+            {
+                sinThetaIp = sinThetaI * hairMaterialInteraction.cos2kAlpha[2] + cosThetaI * hairMaterialInteraction.sin2kAlpha[2];
+                cosThetaIp = cosThetaI * hairMaterialInteraction.cos2kAlpha[2] - sinThetaI * hairMaterialInteraction.sin2kAlpha[2];
+            }
+            else
+            {
+                sinThetaIp = sinThetaI;
+                cosThetaIp = cosThetaI;
+            }
+
+            cosThetaIp = abs(cosThetaIp);
+            pdf += MP(cosThetaIp, cosThetaO, sinThetaIp, sinThetaO, hairMaterialInteraction.v[i]) *
+                   apPdf[i] *
+                   NP(dphi, i, hairMaterialInteraction.logisticDistributionScalar, hairMaterialInteraction.gammaI, gammaT);
+        }
+        pdf += MP(cosThetaI, cosThetaO, sinThetaI, sinThetaO, hairMaterialInteraction.v[Hair_Max_Scattering_Events]) *
+               apPdf[Hair_Max_Scattering_Events] *
+               K_1_2PI;
+
+        if (pdf > 1e-3f)
+        {
+            weight = Eval(wi, wo).xyz / pdf;
+            // TT is reported as diffuse transmission; every other lobe (R, TRT and the residual term) as diffuse reflection
+            if (lobeType == HairLobeType_TT) {
+                lobe = (uint)LobeType::DiffuseTransmission;
+            } else {
+                lobe = (uint)LobeType::DiffuseReflection;
+            }
+            lobeP = 1.0f;
+            return true;
+        }
+        else
+        {
+            weight = 0.0f;
+            lobe = 0;
+            lobeP = 0.0f;
+            return false;
+        }
+    }
+};
+
+#endif // __HAIRCHIANGBSDF_HLSLI__
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairFarFieldBCSDF.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairFarFieldBCSDF.hlsli
new file mode 100644
index 0000000000..4baaaa13d6
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairFarFieldBCSDF.hlsli
@@ -0,0 +1,303 @@
+/*
+* Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef __HAIRFARFIELDBCSDF_HLSLI__
+#define __HAIRFARFIELDBCSDF_HLSLI__
+
+#include "Raytracing/Includes/Materials/HairMaterial.hlsli"
+#include "Raytracing/Includes/Materials/LobeType.hlsli"
+
+#include "Raytracing/Includes/Materials/HairBsdfHelper.hlsli"
+
+#include "Raytracing/Includes/MathHelpers.hlsli"
+
+// tighten the R lobe (or not) with phi - [d'Eon et al. 2014 SIGGRAPH talk]; expects a local named `phi` at the use site
+#define R_TERM_AZIMUTHAL_SQUEEZE max(0.01f, cos(0.5f * phi))
+
+// Essential interface functions invoked in generated material code
+// Custom far-field BCSDF eval() [Eugene d'Eon - 2022]
+// R lobe: [d'Eon et al. 2014 - SIGGRAPH talk]
+// TT lobe: [Marschner et al. 2003]
+// TRT lobe: custom 3-Gaussian lobe based on fitting to MC simulation
+struct HairFarFieldBCSDF
+{
+    HairMaterialData hairMaterialData;
+    HairInteractionSurface hairInteractionSurface;
+    HairMaterialInteractionBcsdf hairMaterialInteractionBcsdf;
+    // Fills the material from the surface (colour and roughness) plus fixed hair constants, then derives the interaction data.
+    void __init(float3 wi, Surface surface)
+    {
+        hairMaterialData.baseColor = surface.DiffuseAlbedo;
+        hairMaterialData.longitudinalRoughness = surface.Roughness;
+        hairMaterialData.azimuthalRoughness = surface.Roughness;
+
+        hairMaterialData.ior = 1.55f; // Typical value for human hair
+        hairMaterialData.eta = 1.0f / 1.55f;
+
+        hairMaterialData.fresnelApproximation = 0; // Dielectric
+        hairMaterialData.absorptionModel = HairAbsorptionModel_Color; // We don't have melanin data in skyrim
+        hairMaterialData.melanin = 0.3f;
+        hairMaterialData.melaninRedness = 0.5f;
+        hairMaterialData.cuticleAngleInDegrees = 3.0f;
+
+        hairInteractionSurface = CreateHairInteractionSurface(wi, surface.Tangent, surface.Bitangent, surface.Normal);
+        hairMaterialInteractionBcsdf = CreateHairMaterialInteractionBcsdf(hairMaterialData, 0.f, 0.f, surface.Roughness);
+    }
+    // Factory: returns a BCSDF initialised through __init(wi, surface).
+    static HairFarFieldBCSDF make(float3 wi, Surface surface)
+    {
+        HairFarFieldBCSDF bcsdf;
+        bcsdf.__init(wi, surface);
+        return bcsdf;
+    }
+    // Lobe mask reported for this BCSDF: diffuse reflection | diffuse transmission (the surface argument is not read).
+    static uint getLobes(Surface surface)
+    {
+        uint lobes = (uint)LobeType::DiffuseReflection | (uint)LobeType::DiffuseTransmission;
+
+        return lobes;
+    }
+    // Returns float4(bsdf.rgb, pdf): the sum of the R, TT and TRT lobes, and the same sum without the absorption terms.
+    float4 Eval(const float3 wi, const float3 wo) // for hair, wi = light dir, wo = view dir
+    {
+        const float3 tangentU = hairInteractionSurface.tangent; // tangent of hair
+
+        // determine cylindrical coordinates (theta/phi) [Marschner et al. 2003]
+        const float sinThetaI = clamp(dot(wi, tangentU), -1.0f, 1.0f); // clamped (as SampleBSDF does) so asin() below stays in its domain
+        const float sinThetaO = clamp(dot(wo, tangentU), -1.0f, 1.0f);
+        const float thetaI = asin(sinThetaI);
+        const float thetaO = asin(sinThetaO);
+        const float thetaH = 0.5f * (thetaO + thetaI);
+        const float thetaD = 0.5f * (thetaO - thetaI);
+        const float3 N = normalize(wi - sinThetaI * tangentU);
+        const float3 tpo = normalize(wo - sinThetaO * tangentU);
+        const float cosPhi = clamp(dot(N, tpo), -1.0f, 1.0f);
+        const float phi = acos(cosPhi);
+
+        // load fiber properties
+        const float roughness = hairMaterialInteractionBcsdf.roughness;
+        const float ior = hairMaterialInteractionBcsdf.ior;
+        const float iorSqr = ior * ior;
+        const float f0 = CalculateBaseReflectivity(1.0f, ior);
+        const float3 mua = hairMaterialInteractionBcsdf.absorptionCoefficient;
+
+        // Compute R lobe - smooth N term, Gaussian M term
+        const float fresCosR = cos(0.5f * acos(clamp(dot(wi, wo), -1.0f, 1.0f))); // [d'Eon et al. 2011 - (12)]
+        const float fresnelTermR = evalFresnelSchlick(f0, fresCosR).x;
+        const float betaR = sqrt(2.0f) * roughness * R_TERM_AZIMUTHAL_SQUEEZE;
+        const float M_R = Gaussian1D(thetaH + hairMaterialInteractionBcsdf.cuticleAngle, betaR * 0.5f);
+        const float N_R = fresnelTermR * 0.25f * cos(0.5f * phi);
+
+        // Compute TT lobe - smooth N term, Gaussian M term
+        const float betaTT = (sinThetaI < 1.0f) ? 0.25f * roughness * Sqrt0(-((-1.0f + iorSqr) / (-1.0f + sinThetaI * sinThetaI))) : 100000.0f;
+        const float M_TT = max((sinThetaI < 1.0f) ? Gaussian1D(thetaH - 0.5f * hairMaterialInteractionBcsdf.cuticleAngle, betaTT) : 0.0f, 0.0f);
+        const float sinThetaD = sin(thetaD);
+        const float etaPrmInv = cos(thetaD) / Sqrt0(iorSqr - sinThetaD * sinThetaD); // 1.0 / eta_prime
+        const float etaPrmInvSqr = etaPrmInv * etaPrmInv;
+        // hTT: root of phi(h) for p = 1
+        const float hTT = clamp(Sqrt01((0.5f + 0.5f * cosPhi) / (1.0f + etaPrmInvSqr - 2.0f * etaPrmInv * Sqrt01(0.5f - 0.5f * cosPhi))), -1.0f, 1.0f);
+        const float TTfresnelDot = cos(thetaD) * cos(asin(hTT));
+        const float TT_f = evalFresnelSchlick(f0, TTfresnelDot).x; // [d'Eon et al. 2011 - (14)]
+        const float fresnelTermTT = (1.0f - TT_f) * (1.0f - TT_f);
+        const float N_TT =
+            -1.0f / (2.0f * (-2.0f / Sqrt01(1.0f - hTT * hTT) + (2.0f * etaPrmInv) / Sqrt01(1.0f - etaPrmInvSqr * hTT * hTT)));
+        const float cosThetaT = cos(thetaD) / (etaPrmInv * ior);
+        const float gammaT = asin(hTT * etaPrmInv);
+        const float3 absorptionTT = exp(-mua * 2.0f * cos(gammaT) / cosThetaT); // TODO: absorption with Medulla
+        const float TTClamp = phi < 2.001f * acos(Sqrt01(1.0f - etaPrmInvSqr)) ? 0.0f : 1.0f;
+        const float3 A_TT = fresnelTermTT * absorptionTT * TTClamp;
+
+        // compute TRT lobe as sum of 3 Gaussians
+        const float betaTRT = roughness * (2.0f + pow(abs(thetaI), 1.5f));
+        const float M_TRT = Gaussian1D(thetaH - 1.5f * hairMaterialInteractionBcsdf.cuticleAngle, betaTRT * 0.5f);
+        const float clampTT = (phi < 2.001f * acos(Sqrt01(1.0f - etaPrmInvSqr))) ? 0.0f : 1.0f; // same 0/1 mask as TTClamp above
+
+        const float ti = abs(thetaD);
+
+        float p1, w1, w2, v1, v2;
+        // Piecewise fit in ti = |thetaD| for the position (p1), weights (w1, w2) and variances (v1, v2) of the three Gaussians.
+        if (ti < 0.525f)
+        {
+            p1 = cos(ti) - 0.733f;
+            w1 = (-0.000111282f) * (-0.103125f + ti) * ti + pow(ti, 15.7265f) + 0.00023939f;
+            w2 = 0.000322755f * ((ti - pow(1.80972f * ti, 16.7669f)) * tan(ti) + 0.991977f);
+            v1 = 0.00597578f - (-0.000428897f * cos((5.41149f * ti)));
+            v2 = 0.0181f * tan(cos(2.121f * ti));
+        }
+        else if (ti < 1.1f)
+        {
+            p1 = max(0.0f, 0.00493f + 0.579f * ti - 0.775f * ti * ti);
+            w1 = 0.00108f - 0.0014f * ti + 0.0003937f * ti * ti;
+            w2 = -0.00119f + 0.00219f * ti;
+            v1 = 0.0391f - 0.0888f * ti + 0.0581f * ti * ti;
+            v2 = 0.384f - 1.14f * ti + 0.942f * ti * ti;
+        }
+        else
+        {
+            p1 = 0.0f;
+            w1 = 0.0f;
+            w2 = 0.000239f + 0.00139f * ti * ti * ti - 0.00053124f * ti * ti * ti * ti * ti;
+            v1 = 1.0f;
+            v2 = -1.86f + 2.73f * ti - 0.7437f * ti * ti;
+        }
+
+        const float TRTwidth1 = roughness / 0.06f * Sqrt0(float(v1));
+        const float TRTwidth2 = roughness / 0.06f * Sqrt0(float(v2));
+        const float N_TRT = float((200.0f / K_PI) * (w1 * Gaussian1D(phi - float(p1), TRTwidth1) + w1 * Gaussian1D(phi + float(p1), TRTwidth1) +
+                                                     w2 * Gaussian1D(phi, TRTwidth2)));
+
+        // assume h = 0 for absorption
+        const float3 absorptionTRT = exp(-mua * 3.75f / cosThetaT);
+        const float3 A_TRT = absorptionTRT;
+
+        // eval:
+        float3 bsdf = max(0.5f * (M_R * N_R + M_TT * N_TT * A_TT * clampTT + M_TRT * N_TRT * A_TRT) / (cos(thetaD) * cos(thetaD)) * cos(thetaI), 0.0f);
+
+        // pdf is just eval() without the absorption terms applied
+        float pdf = 0.5f * (M_R * N_R + M_TT * N_TT * fresnelTermTT * TTClamp + M_TRT * N_TRT) / cos(thetaD) / cos(thetaD) * cos(thetaI);
+
+        return float4(bsdf, pdf);
+    }
+    // Chooses R, TT or TRT from lobeRandom, samples wi at azimuthal offset h, and returns false with zero weight when pdf < 1e-6.
+    bool SampleBSDF(const float3 wo, const float h, out float3 wi, out float pdf, out float3 weight, out uint lobe, out float lobeP, const float lobeRandom, const float4 preGeneratedSample)
+    {
+        const float2 rand2[2] = { preGeneratedSample.xy, preGeneratedSample.zw };
+
+        const float3 T = hairInteractionSurface.tangent;
+        const float3 N = hairInteractionSurface.shadingNormal;
+        const float3 B = cross(N, T);
+
+        const float sinThetaO = clamp(dot(T, wo), -1.0f, 1.0f);
+        const float cosThetaO = Sqrt01(1.0f - sinThetaO * sinThetaO);
+        const float thetaO = asin(sinThetaO);
+
+        const float f0 = CalculateBaseReflectivity(1.0f, hairMaterialInteractionBcsdf.ior);
+        const float mua = Luminance(hairMaterialInteractionBcsdf.absorptionCoefficient);
+        const float ior = hairMaterialInteractionBcsdf.ior;
+        const float roughness = hairMaterialInteractionBcsdf.roughness;
+
+        // sample lobe using specular cone propagation at selected h offset
+        // equivalent to assuming thetaI = thetaO
+        const float aSpec = cosThetaO / Sqrt0(ior * ior - sinThetaO * sinThetaO); // x * x, not pow(x, 2.0f): HLSL pow() yields NaN for a negative base and sinThetaO may be negative
+        const float fSpecR = evalFresnelSchlick(f0, cosThetaO * Sqrt01(1.0 - h * h)).x;
+        const float fSpecT = 1.0 - fSpecR;
+        const float cosThetaTSpec = cosThetaO / (aSpec * ior);
+        const float gammaTSpec = asin(h * aSpec);
+        const float absorptionSpec = exp(-2.0f * mua * (1.0f + cos(2.0f * gammaTSpec)) / cosThetaTSpec);
+
+        const float hAlbedoR = fSpecR;
+        const float hAlbedoTT = fSpecT * absorptionSpec * fSpecT;
+        const float hAlbedoTRT = fSpecT * absorptionSpec * fSpecR * absorptionSpec * fSpecT;
+        const float hAlbedoNorm = hAlbedoR + hAlbedoTT + hAlbedoTRT;
+
+        const float wR = hAlbedoR / hAlbedoNorm;
+        const float wTT = hAlbedoTT / hAlbedoNorm;
+        const float wTRT = hAlbedoTRT / hAlbedoNorm;
+
+        const float weightSum = wR + wTT + wTRT;
+        const float pdfLobeR = wR / weightSum;
+        const float pdfLobeTT= wTT / weightSum;
+        const float pdfLobeTRT = wTRT / weightSum;
+
+        float sampleWeight = 0.0f;
+        // Pick one lobe with probability pdfLobeR / pdfLobeTT / pdfLobeTRT.
+        if (lobeRandom < pdfLobeR)
+        {
+            // sample R
+            const float phi = PhiR(h);
+            const float betaR = sqrt(2.0f) * roughness * R_TERM_AZIMUTHAL_SQUEEZE;
+            const float thetaI = -thetaO + RandomGaussian1D(rand2[0].x, rand2[0].y) * betaR;
+
+            wi = cos(phi) * cos(thetaI) * N + sin(phi) * cos(thetaI) * B + sin(thetaI) * T;
+
+            const float fresnelTermR = evalFresnelSchlick(f0, cos(0.5f * acos(clamp(dot(wi, wo), -1.0f, 1.0f)))).x; // clamp as in Eval(): rounding can push the dot product outside acos()'s domain
+
+            sampleWeight = clamp(fresnelTermR / wR, 0.0f, 2.0f);
+
+            lobe = (uint)LobeType::DiffuseReflection;
+            lobeP = pdfLobeR;
+        }
+        else if (lobeRandom < pdfLobeR + pdfLobeTT)
+        {
+            // sample TT
+            const float betaTT = (roughness * Sqrt0(-((-1.0f + ior * ior) / (-1.0f + sinThetaO * sinThetaO)))) / 2.0f;
+            const float thetaI = -thetaO + RandomGaussian1D(rand2[0].x, rand2[0].y) * betaTT;
+            const float thetaD = 0.5f * (thetaI - thetaO);
+            const float sinThetaD = sin(thetaD); // squared by multiplication below; may be negative, so pow() is avoided
+            const float a = cos(thetaD) / Sqrt0(ior * ior - sinThetaD * sinThetaD); // 1.0 / eta_prime
+            const float phi = PhiTT(h, a);
+
+            wi = cos(phi) * cos(thetaI) * N + sin(phi) * cos(thetaI) * B + sin(thetaI) * T;
+
+            const float f = evalFresnelSchlick(f0, cos(thetaD) * cos(asin(h))).x; // [d'Eon et al. 2011 - (14)]
+
+            const float cosThetaT = cos(thetaD) / (a * ior);
+            const float gammaT = asin(h * a);
+            const float absorption = exp(-mua * (1.0f + cos(2.0f * gammaT)) / cosThetaT);
+
+            sampleWeight = clamp((1.0 - f) * (1.0 - f) * absorption / wTT, 0.0f, 2.0f);
+
+            lobe = (uint)LobeType::DiffuseTransmission;
+            lobeP = pdfLobeTT;
+        }
+        else
+        {
+            // sample TRT
+            const float betaTRT = roughness * (2.0f + pow(abs(thetaO), 1.5f));
+            const float thetaI = -thetaO + RandomGaussian1D(rand2[0].x, rand2[0].y) * betaTRT;
+            const float thetaD = 0.5f * (thetaI - thetaO);
+            const float sinThetaD = sin(thetaD); // squared by multiplication below; may be negative, so pow() is avoided
+            const float a = cos(thetaD) / Sqrt0(ior * ior - sinThetaD * sinThetaD); // 1.0 / eta_prime
+            const float phi = PhiTRT(h, a) + RandomGaussian1D(rand2[1].x, rand2[1].y) * roughness;
+
+            wi = cos(phi) * cos(thetaI) * N + sin(phi) * cos(thetaI) * B + sin(thetaI) * T;
+
+            const float f = evalFresnelSchlick(f0, cos(thetaD) * cos(asin(h))).x; // [d'Eon et al. 2011 - (14)]
+
+            const float cosThetaT = cos(thetaD) / (a * ior);
+            const float gammaT = asin(h * a);
+            const float absorption = exp(-2.3f * mua * (1.0f + cos(2.0f * gammaT)) / cosThetaT);
+
+            sampleWeight = clamp((1.0f - f) * (1.0f - f) * f * absorption / wTRT, 0.0f, 2.0f);
+
+            lobe = (uint)(LobeType::DiffuseReflection);
+            lobeP = pdfLobeTRT;
+        }
+
+        float4 evalResult = Eval(wi, wo);
+        float3 bsdfValue = evalResult.xyz;
+        pdf = evalResult.w;
+
+        if (pdf < 1e-6f)
+        {
+            weight = float3(0.0f, 0.0f, 0.0f);
+            lobe = 0;
+            lobeP = 0.0f;
+            return false;
+        }
+
+        weight = max(bsdfValue * sampleWeight / pdf, 0.0f);
+        return true;
+    }
+};
+
+#endif // __HAIRFARFIELDBCSDF_HLSLI__
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairMaterial.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairMaterial.hlsli
new file mode 100644
index 0000000000..cbb797f0b2
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/HairMaterial.hlsli
@@ -0,0 +1,343 @@
+// https://github.com/NVIDIA-RTX/RTXCR
+/*
+* Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef __HAIR_MATERIAL_HLSLI__
+#define __HAIR_MATERIAL_HLSLI__
+
+#define HairLobeType uint
+#define HairLobeType_R (0)
+#define HairLobeType_TT (1)
+#define HairLobeType_TRT (2)
+#define Hair_Max_Scattering_Events (3)
+
+#define HairAbsorptionModel uint
+#define HairAbsorptionModel_Color (0)
+#define HairAbsorptionModel_Physics (1)
+#define HairAbsorptionModel_Normalized (2)
+
+#define PI_OVER_EIGHT 0.626657069f // sqrt(pi / 8.0f); NOTE: despite the name, the value is sqrt(pi / 8), not pi / 8
+
+/************************************************
+    Hair Surface
+************************************************/
+
+struct HairInteractionSurface
+{
+    float3 incidentRayDirection;
+    float3 shadingNormal;
+    float3 tangent;
+};
+// Transforms the incident ray direction by the TBN matrix and pairs it with a fixed local shading normal (0, 0, 1) and tangent (0, 1, 0).
+HairInteractionSurface CreateHairInteractionSurface(
+    const float3 incidentRayDirection,
+    const float3 tangentWorld,
+    const float3 biTangentWorld,
+    const float3 normalWorld)
+{
+    const float3x3 hairTangentBasis = float3x3(tangentWorld, biTangentWorld, normalWorld); // TBN
+
+    const float3 incidentRayDirectionTangentSpace = mul(hairTangentBasis, incidentRayDirection);
+    HairInteractionSurface hairInteractionSurface;
+    hairInteractionSurface.incidentRayDirection = incidentRayDirectionTangentSpace;
+    hairInteractionSurface.shadingNormal = float3(0.0f, 0.0f, 1.0f);
+    hairInteractionSurface.tangent = float3(0.0f, 1.0f, 0.0f);
+    return hairInteractionSurface;
+}
+
+/************************************************
+    Hair Material
+************************************************/
+
+struct HairMaterialData
+{
+    float3 baseColor;
+    float longitudinalRoughness; // beta_m
+
+    float azimuthalRoughness; // beta_n
+    float ior;
+    float eta;
+    uint fresnelApproximation;
+
+    uint absorptionModel;
+    float melanin;
+    float melaninRedness;
+    float cuticleAngleInDegrees; // alpha
+};
+
+/************************************************
+    Hair Interaction - Chiang BSDF
+************************************************/
+
+struct HairMaterialInteraction
+{
+    float h;
+    float gammaI;
+    float3 absorptionCoefficient;
+
+    float ior;
+    float eta;
+    uint fresnelApproximation;
+
+    float logisticDistributionScalar; // s
+
+    float v[Hair_Max_Scattering_Events + 1];
+
+    float sin2kAlpha[Hair_Max_Scattering_Events];
+    float cos2kAlpha[Hair_Max_Scattering_Events];
+};
+
+// Compute Longitudinal Roughness Variance: fills v[0..Hair_Max_Scattering_Events] from betaM
+void ComputeRoughnessVariance(const float betaM, inout HairMaterialInteraction hairMaterialInteraction)
+{
+    float tmp = 0.726f * betaM + 0.812f * betaM * betaM + 3.7f * pow(betaM, 20.f);
+    hairMaterialInteraction.v[0] = max(tmp * tmp, 1e-7f);
+    hairMaterialInteraction.v[1] = 0.25f * hairMaterialInteraction.v[0];
+    hairMaterialInteraction.v[2] = 4 * hairMaterialInteraction.v[0];
+    [unroll]
+    for (uint p = 3; p <= Hair_Max_Scattering_Events; ++p)
+    {
+        hairMaterialInteraction.v[p] = hairMaterialInteraction.v[2]; // entries from index 3 upwards reuse v[2]
+    }
+}
+
+// Compute the azimuthal offset h
+float CalculateAzimuthallyDistance(const HairInteractionSurface hairInteractionSurface)
+{
+    // Project wi to the (B, N) plane
+    float3 wiProj = normalize(hairInteractionSurface.incidentRayDirection -
+                              dot(hairInteractionSurface.incidentRayDirection, hairInteractionSurface.tangent) * hairInteractionSurface.tangent);
+    // Calculate the vector perpendicular to the projected wi in the (B, N) plane
+    float3 wiProjPerpendicular = cross(wiProj, hairInteractionSurface.tangent);
+    // h = sin(Gamma) = cos(pi/2 - Gamma) = dot(N, Wi_Proj_Perpendicular)
+    return dot(hairInteractionSurface.shadingNormal, wiProjPerpendicular);
+}
+
+// Mapping from color to absorption coefficient.
+float3 AbsorptionCoefficientFromColor(const float3 color, const float betaN) +{ + const float tmp = 5.969f - 0.215f * betaN + 2.532f * betaN * betaN - 10.73f * pow(betaN, 3.0f) + 5.574f * pow(betaN, 4.0f) + 0.245f * pow(betaN, 5.0f); + const float3 sqrtAbsorptionCoefficient = log(max(color, 1e-4f)) / tmp; + return sqrtAbsorptionCoefficient * sqrtAbsorptionCoefficient; +} + +// Mapping from hair melanin to absorption coefficient +float3 ComputeAbsorptionFromMelanin(float eumelanin, float pheomelanin) +{ + return max(eumelanin * float3(0.506f, 0.841f, 1.653f) + pheomelanin * float3(0.343f, 0.733f, 1.924f), float3(0.0f, 0.0f, 0.0f)); +} + +float3 AbsorptionCoefficientFromMelanin(const float melanin_concentration, const float melanin_redness) +{ + float melanin_concentration_value = melanin_concentration; + float melanin_gamma = 2.4f; + float melanin = melanin_concentration_value * melanin_concentration_value * melanin_gamma; + float eumelanin = melanin * (1.0f - melanin_redness); + float pheomelanin = melanin * melanin_redness; + return ComputeAbsorptionFromMelanin(eumelanin, pheomelanin); +} + +float3 AbsorptionCoefficientFromMelaninNormalized(const float melanin, const float melaninRedness) +{ + const float melaninQty = -log(max(1.0f - melanin, 0.0001f)); + const float eumelanin = melaninQty * (1.0f - melaninRedness); + const float pheomelanin = melaninQty * melaninRedness; + // Adjusted sigma coefficient for range [0, 1] + const float3 eumelaninSigmaA = float3(0.506f, 0.841f, 1.653f); + const float3 pheomelaninSigmaA = float3(0.343f, 0.733f, 1.924f); + return eumelanin.rrr * eumelaninSigmaA + pheomelanin.rrr * pheomelaninSigmaA; +} + +float3 ComputeAbsorptionCoefficient(const HairMaterialData hairMaterialData) +{ + switch (hairMaterialData.absorptionModel) + { + case HairAbsorptionModel_Color: + return AbsorptionCoefficientFromColor(hairMaterialData.baseColor, hairMaterialData.azimuthalRoughness); + case HairAbsorptionModel_Physics: + return 
AbsorptionCoefficientFromMelanin(hairMaterialData.melanin, hairMaterialData.melaninRedness); + case HairAbsorptionModel_Normalized: + return AbsorptionCoefficientFromMelaninNormalized(hairMaterialData.melanin, hairMaterialData.melaninRedness); + } + return float3(0.0f, 0.0f, 0.0f); +} + +// Compute azimuthal logistic scale factor +float ComputelogisticDistributionScalar(const float betaN) +{ + return max(PI_OVER_EIGHT * (0.265f * betaN + 1.194f * betaN * betaN + 5.372f * pow(betaN, 22.0f)), 1e-7f); +} + +// Compute the scales that caused by the angle between hair cuticle and hair surface +// / / / <-- Hair Cuticles +// / / / +// /____/____/____ <-- Hair Surface +// +void ComputeHairCuticleScales(const float cuticleAngleInDegrees, inout HairMaterialInteraction hairMaterialInteraction) +{ + hairMaterialInteraction.sin2kAlpha[0] = sin(cuticleAngleInDegrees / 180.0f * K_PI); + hairMaterialInteraction.cos2kAlpha[0] = sqrt(saturate(1.f - hairMaterialInteraction.sin2kAlpha[0] * hairMaterialInteraction.sin2kAlpha[0])); + [unroll] + for (uint i = 1; i < 3; i++) + { + // sin(2*Theta) = 2 * sin(Theta) * cos(Theta) + hairMaterialInteraction.sin2kAlpha[i] = + 2 * hairMaterialInteraction.cos2kAlpha[i - 1] * hairMaterialInteraction.sin2kAlpha[i - 1]; + // cos(2*Theta) = (cos(Theta))^2 - (sin(Theta))^2 + hairMaterialInteraction.cos2kAlpha[i] = + hairMaterialInteraction.cos2kAlpha[i - 1] * hairMaterialInteraction.cos2kAlpha[i - 1] - + hairMaterialInteraction.sin2kAlpha[i - 1] * hairMaterialInteraction.sin2kAlpha[i - 1]; + } +} + +HairMaterialInteraction CreateHairMaterialInteraction( + const HairMaterialData hairMaterialData, + const HairInteractionSurface hairInteractionSurface) +{ + HairMaterialInteraction hairMaterialInteraction; + hairMaterialInteraction.h = CalculateAzimuthallyDistance(hairInteractionSurface); + hairMaterialInteraction.gammaI = asin(clamp(hairMaterialInteraction.h, -1.0f, 1.0f)); + hairMaterialInteraction.absorptionCoefficient = 
ComputeAbsorptionCoefficient(hairMaterialData); + hairMaterialInteraction.fresnelApproximation = hairMaterialData.fresnelApproximation; + hairMaterialInteraction.ior = hairMaterialData.ior; + hairMaterialInteraction.eta = hairMaterialData.eta; + hairMaterialInteraction.logisticDistributionScalar = ComputelogisticDistributionScalar(hairMaterialData.azimuthalRoughness); + // Compute hairMaterialInteraction.v + ComputeRoughnessVariance(hairMaterialData.longitudinalRoughness, hairMaterialInteraction); + // Compute Hair Scales + ComputeHairCuticleScales(hairMaterialData.cuticleAngleInDegrees, hairMaterialInteraction); + return hairMaterialInteraction; +} + +/************************************************ + Hair Interaction - Separate Chiang BSDF +************************************************/ + +struct HairMaterialSeparateChiangData +{ + HairMaterialData base; + + float longitudinalRoughnessTT; + float longitudinalRoughnessTRT; + float azimuthalRoughnessTT; + float azimuthalRoughnessTRT; +}; + +struct HairMaterialSeparateChiangInteraction +{ + float h; + float gammaI; + float3 absorptionCoefficient; + + float ior; + float eta; + uint fresnelApproximation; + + float logisticDistributionScalar[Hair_Max_Scattering_Events + 1]; // s + + float v[Hair_Max_Scattering_Events + 1]; + + float sin2kAlpha[Hair_Max_Scattering_Events]; + float cos2kAlpha[Hair_Max_Scattering_Events]; +}; + +float ComputeRoughnessVarianceSeparateChiang(const float betaM) +{ + const float tmp = 0.726f * betaM + 0.812f * betaM * betaM + 3.7f * pow(betaM, 20.f); + return max(tmp * tmp, 1e-7f); +} + +void ComputeHairCuticleScalesSeparateChiang(const float cuticleAngleInDegrees, inout HairMaterialSeparateChiangInteraction hairMaterialSeparateChiangInteraction) +{ + hairMaterialSeparateChiangInteraction.sin2kAlpha[0] = sin(cuticleAngleInDegrees / 180.0f * K_PI); + hairMaterialSeparateChiangInteraction.cos2kAlpha[0] = sqrt(saturate(1.f - hairMaterialSeparateChiangInteraction.sin2kAlpha[0] * 
hairMaterialSeparateChiangInteraction.sin2kAlpha[0])); + [unroll] + for (uint i = 1; i < 3; i++) + { + // sin(2*Theta) = 2 * sin(Theta) * cos(Theta) + hairMaterialSeparateChiangInteraction.sin2kAlpha[i] = + 2 * hairMaterialSeparateChiangInteraction.cos2kAlpha[i - 1] * hairMaterialSeparateChiangInteraction.sin2kAlpha[i - 1]; + // cos(2*Theta) = (cos(Theta))^2 - (sin(Theta))^2 + hairMaterialSeparateChiangInteraction.cos2kAlpha[i] = + hairMaterialSeparateChiangInteraction.cos2kAlpha[i - 1] * hairMaterialSeparateChiangInteraction.cos2kAlpha[i - 1] - + hairMaterialSeparateChiangInteraction.sin2kAlpha[i - 1] * hairMaterialSeparateChiangInteraction.sin2kAlpha[i - 1]; + } +} + +HairMaterialSeparateChiangInteraction CreateHairMaterialSeparateChiangInteraction( + const HairMaterialSeparateChiangData hairMaterialSeparateChiangData, + const HairInteractionSurface hairInteractionSurface) +{ + HairMaterialSeparateChiangInteraction hairMaterialSeparateChiangInteraction; + hairMaterialSeparateChiangInteraction.h = CalculateAzimuthallyDistance(hairInteractionSurface); + hairMaterialSeparateChiangInteraction.gammaI = asin(clamp(hairMaterialSeparateChiangInteraction.h, -1.0f, 1.0f)); + hairMaterialSeparateChiangInteraction.absorptionCoefficient = ComputeAbsorptionCoefficient(hairMaterialSeparateChiangData.base); + hairMaterialSeparateChiangInteraction.fresnelApproximation = hairMaterialSeparateChiangData.base.fresnelApproximation; + hairMaterialSeparateChiangInteraction.ior = hairMaterialSeparateChiangData.base.ior; + hairMaterialSeparateChiangInteraction.eta = hairMaterialSeparateChiangData.base.eta; + hairMaterialSeparateChiangInteraction.logisticDistributionScalar[0] = ComputelogisticDistributionScalar(hairMaterialSeparateChiangData.base.azimuthalRoughness); + hairMaterialSeparateChiangInteraction.logisticDistributionScalar[1] = ComputelogisticDistributionScalar(hairMaterialSeparateChiangData.azimuthalRoughnessTT); + 
hairMaterialSeparateChiangInteraction.logisticDistributionScalar[2] = ComputelogisticDistributionScalar(hairMaterialSeparateChiangData.azimuthalRoughnessTRT); + hairMaterialSeparateChiangInteraction.logisticDistributionScalar[3] = hairMaterialSeparateChiangInteraction.logisticDistributionScalar[2]; + // Compute hairMaterialInteraction.v + hairMaterialSeparateChiangInteraction.v[0] = ComputeRoughnessVarianceSeparateChiang(hairMaterialSeparateChiangData.base.longitudinalRoughness); + hairMaterialSeparateChiangInteraction.v[1] = ComputeRoughnessVarianceSeparateChiang(hairMaterialSeparateChiangData.longitudinalRoughnessTT); + hairMaterialSeparateChiangInteraction.v[2] = ComputeRoughnessVarianceSeparateChiang(hairMaterialSeparateChiangData.longitudinalRoughnessTRT); + hairMaterialSeparateChiangInteraction.v[3] = hairMaterialSeparateChiangInteraction.v[2]; + // Compute Hair Scales + ComputeHairCuticleScalesSeparateChiang(hairMaterialSeparateChiangData.base.cuticleAngleInDegrees, hairMaterialSeparateChiangInteraction); + return hairMaterialSeparateChiangInteraction; +} + +/************************************************ + Hair Interaction - Farfield BSDF +************************************************/ + +struct HairMaterialInteractionBcsdf +{ + float3 diffuseReflectionTint; + float diffuseReflectionWeight; + + float roughness; + float3 absorptionCoefficient; + + float ior; + float cuticleAngle; +}; + +HairMaterialInteractionBcsdf CreateHairMaterialInteractionBcsdf( + const HairMaterialData hairMaterialData, + const float3 diffuseReflectionTint, + const float diffuseReflectionWeight, + const float roughness) +{ + HairMaterialInteractionBcsdf hairMaterialInteractionBcsdf; + hairMaterialInteractionBcsdf.diffuseReflectionTint = diffuseReflectionTint; + hairMaterialInteractionBcsdf.diffuseReflectionWeight = diffuseReflectionWeight; + hairMaterialInteractionBcsdf.roughness = roughness; + hairMaterialInteractionBcsdf.absorptionCoefficient = 
ComputeAbsorptionCoefficient(hairMaterialData); + hairMaterialInteractionBcsdf.ior = hairMaterialData.ior; + hairMaterialInteractionBcsdf.cuticleAngle = radians(hairMaterialData.cuticleAngleInDegrees); + return hairMaterialInteractionBcsdf; +} + +#endif // __HAIR_MATERIAL_HLSLI__ \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/LobeType.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/LobeType.hlsli new file mode 100644 index 0000000000..a025474e20 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/LobeType.hlsli @@ -0,0 +1,32 @@ +#ifndef __LOBE_TYPE_HLSLI__ // using instead of "#pragma once" due to https://github.com/microsoft/DirectXShaderCompiler/issues/3943 +#define __LOBE_TYPE_HLSLI__ + +/** Flags representing the various lobes of a BxDF. +*/ +enum class LobeType // : uint32_t +{ + None = 0x00u, + + DiffuseReflection = 0x01u, + SpecularReflection = 0x02u, + DeltaReflection = 0x04u, + + DiffuseTransmission = 0x10u, + SpecularTransmission = 0x20u, + DeltaTransmission = 0x40u, + + Diffuse = 0x11u, + Specular = 0x22u, + Delta = 0x44u, + NonDelta = 0x33u, + + Reflection = 0x0fu, + Transmission = 0xf0u, + + NonDeltaReflection = 0x03u, + NonDeltaTransmission = 0x30u, + + All = 0xffu, +}; + +#endif // __LOBE_TYPE_HLSLI__ \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/Microfacet.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/Microfacet.hlsli new file mode 100644 index 0000000000..ec005192ba --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/Microfacet.hlsli @@ -0,0 +1,331 @@ +#ifndef __MICROFACET_HLSLI__ +#define __MICROFACET_HLSLI__ + +#include "Raytracing/Includes/MathConstants.hlsli" + +/** Evaluates the GGX (Trowbridge-Reitz) normal distribution function (D). 
+
+    Introduced by Trowbridge and Reitz, "Average irregularity representation of a rough surface for ray reflection", Journal of the Optical Society of America, vol. 65(5), 1975.
+    See the correct normalization factor in Walter et al. https://dl.acm.org/citation.cfm?id=2383874
+    We use the simpler, but equivalent expression in Eqn 19 from http://blog.selfshadow.com/publications/s2012-shading-course/hoffman/s2012_pbs_physics_math_notes.pdf
+
+    For microfacet models, D is evaluated for the direction h to find the density of potentially active microfacets (those for which microfacet normal m = h).
+    The 'alpha' parameter is the standard GGX width, e.g., it is the square of the linear roughness parameter in Disney's BRDF.
+    Note there is a singularity (0/0 = NaN) at NdotH = 1 and alpha = 0, so alpha should be clamped to some epsilon.
+
+    \param[in] alpha GGX width parameter (should be clamped to small epsilon beforehand).
+    \param[in] cosTheta Dot product between shading normal and half vector, in positive hemisphere.
+    \return D(h)
+*/
+float evalNdfGGX(float alpha, float cosTheta)
+{
+    float a2 = alpha * alpha;
+    float d = ((cosTheta * a2 - cosTheta) * cosTheta + 1);
+    return a2 / (d * d * K_PI);
+}
+
+/** Evaluates the PDF for sampling the GGX normal distribution function using Walter et al. 2007's method.
+    See https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf
+
+    The 1 / (4 * dot(wi, h)) factor that used to be applied by the caller is included in the result.
+
+    \param[in] alpha GGX width parameter (should be clamped to small epsilon beforehand).
+    \param[in] wi Incident direction in local space.
+    \param[in] h Half vector in local space; h.z is the cosine between shading normal and half vector.
+    \return D(h) * h.z / (4 * dot(wi, h)), or 0 when dot(wi, h) <= 0.
+*/
+float evalPdfGGX_NDF(float alpha, float3 wi, float3 h)
+{
+    float cosTheta = h.z;
+    float wiDotH = dot(wi, h);
+
+    // The previous form divided by max(0.f, dot(wi, h)) * 4.0f, i.e. by zero for a half vector facing
+    // away from wi, which produced inf/NaN. Report a zero pdf for those directions instead.
+    if (wiDotH <= 0.f) return 0.f;
+
+    return evalNdfGGX(alpha, cosTheta) * cosTheta / (wiDotH * 4.0f);
+}
+
+/** Samples the GGX (Trowbridge-Reitz) normal distribution function (D) using Walter et al. 2007's method.
+    Note that the sampled half vector may lie in the negative hemisphere. Such samples should be discarded.
+    See Eqn 35 & 36 in https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf
+    See Listing A.1 in https://seblagarde.files.wordpress.com/2015/07/course_notes_moving_frostbite_to_pbr_v32.pdf
+
+    \param[in] alpha GGX width parameter (should be clamped to small epsilon beforehand).
+    \param[in] u Uniform random number (2D).
+    \return Sampled half vector in local space. The sampling probability is not returned; use 'evalPdfGGX_NDF'.
+*/
+float3 sampleGGX_NDF(float alpha, float2 u)
+{
+    float alphaSqr = alpha * alpha;
+    float phi = u.y * (2 * K_PI);
+    float tanThetaSqr = alphaSqr * u.x / (1 - u.x);
+    float cosTheta = 1 / sqrt(1 + tanThetaSqr);
+    float r = sqrt(max(1 - cosTheta * cosTheta, 0));
+
+    return float3(cos(phi) * r, sin(phi) * r, cosTheta);
+}
+
+float evalG1GGX(float alphaSqr, float cosTheta);
+
+/** Evaluates the PDF for sampling the GGX distribution of visible normals (VNDF).
+    See http://jcgt.org/published/0007/04/01/paper.pdf
+
+    The 1 / (4 * dot(wi, h)) factor that used to be applied by the caller is included in the result.
+
+    \param[in] alpha GGX width parameter (should be clamped to small epsilon beforehand).
+    \param[in] wi Incident direction in local space, in the positive hemisphere.
+    \param[in] h Half vector in local space, in the positive hemisphere.
+    \return D_V(h) / (4 * dot(wi, h)) = G1(wi) * D(h) / (4 * wi.z), or 0 when wi.z <= 0 or dot(wi, h) <= 0.
+*/
+float evalPdfGGX_VNDF(float alpha, float3 wi, float3 h)
+{
+    // Outside the documented domain the visible-normal density is zero. Returning early also avoids the
+    // 0/0 (NaN) that "max(0.f, dot(wi, h)) / (wi.z * max(0.f, dot(wi, h)) * 4.0f)" produced when dot(wi, h) <= 0.
+    if (wi.z <= 0.f || dot(wi, h) <= 0.f) return 0.f;
+
+    float G1 = evalG1GGX(alpha * alpha, wi.z);
+    float D = evalNdfGGX(alpha, h.z);
+
+    // dot(wi, h) from D_V(h) cancels against the dot(wi, h) of the 1 / (4 * dot(wi, h)) factor.
+    return G1 * D / (wi.z * 4.0f);
+}
+
+/** Evaluates the PDF for sampling the GGX distribution of >bounded< visible normals (BVNDF).
+    See https://gpuopen.com/download/publications/Bounded_VNDF_Sampling_for_Smith-GGX_Reflections.pdf,
+    Adapted from listing 2.
+
+    \param[in] _alpha GGX width parameter (should be clamped to small epsilon beforehand); used for both axes (isotropic).
+    \param[in] i Incident direction in local space, in the positive hemisphere.
+    \param[in] m Half vector (microfacet normal) in local space, in the positive hemisphere.
+    \return pdf
+*/
+float evalPdfGGX_BVNDF( float _alpha, float3 i, float3 m )
+{
+    float2 alpha = _alpha.xx; // TODO: add support for anisotropic roughness
+    //float3 m = normalize( i + o );
+    float ndf = evalNdfGGX(_alpha, m.z); //D(m , alpha); // TODO: add support for anisotropic roughness
+    float2 ai = alpha * i.xy ;
+    float len2 = dot(ai, ai );
+    float t = sqrt ( len2 + i.z * i.z );
+#if 0 // our i.z is always in positive hemisphere
+    if ( i.z >= 0.0f )
+#endif
+    {
+        // Only this branch is compiled: the i.z < 0 path below is disabled by the preprocessor.
+        float a = saturate(min(alpha.x, alpha.y)); // Eq. 6
+        float s = 1.0f + length(float2(i.x, i.y)); // Omit sgn for a <=1
+        float a2 = a * a;
+        float s2 = s * s;
+        float k = (1.0f - a2) * s2 / (s2 + a2 * i.z * i.z); // Eq. 5
+        return ndf / (2.0f * (k * i.z + t)); // Eq. 8 * || dm/do ||
+    }
+#if 0 // our i.z is always in positive hemisphere
+    // Numerically stable form of the previous PDF for i.z < 0
+    return ndf * ( t - i.z ) / (2.0f * len2 ) ; // = Eq. 7 * || dm/do ||
+#endif
+}
+
+/** Samples the GGX (Trowbridge-Reitz) using the distribution of visible normals (VNDF).
+    The GGX VNDF yields significant variance reduction compared to sampling of the GGX NDF.
+    See http://jcgt.org/published/0007/04/01/paper.pdf
+
+    \param[in] alpha Isotropic GGX width parameter (should be clamped to small epsilon beforehand).
+    \param[in] wi Incident direction in local space, in the positive hemisphere.
+    \param[in] u Uniform random number (2D).
+    // \param[out] pdf Sampling probability. - removed for simplicity / removing code duplication; use 'evalPdfGGX_VNDF', compiler is smart enough to optimize things out
+    \return Sampled half vector in local space, in the positive hemisphere.
+*/
+float3 sampleGGX_VNDF(float alpha, float3 wi, float2 u)
+{
+    // Isotropic case: the same width is used along both tangent axes.
+    float alpha_x = alpha, alpha_y = alpha;
+
+    // Transform the view vector to the hemisphere configuration.
+    float3 Vh = normalize(float3(alpha_x * wi.x, alpha_y * wi.y, wi.z));
+
+    // Construct orthonormal basis (Vh,T1,T2).
+#if 0
+    float3 T1 = (Vh.z < 0.9999f) ? normalize(cross(float3(0, 0, 1), Vh)) : float3(1, 0, 0); // TODO: fp32 precision
+#else
+    // from latest http://jcgt.org/published/0007/04/01/paper.pdf - fewer instructions than above; 0.0002 threshold found empirically and matches above variant
+    float lensq = Vh.x * Vh.x + Vh.y * Vh.y;
+    float3 T1 = lensq > 0.0002f ? float3(-Vh.y, Vh.x, 0) * rsqrt(lensq) : float3(1,0,0);
+#endif
+    float3 T2 = cross(Vh, T1);
+
+    // Parameterization of the projected area of the hemisphere.
+    float r = sqrt(u.x);
+    float phi = (2.f * K_PI) * u.y;
+    float t1 = r * cos(phi);
+    float t2 = r * sin(phi);
+    float s = 0.5f * (1.f + Vh.z);
+    t2 = (1.f - s) * sqrt(1.f - t1 * t1) + s * t2;
+
+    // Reproject onto hemisphere.
+    float3 Nh = t1 * T1 + t2 * T2 + sqrt(max(0.f, 1.f - t1 * t1 - t2 * t2)) * Vh;
+
+    // Transform the normal back to the ellipsoid configuration. This is our half vector.
+    float3 h = normalize(float3(alpha_x * Nh.x, alpha_y * Nh.y, max(0.f, Nh.z)));
+
+    // pdf = evalPdfGGX_VNDF(alpha, wi, h);
+    return h;
+}
+
+/** Samples the GGX using the >bounded< distribution of visible normals (VNDF).
+    See https://gpuopen.com/download/publications/Bounded_VNDF_Sampling_for_Smith-GGX_Reflections.pdf,
+    Adapted from listing 1.
+
+    \param[in] _alpha Isotropic GGX width parameter (should be clamped to small epsilon beforehand).
+    \param[in] i Incident direction in local space, in the positive hemisphere.
+    \param[in] rand Uniform random number (2D).
+    \return Sampled half vector in local space, in the positive hemisphere.
+*/
+float3 sampleGGX_BVNDF(float _alpha, float3 i, float2 rand)
+{
+    float2 alpha = _alpha.xx; // TODO: add support for anisotropic roughness
+
+    // Incident direction stretched into the hemisphere configuration.
+    float3 i_std = normalize ( float3 ( i.xy * alpha, i.z ) ) ;
+    // Sample a spherical cap
+    float phi = 2.0f * K_PI * rand.x ;
+    float a = saturate( min( alpha.x, alpha.y ) ); // Eq. 6
+    float s = 1.0f + length( float2( i.x, i.y ) ); // Omit sgn for a <=1
+    float a2 = a * a; float s2 = s * s;
+    float k = (1.0f - a2) * s2 / (s2 + a2 * i.z * i.z); // Eq. 5
+    float b = i.z > 0 ? k * i_std.z : i_std.z;
+    float z = mad (1.0f - rand.y , 1.0f + b, -b );
+    float sinTheta = sqrt( saturate( 1.0f - z * z ) );
+    float3 o_std = float3( sinTheta * cos( phi ), sinTheta * sin( phi ), z );
+    // Compute the microfacet normal m
+    float3 m_std = i_std + o_std ;
+
+    // Transform the normal back to the ellipsoid configuration. This is our half vector. From this we can compute reflection vector with reflect(-ViewVector, h);
+    // (Previously this value was computed into an unused local and the same expression was evaluated again in the return statement.)
+    float3 m = normalize( float3( m_std.xy * alpha , m_std.z ) );
+    return m;
+}
+
+/** Evaluates the Smith masking function (G1) for the GGX normal distribution.
+    See Eq 34 in https://www.cs.cornell.edu/~srm/publications/EGSR07-btdf.pdf
+
+    The evaluated direction is assumed to be in the positive hemisphere relative the half vector.
+    This is the case when both incident and outgoing direction are in the same hemisphere, but care should be taken with transmission.
+
+    \param[in] alphaSqr Squared GGX width parameter.
+    \param[in] cosTheta Dot product between shading normal and evaluated direction, in the positive hemisphere.
+    \return G1 for the given direction; 0 when cosTheta <= 0.
+*/
+float evalG1GGX(float alphaSqr, float cosTheta)
+{
+    if (cosTheta <= 0) return 0;
+    float cosThetaSqr = cosTheta * cosTheta;
+    float tanThetaSqr = max(1 - cosThetaSqr, 0) / cosThetaSqr;
+    return 2 / (1 + sqrt(1 + alphaSqr * tanThetaSqr));
+}
+
+/** Evaluates the Smith lambda function for the GGX normal distribution.
+    See Eq 72 in http://jcgt.org/published/0003/02/03/paper.pdf
+
+    \param[in] alphaSqr Squared GGX width parameter.
+    \param[in] cosTheta Dot product between shading normal and the evaluated direction, in the positive hemisphere.
+    \return Lambda for the given direction; 0 when cosTheta <= 0.
+*/
+float evalLambdaGGX(float alphaSqr, float cosTheta)
+{
+    // Directions at or below the horizon contribute nothing.
+    if (cosTheta <= 0)
+    {
+        return 0;
+    }
+
+    float cos2 = cosTheta * cosTheta;
+    float tan2 = max(1 - cos2, 0) / cos2;
+    float root = sqrt(1 + alphaSqr * tan2);
+    return 0.5 * (-1 + root);
+}
+
+/** Separable Smith masking-shadowing term for the GGX normal distribution: the product of the
+    individual masking terms of the incident and the outgoing direction.
+    See Eq 98 in http://jcgt.org/published/0003/02/03/paper.pdf
+
+    \param[in] alpha GGX width parameter (should be clamped to small epsilon beforehand).
+    \param[in] cosThetaI Dot product between shading normal and incident direction, in positive hemisphere.
+    \param[in] cosThetaO Dot product between shading normal and outgoing direction, in positive hemisphere.
+    \return G(cosThetaI, cosThetaO) = 1 / ((1 + Lambda(cosThetaI)) * (1 + Lambda(cosThetaO)))
+*/
+float evalMaskingSmithGGXSeparable(float alpha, float cosThetaI, float cosThetaO)
+{
+    float a2 = alpha * alpha;
+    float onePlusLambdaI = 1 + evalLambdaGGX(a2, cosThetaI);
+    float onePlusLambdaO = 1 + evalLambdaGGX(a2, cosThetaO);
+    return 1 / (onePlusLambdaI * onePlusLambdaO);
+}
+
+/** Evaluates the height-correlated form of the masking-shadowing function for the GGX normal distribution, using Smith's approximation.
+    See Eq 99 in http://jcgt.org/published/0003/02/03/paper.pdf
+
+    Eric Heitz recommends using it in favor of the separable form as it is more accurate and of similar complexity.
+    The function is only valid for cosThetaI > 0 and cosThetaO > 0 and should be clamped to 0 otherwise.
+
+    \param[in] alpha GGX width parameter (should be clamped to small epsilon beforehand).
+    \param[in] cosThetaI Dot product between shading normal and incident direction, in positive hemisphere.
+    \param[in] cosThetaO Dot product between shading normal and outgoing direction, in positive hemisphere.
+    \return G(cosThetaI, cosThetaO) = 1 / (1 + Lambda(cosThetaI) + Lambda(cosThetaO)). No clamping to 0 is done here; that is left to the caller.
+*/
+float evalMaskingSmithGGXCorrelated(float alpha, float cosThetaI, float cosThetaO)
+{
+    float alphaSqr = alpha * alpha;
+    float lambdaI = evalLambdaGGX(alphaSqr, cosThetaI);
+    float lambdaO = evalLambdaGGX(alphaSqr, cosThetaO);
+    return 1 / (1 + lambdaI + lambdaO);
+}
+
+/** Approximate pre-integrated specular BRDF. The approximation assumes GGX VNDF and Schlick's approximation.
+    See Eq 4 in [Ray Tracing Gems, Chapter 32]
+
+    \param[in] specularReflectance Reflectance from a direction parallel to the normal.
+    \param[in] alpha GGX width parameter (should be clamped to small epsilon beforehand).
+    \param[in] cosTheta Dot product between shading normal and evaluated direction, in the positive hemisphere. The absolute value is used.
+    \return specularReflectance * max(0, scale) + max(0, bias), with scale and bias taken from the rational fits below.
+*/
+float3 approxSpecularIntegralGGX(float3 specularReflectance, float alpha, float cosTheta)
+{
+    cosTheta = abs(cosTheta);
+
+    // Powers of cosTheta: (1, c, c^2, c^3).
+    float4 X;
+    X.x = 1.f;
+    X.y = cosTheta;
+    X.z = cosTheta * cosTheta;
+    X.w = cosTheta * X.z;
+
+    // Powers of alpha: (1, a, a^2, a^3).
+    float4 Y;
+    Y.x = 1.f;
+    Y.y = alpha;
+    Y.z = alpha * alpha;
+    Y.w = alpha * Y.z;
+
+    // M1 / M2 are the numerator / denominator coefficients of the bias term, M3 / M4 those of the scale term.
+    float2x2 M1 = float2x2(
+        0.995367f, -1.38839f,
+        -0.24751f, 1.97442f
+    );
+
+    float3x3 M2 = float3x3(
+        1.0f, 2.68132f, 52.366f,
+        16.0932f, -3.98452f, 59.3013f,
+        -5.18731f, 255.259f, 2544.07f
+    );
+
+    float2x2 M3 = float2x2(
+        -0.0564526f, 3.82901f,
+        16.91f, -11.0303f
+    );
+
+    float3x3 M4 = float3x3(
+        1.0f, 4.11118f, -1.37886f,
+        19.3254f, -28.9947f, 16.9514f,
+        0.545386f, 96.0994f, -79.4492f
+    );
+
+    float bias = dot(mul(M1, X.xy), Y.xy) * rcp(dot(mul(M2, X.xyw), Y.xyw));
+    float scale = dot(mul(M3, X.xy), Y.xy) * rcp(dot(mul(M4, X.xzw), Y.xyw));
+
+    // This is a hack for specular reflectance of 0
+    // (the bias is faded out as the mean reflectance drops below 1/50)
+    float specularReflectanceLuma = dot(specularReflectance, float3( (1.f / 3.f).xxx ));
+    bias *= saturate(specularReflectanceLuma * 50.0f);
+
+    return mad(specularReflectance, max(0.0, scale), max(0.0, bias));
+}
+
+// Evaluates the cosine-weighted GGX microfacet specular term D * G * NdotL / (4 * NdotV * NdotL)
+// for incident direction wi, outgoing direction wo and shading normal N.
+// No Fresnel factor is applied here; the scalar result is broadcast to all three channels.
+// Returns 0 when either wi or wo is at or below the horizon of N.
+float3 evalMicrofacet(const float3 wi, const float3 wo, const float3 N, const float alpha)
+{
+    float NdotL = max(0.0f, dot(N, wo));
+    float NdotV = max(0.0f, dot(N, wi));
+
+    // evalMaskingSmithGGXCorrelated is only valid for positive cosines and does not clamp by itself.
+    // Without this guard, NdotV == 0 with NdotL > 0 left only the 1e-7 epsilon in the denominator
+    // and the result blew up instead of going to zero.
+    if (NdotL <= 0.0f || NdotV <= 0.0f)
+    {
+        return float3(0.0f, 0.0f, 0.0f);
+    }
+
+    float3 h = normalize(wi + wo);
+    float NdotH = max(0.0f, dot(N, h));
+
+    float D = evalNdfGGX(alpha, NdotH);
+    float G = evalMaskingSmithGGXCorrelated(alpha, NdotV, NdotL);
+
+    // The epsilon keeps the division finite for very small (but positive) cosines.
+    return (D * G * NdotL) / (4.0f * NdotV * NdotL + 1e-7f);
+}
+
+#endif // __MICROFACET_HLSLI__
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/SubsurfaceMaterial.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/SubsurfaceMaterial.hlsli
new file mode 100644
index 0000000000..f2dc8240c0
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/SubsurfaceMaterial.hlsli
@@ -0,0 +1,148 @@
+/*
+* Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef __SUBSURFACEMATERIAL_HLSLI__
+#define __SUBSURFACEMATERIAL_HLSLI__
+
+#include "Raytracing/Includes/MathHelpers.hlsli"
+
+#define MAX_SSS_SAMPLE_COUNT 4
+
+#define SSS_METERS_UNIT (1.f / 14.28f) // mm to skyrim units
+
+#define SSS_MIN_ALBEDO 0.01f
+
+/************************************************
+ Subsurface Material
+************************************************/
+
+struct SubsurfaceMaterialData
+{
+    float3 transmissionColor;
+    float g;
+
+    float3 scatteringColor;
+    float scale;
+};
+
+struct SubsurfaceInteraction
+{
+    float3 centerPosition;
+
+    float3 normal;
+    float3 tangent;
+    float3 biTangent;
+};
+
+struct SubsurfaceSample
+{
+    float3 samplePosition;
+    float3 bssrdfWeight;
+};
+
+struct VolumeCoefficients
+{
+    float3 scattering;
+    float3 absorption;
+};
+
+struct SubsurfaceMaterialCoefficients
+{
+    float3 sigma_s;
+    float3 sigma_t;
+    float3 albedo;
+    float3 ssAlbedo;
+};
+
+// Helper functions
+
+// Returns material data with every field set to zero.
+SubsurfaceMaterialData CreateDefaultSubsurfaceMaterialData()
+{
+    SubsurfaceMaterialData subsurfaceMaterialData;
+    subsurfaceMaterialData.transmissionColor = float3(0.0f, 0.0f, 0.0f);
+    subsurfaceMaterialData.scatteringColor = float3(0.0f, 0.0f, 0.0f);
+    subsurfaceMaterialData.g = 0.0f;
+    subsurfaceMaterialData.scale = 0.0f;
+    return subsurfaceMaterialData;
+}
+
+// Packs the given position and frame vectors into a SubsurfaceInteraction without modifying them.
+SubsurfaceInteraction CreateSubsurfaceInteraction(
+    const float3 centerPosition,
+    const float3 normal,
+    const float3 tangent,
+    const float3 biTangent)
+{
+    SubsurfaceInteraction subsurfaceInteraction;
+    subsurfaceInteraction.centerPosition = centerPosition;
+    subsurfaceInteraction.normal = normal;
+    subsurfaceInteraction.tangent = tangent;
+    subsurfaceInteraction.biTangent = biTangent;
+
+    return subsurfaceInteraction;
+}
+
+//https://blog.selfshadow.com/publications/s2017-shading-course/imageworks/s2017_pbs_imageworks_slides_v2.pdf
+float3 ComputeTransmissionAlbedo(in const float3 transmissionColor)
+{
+    return float3(4.09712f, 4.09712f, 4.09712f) +
+        (4.20863f * transmissionColor) -
+        Sqrt0(9.59217f +
+            41.6808f * transmissionColor +
+            17.7126f * transmissionColor * transmissionColor);
+}
+
+// Derives per-channel scattering and absorption coefficients from the material data.
+// Both coefficients are expressed relative to scatteringRadius = SSS_METERS_UNIT * scale * scatteringColor
+// (clamped to 1e-7), so that scattering + absorption = 1 / scatteringRadius.
+VolumeCoefficients ComputeSubsurfaceVolumeCoefficients(in const SubsurfaceMaterialData sssData)
+{
+    const float3 s = ComputeTransmissionAlbedo(sssData.transmissionColor);
+    const float3 alpha = (float3(1.f, 1.f, 1.f) - s * s) / max(float3(1.f, 1.f, 1.f) - sssData.g * (s * s), 1e-7f);
+    const float scale = SSS_METERS_UNIT * sssData.scale;
+    const float3 scatteringRadius = max(scale.rrr * sssData.scatteringColor, 1e-7f);
+
+    VolumeCoefficients subsurfaceVolumeCoefficients;
+    subsurfaceVolumeCoefficients.scattering = alpha / scatteringRadius;
+    subsurfaceVolumeCoefficients.absorption =
+        (float3(1.f, 1.f, 1.f) / scatteringRadius) - subsurfaceVolumeCoefficients.scattering;
+
+    return subsurfaceVolumeCoefficients;
+}
+
+// Computes the coefficients consumed by the subsurface integrator:
+//   sigma_s  - scattering coefficient,
+//   sigma_t  - extinction coefficient (absorption + scattering, clamped to at least 1e-7),
+//   albedo   - predicted diffuse albedo,
+//   ssAlbedo - single-scattering albedo sigma_s / sigma_t, clamped to at least SSS_MIN_ALBEDO.
+SubsurfaceMaterialCoefficients ComputeSubsurfaceMaterialCoefficients(in const SubsurfaceMaterialData sssData)
+{
+    VolumeCoefficients volumeCoefficients = ComputeSubsurfaceVolumeCoefficients(sssData);
+    const float3 sigma_a = volumeCoefficients.absorption;
+    const float3 sigma_s = volumeCoefficients.scattering;
+    const float3 sigma_t = max(sigma_a + sigma_s, 1e-7f);
+
+    const float3 mfp = 1.0f.rrr / sigma_t;
+    const float3 s = Sqrt0(sigma_a * mfp); // Sqrt0 of sigma_a / sigma_t
+
+    // custom diffuse albedo prediction based on MC simulation of isotropic scattering, diffuse transmittance on entry
+    // and Fresnel reflection back into the volume assuming ior = 1.4 (as if the air outside was denser)
+    SubsurfaceMaterialCoefficients subsurfaceMaterialCoefficients;
+    subsurfaceMaterialCoefficients.sigma_s = sigma_s;
+    // Export the same clamped extinction that mfp and ssAlbedo are derived from, so that consumers
+    // dividing by sigma_t never see a value below the clamp.
+    subsurfaceMaterialCoefficients.sigma_t = sigma_t;
+    subsurfaceMaterialCoefficients.albedo = 0.88f * (1.0f - s) / (1.0f + 1.5535f * s);
+    subsurfaceMaterialCoefficients.ssAlbedo = max(SSS_MIN_ALBEDO, sigma_s / sigma_t);
+
+    return subsurfaceMaterialCoefficients;
+}
+
+#endif // __SUBSURFACEMATERIAL_HLSLI__
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/SubsurfaceScattering.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/SubsurfaceScattering.hlsli
new file mode 100644
index 0000000000..0aa5ab68be
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/SubsurfaceScattering.hlsli
@@ -0,0 +1,135 @@
+/*
+* Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef __SUBSURFACESCATTERING_HLSLI__
+#define __SUBSURFACESCATTERING_HLSLI__
+
+#include "Raytracing/Includes/Materials/SubsurfaceMaterial.hlsli"
+
+/*//////////////// bibliography ////////////////////
+[1] Christensen, P.H. and Burley, B. Approximate Reflectance Profiles for Efficient Subsurface Scattering. 7.
+/////////////////////////////////////////////////*/
+
+// [1], S is a scaling factor based on curve fitting, there are different setups
+// The fit is selected at compile time by USE_DIFFUSE_MEAN_FREE_PATH.
+float3 SSS_S(float3 albedo)
+{
+#ifdef USE_DIFFUSE_MEAN_FREE_PATH
+    const float3 A33 = (albedo - 0.33);
+    const float3 A332 = A33 * A33;
+    return (3.5 + 100*A332*A332);
+#else
+    const float3 absa = abs(albedo - 0.8);
+    return 1.85 - albedo + 7 * absa * absa * absa;
+#endif
+}
+
+/** Samples a radius from the Burley diffusion profile and returns the matching sample weight.
+
+    A color channel is picked with probability proportional to diffuseAlbedo, then a radius is drawn from
+    that channel's two-exponential mixture (25% with rate d, 75% with rate d / 3). The same random number
+    is remapped and reused for every decision.
+
+    \param[in] rand Uniform random number (assumed to be in [0, 1) - confirm against the caller's generator).
+    \param[in] mfp Per-channel coefficient multiplied by SSS_S(); the visible caller passes sigma_t here.
+    \param[in] diffuseAlbedo Diffuse albedo. At least one channel must be non-zero, otherwise the channel
+                             selection divides by zero.
+    \param[in] ssAlbedo Single-scattering albedo, only used when enableTransmission is true.
+    \param[in] enableTransmission When true, the single-scattering term is subtracted from the weight.
+    \return xyz = sample weight (the caller treats it as bssrdf / pdf), w = sampled radius r.
+*/
+float4 SampleBurleyProfileMIS(
+    in float rand,
+    in const float3 mfp,
+    in const float3 diffuseAlbedo,
+    in const float3 ssAlbedo,
+    in const bool enableTransmission)
+{
+    // Importance Sampling Color Channels
+    const float3 albedoNormalized = diffuseAlbedo / max(diffuseAlbedo.r + diffuseAlbedo.g + diffuseAlbedo.b, 1e-7f).rrr;
+    const float2 channelCdf = float2(albedoNormalized.x, albedoNormalized.x + albedoNormalized.y);
+    uint channel = 0;
+    if (rand < channelCdf.x)
+    {
+        rand = rand / channelCdf.x;
+    }
+    else
+    {
+        if (rand < channelCdf.y)
+        {
+            rand = (rand - channelCdf.x) / albedoNormalized.y;
+            channel = 1; // sample from green profile: 2 pi r R(r)
+        }
+        else
+        {
+            rand = (rand - channelCdf.y) / albedoNormalized.z;
+            channel = 2; // sample from blue profile: 2 pi r R(r)
+        }
+    }
+
+    const float3 s = SSS_S(diffuseAlbedo);
+    const float3 d = max(mfp * s, 1e-7f);
+
+    // The remapped random number can be exactly 0 (e.g. rand == 0, rand == channelCdf.x or rand == 0.25).
+    // log(0) would give an infinite radius, which makes pdf3 zero and the returned weight 0 / 0 = NaN,
+    // so the argument of log() is kept strictly positive.
+    float r = 0.0f;
+    if (rand < 0.25f)
+    {
+        rand *= 4.0f; // Reuse random var and map to [0, 1]
+        r = -log(max(rand, 1e-7f)) / d[channel]; // r = -log(rand) * l / s = -log(rand) / mfp * s = -log(rand) / d
+    }
+    else
+    {
+        rand = (rand - 0.25f) / 0.75f; // Reuse random var and map to [0, 1]
+        r = -3.0f * log(max(rand, 1e-7f)) / d[channel];
+    }
+
+    const float3 pdf3 = 0.25f * d * (exp(-r * d) + exp(-r * d / 3.0f));
+    // only subtract single-scattering if transmission is enabled:
+    const float3 pdfSS = enableTransmission ? (0.266f * ssAlbedo * (exp(-5.434f * mfp * r) + exp(-1.811f * mfp * r)) * mfp) : 0.0f;
+    return float4((diffuseAlbedo * pdf3 - pdfSS) / dot(albedoNormalized, pdf3).rrr, r);
+}
+
+/** Draws one subsurface sample position and weight around a surface interaction.
+
+    \param[in] subsurfaceMaterialData Material parameters the coefficients are derived from.
+    \param[in] subsurfaceInteraction Center position and frame of the shaded point.
+    \param[in] maxSampleRadius Radius used to lift the sample along the normal: l = sqrt(max(R^2 - r^2, 1e-7)).
+    \param[in] enableTransmission Forwarded to SampleBurleyProfileMIS().
+    \param[in] rand2 x drives the radius sampling, y is forwarded to CalculateDiskSamplePosition().
+    \param[in,out] sssSample Receives samplePosition and bssrdfWeight (both fields are overwritten).
+*/
+void EvalBurleyDiffusionProfile(
+    in const SubsurfaceMaterialData subsurfaceMaterialData,
+    in const SubsurfaceInteraction subsurfaceInteraction,
+    in const float maxSampleRadius,
+    in const bool enableTransmission,
+    in const float2 rand2,
+    inout SubsurfaceSample sssSample)
+{
+    const SubsurfaceMaterialCoefficients sssMaterialCoeffcients = ComputeSubsurfaceMaterialCoefficients(subsurfaceMaterialData);
+
+    const float4 burleyProfileMisSample = SampleBurleyProfileMIS(rand2.x,
+                                                                 sssMaterialCoeffcients.sigma_t,
+                                                                 sssMaterialCoeffcients.albedo,
+                                                                 sssMaterialCoeffcients.ssAlbedo,
+                                                                 enableTransmission);
+    const float3 bssrdfWeight = burleyProfileMisSample.xyz; // bssrdf / pdf
+    const float r = burleyProfileMisSample.w;
+
+    // The sample is placed on the disk around the center and then offset along the normal by l.
+    const float l = sqrt(max(maxSampleRadius * maxSampleRadius - r * r, 1e-7f));
+    sssSample.samplePosition = CalculateDiskSamplePosition(rand2.y, r, subsurfaceInteraction.centerPosition, subsurfaceInteraction.tangent, subsurfaceInteraction.biTangent)
+                             + subsurfaceInteraction.normal * l;
+    sssSample.bssrdfWeight = bssrdfWeight;
+}
+
+// [Jensen01] A Practical Model for Subsurface Light Transport
+// dLo = S(xi, wi; xo, wo) * dLi * cosTheta
+//     = C * R(r) * Ft(xi, wi) * Ft(xo, wo) * dLi(xi, wi) * cos(Ni, Li)
+// C = 1/pi
+//
+// TODO: Figure out how to properly handle the rough surface fresnel terms.
+//       They currently don't have a closed form solution for BSSRDF.
+// Weights the cosine-scaled incident radiance of a subsurface sample by (1 / pi) * bssrdfWeight.
+float3 EvalBssrdf(
+    in const SubsurfaceSample sssSample,
+    in const float3 incidentRadiance,
+    in const float NoL)
+{
+    // Same evaluation order as before: (K_1_PI * weight) * (radiance * NoL).
+    const float3 cosineWeightedRadiance = incidentRadiance * NoL.xxx;
+    const float3 normalizedWeight = K_1_PI * sssSample.bssrdfWeight;
+    return normalizedWeight * cosineWeightedRadiance;
+}
+
+#endif // __SUBSURFACESCATTERING_HLSLI__
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/TexLODHelpers.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/TexLODHelpers.hlsli
new file mode 100644
index 0000000000..a87c5bf492
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/TexLODHelpers.hlsli
@@ -0,0 +1,456 @@
+/*
+* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+*
+* NVIDIA CORPORATION and its licensors retain all intellectual property
+* and proprietary rights in and to this software, related documentation
+* and any modifications thereto. Any use, reproduction, disclosure or
+* distribution of this software and related documentation without an express
+* license agreement from NVIDIA CORPORATION is strictly prohibited.
+*/
+
+#ifndef __TEX_LOD_HELPERS_HLSLI__
+#define __TEX_LOD_HELPERS_HLSLI__
+
+#include "Raytracing/Includes/MathHelpers.hlsli"
+
+/** Helper functions for the texture level-of-detail (LOD) system.
+
+    Supports texture LOD both for ray differentials (Igehy, SIGGRAPH 1999) and a method based on ray cones,
+    described in
+    * "Strategies for Texture Level-of-Detail for Real-Time Ray Tracing," by Tomas Akenine-Moller et al., Ray Tracing Gems, 2019,
+    * "Improved Shader and Texture Level-of-Detail using Ray Cones" by Akenine-Moller et al., Journal of Graphics Tools, 2021,
+    * "Refraction Ray Cones for Texture Level of Detail" by Boksansky et al., to appear in Ray Tracing Gems II, 2021.
+
+    Note that the actual texture lookups are baked into the TextureSampler interfaces.
+
+    See WhittedRayTracer.* for an example using these functions.
+*/
+
+// Modes for calculating spread angle from curvature
+#define TEXLOD_SPREADANGLE_RTG1 0 // 0: Original approach derived from RTG.
+#define TEXLOD_SPREADANGLE_ARC_LENGTH_UNOPTIMIZED 1 // 1: New arc-length integration approach, unoptimized.
+#define TEXLOD_SPREADANGLE_ARC_LENGTH_OPTIMIZED 2 // 2: New arc-length integration approach, optimized.
+
+// Choose one of the modes above; defaults to the optimized arc length approach (2)
+#define TEXLOD_SPREADANGLE_FROM_CURVATURE_MODE TEXLOD_SPREADANGLE_ARC_LENGTH_OPTIMIZED
+
+// Uncomment to use FP16 for ray cone payload
+// #define USE_RAYCONES_WITH_FP16_IN_RAYPAYLOAD
+
+// log2 function clamped to a valid domain. The range of lod values that can be returned is [-126, 127].
+float SafeLog2(float x) { return log2(clamp(x, FLT_MIN, FLT_MAX)); }
+
+// ----------------------------------------------------------------------------
+// Ray cone helpers
+// ----------------------------------------------------------------------------
+
+/** Describes a ray cone for texture level-of-detail.
+
+    Representing a ray cone based on width and spread angle. Has both FP32 and FP16 support.
+    Use #define USE_RAYCONES_WITH_FP16_IN_RAYPAYLOAD to use FP16
+
+    Note: spread angle is the whole (not half) cone angle! See https://research.nvidia.com/publication/2021-08_refraction-ray-cones-texture-level-detail
+*/
+struct RayCone
+{
+#ifndef USE_RAYCONES_WITH_FP16_IN_RAYPAYLOAD
+    float width;
+    float spreadAngle;
+    float getWidth() { return width; }
+    float getSpreadAngle() { return spreadAngle; }
+#else
+    // Width is packed into the upper 16 bits, spread angle into the lower 16 bits.
+    uint widthSpreadAngleFP16;
+    float getWidth() { return f16tof32(widthSpreadAngleFP16 >> 16); }
+    float getSpreadAngle() { return f16tof32(widthSpreadAngleFP16); }
+#endif
+
+    /** Initializes a ray cone struct.
+        \param[in] width The width of the ray cone.
+        \param[in] angle The angle of the ray cone.
+    */
+    void __init(float width, float angle)
+    {
+#ifndef USE_RAYCONES_WITH_FP16_IN_RAYPAYLOAD
+        this.width = width;
+        this.spreadAngle = angle;
+#else
+        this.widthSpreadAngleFP16 = (f32tof16(width) << 16) | f32tof16(angle);
+#endif
+    }
+    static RayCone make(float width, float angle) { RayCone ret; ret.__init(width, angle); return ret; }
+
+    /** Propagate the raycone to the next hit point (hitT distance away).
+        \param[in] hitT Distance to the hit point.
+        \return The propagated ray cone.
+    */
+    RayCone propagateDistance(float hitT)
+    {
+        float angle = getSpreadAngle();
+        float width = getWidth();
+        return RayCone::make(angle * hitT + width, angle);
+    }
+
+    /** Add surface spread angle to the current RayCone and returns the updated RayCone.
+        \param[in] surfaceSpreadAngle Angle to be added.
+        \return The updated ray cone.
+    */
+    RayCone addToSpreadAngle(float surfaceSpreadAngle)
+    {
+        float angle = getSpreadAngle();
+        return RayCone::make(getWidth(), angle + surfaceSpreadAngle);
+    }
+
+    /** Compute texture level of detail based on the ray cone, including the texture dimensions.
+        Kept next to the dimension-independent overload below since it may be easier to understand.
+        Note: call propagateDistance() before computeLOD()
+        \param[in] triLODConstant Value computed by computeRayConeTriangleLODValue().
+        \param[in] rayDir Ray direction.
+        \param[in] normal Normal at the hit point.
+        \param[in] textureWidth Texture width.
+        \param[in] textureHeight Texture height.
+        \param[in] moreDetailOnSlopes When true, the |dot(rayDir, normal)| term is replaced by its square root.
+        \return The level of detail, lambda.
+    */
+    float computeLOD(float triLODConstant, float3 rayDir, float3 normal, float textureWidth, float textureHeight, uniform bool moreDetailOnSlopes = false)
+    {
+        float lambda = triLODConstant; // Constant per triangle.
+        float filterWidth = getWidth();
+        float distTerm = abs(filterWidth);
+        float normalTerm = abs(dot(rayDir, normal));
+        if( moreDetailOnSlopes ) normalTerm = sqrt( normalTerm );
+        lambda += 0.5f * SafeLog2(textureWidth * textureHeight * distTerm / normalTerm);
+        return lambda;
+    }
+
+    /** Compute texture level of details based on ray cone.
+        Note that this versions excludes texture dimension dependency, which is instead added back in
+        using the ExplicitRayConesLodTextureSampler:ITextureSampler in order to support baseColor, specular, etc per surfaces.
+        \param[in] triLODConstant Value computed by computeRayConeTriangleLODValue().
+        \param[in] rayDir Ray direction.
+        \param[in] normal Normal at the hit point.
+        \param[in] moreDetailOnSlopes When true, the |dot(rayDir, normal)| term is replaced by its square root.
+        \return The level of detail, lambda.
+    */
+    float computeLOD(float triLODConstant, float3 rayDir, float3 normal, uniform bool moreDetailOnSlopes = false) // Note: call propagateDistance() before computeLOD()
+    {
+        float lambda = triLODConstant; // constant per triangle
+        float filterWidth = getWidth();
+        float distTerm = abs(filterWidth);
+        float normalTerm = abs(dot(rayDir, normal));
+        if( moreDetailOnSlopes ) normalTerm = sqrt( normalTerm );
+        lambda += SafeLog2(distTerm / normalTerm);
+        return lambda;
+    }
+};
+
+/** Compute the triangle LOD value based on triangle vertices and texture coordinates, used by ray cones.
+    \param[in] vertices Triangle vertices.
+    \param[in] txcoords Texture coordinates at triangle vertices.
+    \param[in] worldMat 3x3 world matrix.
+    \return Triangle LOD value.
+*/
+float computeRayConeTriangleLODValue(float3 vertices[3], float2 txcoords[3], float3x3 worldMat)
+{
+    float2 tx10 = txcoords[1] - txcoords[0];
+    float2 tx20 = txcoords[2] - txcoords[0];
+    float Ta = abs(tx10.x * tx20.y - tx20.x * tx10.y);
+
+    // We need the area of the triangle, which is length(triangleNormal) in worldspace, and
+    // could not figure out a way with fewer than two 3x3 mtx multiplies for ray cones.
+    float3 edge01 = mul(vertices[1] - vertices[0], worldMat);
+    float3 edge02 = mul(vertices[2] - vertices[0], worldMat);
+
+    float3 triangleNormal = cross(edge01, edge02); // In world space, by design.
+    float Pa = length(triangleNormal); // Twice the area of the triangle.
+    return 0.5f * SafeLog2(Ta / Pa); // Value used by texture LOD cones model.
+}
+
+/** Compute screen space spread angle at the first hit point based on ddx and ddy of normal and position.
+    \param[in] positionW Position of the hit point in world space.
+    \param[in] normalW Normal of the hit point in world space.
+    \return Spread angle at hit point.
+*/
+float computeScreenSpaceSurfaceSpreadAngle(float3 positionW, float3 normalW)
+{
+    float3 dNdx = ddx(normalW);
+    float3 dNdy = ddy(normalW);
+    float3 dPdx = ddx(positionW);
+    float3 dPdy = ddy(positionW);
+
+    float beta = sqrt(dot(dNdx, dNdx) + dot(dNdy, dNdy)) * sign(dot(dNdx, dPdx) + dot(dNdy, dPdy));
+    return beta;
+}
+
+/** Compute screen space spread angle at the first hit point from explicit normal differentials.
+    \param[in] rightVector The difference vector between normalized eye ray direction at (x + 1, y) and (x, y).
+    \param[in] upVector The difference vector between normalized eye ray direction at (x, y + 1) and (x, y).
+    \param[in] dNdx Differential normal in the x-direction.
+    \param[in] dNdy Differential normal in the y-direction.
+    \return Spread angle at hit point.
+*/
+float computeScreenSpaceSurfaceSpreadAngle(float3 rightVector, float3 upVector, float3 dNdx, float3 dNdy)
+{
+    float betaX = atan(length(dNdx));
+    float betaY = atan(length(dNdy));
+    float betaCurvature = sqrt(betaX * betaX + betaY * betaY) * (betaX >= betaY ? sign(dot(rightVector, dNdx)) : sign(dot(upVector, dNdy)));
+    return betaCurvature;
+}
+
+/** Compute spread from estimated curvature from a triangle for ray cones.
+    \param[in] curvature Curvature value.
+    \param[in] rayConeWidth The width of the ray cone.
+    \param[in] rayDir The ray direction.
+    \param[in] normal The normal.
+    \return Spread angle.
+*/
+float computeSpreadAngleFromCurvatureIso(float curvature, float rayConeWidth, float3 rayDir, float3 normal)
+{
+    float dn = -dot(rayDir, normal);
+    // Keep |dn| >= 1e-5 because it is used as a divisor below. sign(0) is 0, so the sign is chosen
+    // explicitly: an exactly grazing ray (dn == 0) must map to +1e-5 instead of staying at 0.
+    dn = abs(dn) < 1.0e-5 ? (dn < 0.0 ? -1.0e-5 : 1.0e-5) : dn;
+
+#if TEXLOD_SPREADANGLE_FROM_CURVATURE_MODE == TEXLOD_SPREADANGLE_RTG1
+    // Original approach.
+    float s = sign(curvature);
+    float curvatureScaled = curvature * rayConeWidth * 0.5 / dn;
+    float surfaceSpreadAngle = 2.0 * atan(abs(curvatureScaled) / sqrt(2.0)) * s;
+#elif TEXLOD_SPREADANGLE_FROM_CURVATURE_MODE == TEXLOD_SPREADANGLE_ARC_LENGTH_UNOPTIMIZED
+    // New approach, unoptimized: https://www.math24.net/curvature-plane-curves/
+
+    float r = 1.0 / (curvature);
+    float chord = (rayConeWidth) / (dn);
+    float arcLength = asin(chord / (2.0 * r)) * (2.0 * r);
+    float deltaPhi = (curvature) * (arcLength);
+
+    float surfaceSpreadAngle = deltaPhi;
+#else // TEXLOD_SPREADANGLE_FROM_CURVATURE_MODE == TEXLOD_SPREADANGLE_ARC_LENGTH_OPTIMIZED
+    // New approach : Fast Approximation.
+    float deltaPhi = (curvature * rayConeWidth / dn);
+    float surfaceSpreadAngle = deltaPhi;
+#endif
+
+    return surfaceSpreadAngle;
+}
+
+/** Exploit ray cone to compute an approximate anisotropic filter. The idea is to find the width (2*radius) of the ray cone at
+    the intersection point, and approximate the ray cone as a cylinder at that point with that radius. Then intersect the
+    cylinder with the triangle plane to find the ellipse of anisotropy. Finally, convert to gradients in texture coordinates.
+    \param[in] intersectionPoint The intersection point.
+    \param[in] faceNormal The normal of the triangle.
+    \param[in] rayConeDir Direction of the ray cone.
+    \param[in] rayConeRadiusAtIntersection Size of the cone at the intersection point (NOTE(review): the original text said "use: raycone.getWidth()" although the parameter is named radius - confirm which one callers should pass).
+    \param[in] positions Positions of the triangle.
+    \param[in] txcoords Texture coordinates of the vertices of the triangle.
+    \param[in] interpolatedTexCoordsAtIntersection Interpolated texture coordinates at the intersection point.
+    \param[out] texGradientX First gradient of texture coordinates, which can be fed into SampleGrad().
+    \param[out] texGradientY Second gradient of texture coordinates, which can be fed into SampleGrad().
+*/
+void computeAnisotropicEllipseAxes(float3 intersectionPoint, float3 faceNormal, float3 rayConeDir,
+    float rayConeRadiusAtIntersection, float3 positions[3], float2 txcoords[3], float2 interpolatedTexCoordsAtIntersection,
+    out float2 texGradientX, out float2 texGradientY)
+{
+    // Compute ellipse axes.
+    float3 ellipseAxis0 = rayConeDir - dot(faceNormal, rayConeDir) * faceNormal; // Project rayConeDir onto the plane.
+    float3 rayDirPlaneProjection0 = ellipseAxis0 - dot(rayConeDir, ellipseAxis0) * rayConeDir; // Project axis onto the plane defined by the ray cone dir.
+    ellipseAxis0 *= rayConeRadiusAtIntersection / max(0.0001f, length(rayDirPlaneProjection0)); // Using uniform triangles to find the scale.
+
+    float3 ellipseAxis1 = cross(faceNormal, ellipseAxis0);
+    float3 rayDirPlaneProjection1 = ellipseAxis1 - dot(rayConeDir, ellipseAxis1) * rayConeDir;
+    ellipseAxis1 *= rayConeRadiusAtIntersection / max(0.0001f, length(rayDirPlaneProjection1));
+
+    // Compute texture coordinate gradients.
+    // (The never-referenced locals Atriangle, Au and Av were removed.)
+    float3 edgeP;
+    float u, v;
+    float3 d = intersectionPoint - positions[0];
+    float3 edge01 = positions[1] - positions[0];
+    float3 edge02 = positions[2] - positions[0];
+    // Not guarded against degenerate (zero-area) triangles: the dot product below is then 0.
+    float oneOverAreaTriangle = 1.0f / dot(faceNormal, cross(edge01, edge02));
+
+    // Compute barycentrics of the point displaced along the first ellipse axis and interpolate the UVs there.
+    edgeP = d + ellipseAxis0;
+    u = dot(faceNormal, cross(edgeP, edge02)) * oneOverAreaTriangle;
+    v = dot(faceNormal, cross(edge01, edgeP)) * oneOverAreaTriangle;
+    texGradientX = (1.0f - u - v) * txcoords[0] + u * txcoords[1] + v * txcoords[2] - interpolatedTexCoordsAtIntersection;
+
+    // Same for the second ellipse axis.
+    edgeP = d + ellipseAxis1;
+    u = dot(faceNormal, cross(edgeP, edge02)) * oneOverAreaTriangle;
+    v = dot(faceNormal, cross(edge01, edgeP)) * oneOverAreaTriangle;
+    texGradientY = (1.0f - u - v) * txcoords[0] + u * txcoords[1] + v * txcoords[2] - interpolatedTexCoordsAtIntersection;
+}
+
+/** Refracts a ray and handles total internal reflection (TIR) in 3D.
+    \param[in] rayDir The ray direction to be refracted.
+    \param[in] normal The normal at the hit point.
+    \param[in] eta The ratio of indices of refraction (entering / exiting).
+    \param[out] refractedRayDir The refracted vector (set to zero on total internal reflection).
+    \return Returns false if total internal reflection occurred, otherwise true.
+*/
+bool refractWithTIR(float3 rayDir, float3 normal, float eta, out float3 refractedRayDir)
+{
+    float NdotD = dot(normal, rayDir);
+    // k < 0 means there is no real refracted direction, i.e. total internal reflection.
+    float k = 1.0f - eta * eta * (1.0f - NdotD * NdotD);
+    if (k < 0.0f)
+    {
+        refractedRayDir = float3(0.0, 0.0, 0.0);
+        return false;
+    }
+    else
+    {
+        refractedRayDir = rayDir * eta - normal * (eta * NdotD + sqrt(k));
+        return true;
+    }
+}
+
+/** Refracts a ray and handles total internal reflection (TIR) in 2D.
+    \param[in] rayDir The ray direction to be refracted.
+    \param[in] normal The normal at the hit point.
+    \param[in] eta The ratio of indices of refraction (entering / exiting).
+    \param[out] refractedRayDir The refracted vector.
+    \return Returns false if total internal reflection occurred, otherwise true.
+*/
+bool refractWithTIR(float2 rayDir, float2 normal, float eta, out float2 refractedRayDir)
+{
+    const float cosIncident = dot(normal, rayDir);
+    const float discriminant = 1.0f - eta * eta * (1.0f - cosIncident * cosIncident);
+
+    // A negative discriminant means total internal reflection: report it and return a zero vector.
+    if (discriminant < 0.0f)
+    {
+        refractedRayDir = float2(0.0,0.0);
+        return false;
+    }
+
+    refractedRayDir = rayDir * eta - normal * (eta * cosIncident + sqrt(discriminant));
+    return true;
+}
+
+/** Rotates a 2D vector by +angle and by -angle in one go, sharing the sine and cosine.
+    \param[in] vec The vector to rotate.
+    \param[in] angle The rotation angle.
+    \param[out] rotatedVecPlus vec rotated by +angle.
+    \param[out] rotatedVecMinus vec rotated by -angle.
+*/
+void rotate2DPlusMinus(float2 vec, float angle, out float2 rotatedVecPlus, out float2 rotatedVecMinus)
+{
+    const float cosAngle = cos(angle);
+    const float sinAngle = sin(angle);
+
+    // The four products shared by both rotation matrices.
+    const float xCos = cosAngle * vec.x;
+    const float ySin = sinAngle * vec.y;
+    const float xSin = sinAngle * vec.x;
+    const float yCos = cosAngle * vec.y;
+
+    rotatedVecPlus = float2(xCos - ySin, +xSin + yCos);
+    rotatedVecMinus = float2(xCos + ySin, -xSin + yCos);
+}
+
+/** Returns the input vector rotated 90 degrees counter-clockwise, i.e. a vector orthogonal to it.
+    \param[in] vec The vector to rotate.
+    \return (-vec.y, vec.x).
+*/
+float2 orthogonal(float2 vec)
+{
+    float2 rotated;
+    rotated.x = -vec.y;
+    rotated.y = vec.x;
+    return rotated;
+}
+
+/** Computes RayCone for a given refracted ray direction. Note that the incident ray cone should be called with propagateDistance(hitT); before computeRayConeForRefraction() is called.
+    \param[in,out] rayCone A ray cone to be refracted, result is returned here as well.
+    \param[in] rayOrg Ray origin.
+    \param[in] rayDir Ray direction.
+    \param[in] hitPoint The hit point.
+    \param[in] normal The normal at the hit point.
+    \param[in] normalSpreadAngle The spread angle at the normal at the hit point.
+    \param[in] eta Ratio of indices of refraction (enteringIndexOfRefraction / exitingIndexOfRefraction).
+    \param[in] refractedRayDir The refracted ray direction.
+*/
+void computeRayConeForRefraction(inout RayCone rayCone, float3 rayOrg, float3 rayDir, float3 hitPoint, float3 normal, float normalSpreadAngle,
+    float eta, float3 refractedRayDir)
+{
+    // We have refractedRayDir, which is the direction of the refracted ray cone,
+    // but we also need the rayCone.width and the rayCone.spreadAngle. These are computed in 2D,
+    // with xAxis and yAxis as the 3D axes. hitPoint is the origin of this 2D coordinate system.
+    // (rayOrg and hitPoint are not referenced by the computation below.)
+    // Not guarded: for rayDir parallel to normal the vector being normalized is zero.
+    float3 xAxis = normalize(rayDir - normal * dot(normal, rayDir));
+    float3 yAxis = normal;
+
+    float2 refractedDir2D = float2(dot(refractedRayDir, xAxis), dot(refractedRayDir, yAxis)); // Project to 2D.
+    float2 incidentDir2D = float2(dot(rayDir, xAxis), dot(rayDir, yAxis)); // Project to 2D.
+    float2 incidentDir2D_u, incidentDir2D_l; // Upper (_u) and lower (_l) line of ray cone in 2D.
+    float2 incidentDirOrtho2D = orthogonal(incidentDir2D);
+
+    float widthSign = rayCone.getWidth() > 0.0f ? 1.0f : -1.0f;
+
+    rotate2DPlusMinus(incidentDir2D, rayCone.getSpreadAngle() * widthSign * 0.5f, incidentDir2D_u, incidentDir2D_l);
+
+    // Note: since we assume that the incident ray cone has been propagated to the hitpoint, we start the width-vector
+    // from the origin (0,0), and so, we do not need to add rayOrigin2D to tu and tl.
+    float2 tu = +incidentDirOrtho2D * rayCone.getWidth() * 0.5f; // Top, upper point on the incoming ray cone (in 2D).
+    float2 tl = -tu; // Top, lower point on the incoming ray cone (in 2D).
+    // Intersect 2D rays (tu + t * incidentDir2D_u, and similar for _l) with y = 0.
+    // Optimized because y will always be 0.0f, so only need to compute x.
+    float hitPoint_u_x = tu.x + incidentDir2D_u.x * (-tu.y / incidentDir2D_u.y);
+    float hitPoint_l_x = tl.x + incidentDir2D_l.x * (-tl.y / incidentDir2D_l.y);
+
+    float normalSign = hitPoint_u_x > hitPoint_l_x ? +1.0f : -1.0f;
+
+    float2 normal2D = float2(0.0f, 1.0f);
+    float2 normal2D_u, normal2D_l;
+
+    rotate2DPlusMinus(normal2D, -normalSpreadAngle * normalSign * 0.5f, normal2D_u, normal2D_l);
+
+    // Refract in 2D.
+    // On total internal reflection, fall back to the incident direction projected onto the surface.
+    float2 refractedDir2D_u, refractedDir2D_l;
+    if (!refractWithTIR(incidentDir2D_u, normal2D_u, eta, refractedDir2D_u))
+    {
+        refractedDir2D_u = incidentDir2D_u - normal2D_u * dot(normal2D_u, incidentDir2D_u);
+        refractedDir2D_u = normalize(refractedDir2D_u);
+    }
+    if (!refractWithTIR(incidentDir2D_l, normal2D_l, eta, refractedDir2D_l))
+    {
+        refractedDir2D_l = incidentDir2D_l - normal2D_l * dot(normal2D_l, incidentDir2D_l);
+        refractedDir2D_l = normalize(refractedDir2D_l);
+    }
+
+    float signA = (refractedDir2D_u.x * refractedDir2D_l.y - refractedDir2D_u.y * refractedDir2D_l.x) * normalSign < 0.0f ? +1.0f : -1.0f;
+    // The two directions are nearly parallel for small spread angles, so rounding can push their dot product
+    // slightly outside [-1, 1]; acos() would then return NaN. Clamp the argument to the valid domain.
+    float spreadAngle = acos(clamp(dot(refractedDir2D_u, refractedDir2D_l), -1.0f, 1.0f)) * signA;
+
+    // Now compute the width of the refracted cone.
+    float2 refractDirOrtho2D = orthogonal(refractedDir2D);
+
+    // Intersect line (0,0) + t * refractDirOrtho2D with the line: hitPoint_u + s * refractedDir2D_u, but optimized since hitPoint_ul.y=0.
+    float width = (-hitPoint_u_x * refractedDir2D_u.y) / dot(refractDirOrtho2D, orthogonal(refractedDir2D_u));
+    // Intersect line (0,0) + t * refractDirOrtho2D with the line: hitPoint_l + s * refractedDir2D_l.
+    width += (hitPoint_l_x * refractedDir2D_l.y) / dot(refractDirOrtho2D, orthogonal(refractedDir2D_l));
+
+    rayCone = RayCone::make(width, spreadAngle);
+}
+
+/** Refracts a ray cone. Note that the incident ray cone should be propagated with propagateDistance(hitT) before refractRayCone() is called.
+    \param[in,out] rayCone A ray cone to be refracted, result is returned here as well.
+    \param[in] rayOrg Ray origin.
+    \param[in] rayDir Ray direction.
+    \param[in] hitPoint The hit point.
+    \param[in] normal The normal at the hit point.
+    \param[in] normalSpreadAngle The spread angle at the normal at the hit point.
+    \param[in] eta Ratio of indices of refraction (enteringIndexOfRefraction / exitingIndexOfRefraction).
+    \param[out] refractedRayDir The refracted ray direction (zero if the ray was totally internally reflected).
+    \return True when the ray was refracted, false on total internal reflection (rayCone is then left untouched).
+*/
+bool refractRayCone(inout RayCone rayCone, float3 rayOrg, float3 rayDir, float3 hitPoint, float3 normal, float normalSpreadAngle,
+    float eta, out float3 refractedRayDir)
+{
+    // Refract the central ray first; the cone is only updated when a refracted direction exists.
+    const bool wasRefracted = refractWithTIR(rayDir, normal, eta, refractedRayDir);
+    if (wasRefracted)
+    {
+        computeRayConeForRefraction(rayCone, rayOrg, rayDir, hitPoint, normal, normalSpreadAngle, eta, refractedRayDir);
+    }
+
+    return wasRefracted;
+}
+
+// Experimental ray cone spread heuristic: assume pdf comes from an uniform sphere cap lobe. Then we can compute cone spread
+// angle alpha (a plane angle) from the uniform sphere cap solid angle (omega), which can be derived from pdf
+// (omega = 1 / uniform_sphere_cap_pdf).
+// The formula is alpha = 2 * acos( 1 - omega / 2*PI ) - see https://rechneronline.de/winkel/solid-angle.php
+// (This heuristic starts to break down for BSDFs with overlapping lobes but seems good enough in most cases - perhaps BSDF should be responsible providing the scatter angle).
+//
+// growthFactor 0.3 is very conservative underestimation, see https://www.jcgt.org/published/0010/01/01/paper.pdf, "Improved Shader and Texture Level of Detail Using Ray Cones",
+// Chapter 3. Curvature Approximations "...On the other hand, when ray cones are used inside a Monte Carlo path tracer, one would prefer slightly underestimating the
+// spread angle, since antialiasing will be handled by stochastic supersampling anyway, and the main objective would be to avoid introducing overblur in the results."
+// Returns sqrt(1 / pdf), with pdf floored at 0.0001 so the result stays finite (at most 100).
+// NOTE(review): the body takes no growth factor and does not evaluate the acos-based formula quoted in the
+// comment preceding this function - confirm that sqrt(1 / pdf) is the intended approximation.
+float ComputeRayConeSpreadAngleExpansionByScatterPDF(float pdf)
+{
+    const float flooredPdf = max(pdf, 0.0001);
+    return sqrt(1.0 / flooredPdf);
+}
+
+#endif // __TEX_LOD_HELPERS_HLSLI__
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Materials/Transmission.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Materials/Transmission.hlsli
new file mode 100644
index 0000000000..d9ecb2cc89
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Includes/Materials/Transmission.hlsli
@@ -0,0 +1,124 @@
+/*
+* Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in
+* all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+* DEALINGS IN THE SOFTWARE.
+*/
+
+#ifndef __TRANSMISSION_HLSLI__
+#define __TRANSMISSION_HLSLI__
+
+#include "Raytracing/Includes/MathHelpers.hlsli"
+#include "Raytracing/Includes/Materials/SubsurfaceMaterial.hlsli"
+
+// Maps two random numbers to a direction on the +Z hemisphere and reports pdf = z / pi.
+float3 SampleHemisphere(float2 u, out float pdf)
+{
+    const float diskRadius = sqrt(u.x);
+    const float azimuth = K_2PI * u.y;
+
+    float3 direction;
+    direction.x = diskRadius * cos(azimuth);
+    direction.y = diskRadius * sin(azimuth);
+    direction.z = sqrt(1.0f - u.x);
+
+    pdf = direction.z * K_1_PI;
+
+    return direction;
+}
+
+// Lambertian diffuse term including the cosine factor: albedo * NoL / pi, with NoL kept in [1e-5, 1].
+float3 EvalLambertianBRDF(const float3 N, const float3 L, const float3 diffuseAlbedo)
+{
+    const float cosTheta = dot(N, L);
+    const float NoL = min(max(1e-5f, cosTheta), 1.0f);
+    const float lambert = K_1_PI * NoL;
+    return diffuseAlbedo * lambert.xxx;
+}
+
+/// Beer-Lambert transmittance exp(-coefficient * distance) for the given attenuation coefficient and distance.
+float3 EvalBeerLambertAttenuation(in const float3 attenuationCoefficient, in const float distance)
+{
+    const float3 negatedCoefficient = -attenuationCoefficient;
+    return exp(negatedCoefficient * distance);
+}
+
+// Samples a direction around wo from the Henyey-Greenstein phase function with parameter g.
+// rndSample.x selects the polar cosine, rndSample.y the azimuth.
+float3 SampleDirectionHenyeyGreenstein(float2 rndSample, in float g, in float3 wo)
+{
+    // For |g| close to zero the isotropic formula is used, which also avoids the division by 2 * g.
+    const bool nearlyIsotropic = abs(g) < 1e-3f;
+
+    float cosTheta = 1 - 2 * rndSample.x;
+    if (!nearlyIsotropic)
+    {
+        const float sqrTerm = (1 - g * g) / (1 - g + 2 * g * rndSample.x);
+        cosTheta = (1 + g * g - sqrTerm * sqrTerm) / (2 * g);
+    }
+
+    // Turn (cosTheta, phi) into a direction expressed in a frame whose z axis is wo.
+    const float sinTheta = sqrt(max((float) 0, 1 - cosTheta * cosTheta));
+    const float phi = K_2PI * rndSample.y;
+
+    float3 frameX, frameY;
+    const float3 frameZ = wo;
+    CreateCoordinateSystemFromZ(true, frameZ, frameX, frameY);
+
+    return SphericalDirection(sinTheta, cosTheta, phi, frameX, frameY, frameZ);
+}
+
+// Generates a ray direction pointing into the surface (hemisphere around -normal).
+float3 CalculateRefractionRay(
+    in const SubsurfaceInteraction subsurfaceInteraction,
+    in const float2 rand2)
+{
+    // The hemisphere sample's pdf is requested but intentionally unused: with this sampling scheme it
+    // cancels against the BSDF. A different sampling scheme needs its own generator and pdf handling.
+    float unusedPdf = 0.0f;
+    const float3 localDirection = SampleHemisphere(rand2, unusedPdf);
+
+    // Rows are the frame axes with bitangent and normal flipped. The original comment treats this basis as
+    // orthogonal, so its transpose is used as the inverse (HLSL has no matrix inverse intrinsic).
+    const float3x3 flippedFrame = float3x3(subsurfaceInteraction.tangent, -subsurfaceInteraction.biTangent, -subsurfaceInteraction.normal);
+    const float3x3 localToWorld = transpose(flippedFrame);
+
+    return mul(localToWorld, localDirection);
+}
+
+// Product of the back-face Lambert term, the Beer-Lambert attenuation over `thickness`, and the
+// front-face Lambert term for the refracted direction (evaluated against -normal).
+float3 EvaluateBoundaryTerm(
+    in const float3 normal,
+    in const float3 vectorToLight,
+    in const float3 refractedRayDirection,
+    in const float3 backfaceNormal,
+    in const float thickness,
+    in const SubsurfaceMaterialCoefficients sssMaterialCoeffcients)
+{
+    const float3 exitLambert = EvalLambertianBRDF(backfaceNormal, vectorToLight, sssMaterialCoeffcients.albedo);
+    const float3 entryLambert = EvalLambertianBRDF(-normal, refractedRayDirection, sssMaterialCoeffcients.albedo);
+    const float3 attenuation = EvalBeerLambertAttenuation(sssMaterialCoeffcients.sigma_t, thickness);
+
+    return exitLambert * attenuation * entryLambert;
+}
+
+// sigma_s times the Lambert term at the scattering boundary times the Beer-Lambert attenuation over the
+// total scattering distance.
+float3 EvaluateSingleScattering(
+    in const float3 vectorToLight,
+    in const float3 scatteringBoundaryNormal,
+    in const float totalScatteringDistance,
+    in const SubsurfaceMaterialCoefficients sssMaterialCoeffcients)
+{
+    const float3 boundaryLambert = EvalLambertianBRDF(scatteringBoundaryNormal, vectorToLight, sssMaterialCoeffcients.albedo);
+    const float3 attenuation = EvalBeerLambertAttenuation(sssMaterialCoeffcients.sigma_t, totalScatteringDistance);
+    return sssMaterialCoeffcients.sigma_s * boundaryLambert * attenuation;
+}
+
+#endif // __TRANSMISSION_HLSLI__
\ No newline at end of file
diff --git
a/features/Raytracing/Shaders/Raytracing/Includes/MathConstants.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/MathConstants.hlsli new file mode 100644 index 0000000000..cb5b8e1477 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/MathConstants.hlsli @@ -0,0 +1,75 @@ +#ifndef __MATH_CONSTANTS_HLSLI__ +#define __MATH_CONSTANTS_HLSLI__ + +// Constants mirroring the M_* macros from <math.h> +#define K_E 2.71828182845904523536 // e +#define K_LOG2E 1.44269504088896340736 // log2(e) +#define K_LOG10E 0.434294481903251827651 // log10(e) +#define K_LN2 0.693147180559945309417 // ln(2) +#define K_LN10 2.30258509299404568402 // ln(10) +#define K_PI 3.14159265358979323846 // pi +#define K_PI_2 1.57079632679489661923 // pi/2 +#define K_PI_4 0.785398163397448309616 // pi/4 +#define K_1_PI 0.318309886183790671538 // 1/pi +#define K_2_PI 0.636619772367581343076 // 2/pi +#define K_2_SQRTPI 1.12837916709551257390 // 2/sqrt(pi) +#define K_SQRT2 1.41421356237309504880 // sqrt(2) +#define K_SQRT1_2 0.707106781186547524401 // 1/sqrt(2) + +// Additional constants +#define K_2PI 6.28318530717958647693 // 2pi +#define K_4PI 12.5663706143591729539 // 4pi +#define K_4_PI 1.27323954473516268615 // 4/pi +#define K_1_2PI 0.159154943091895335769 // 1/2pi +#define K_1_4PI 0.079577471545947667884 // 1/4pi +#define K_SQRTPI 1.77245385090551602730 // sqrt(pi) +#define K_1_SQRT2 0.707106781186547524401 // 1/sqrt(2) + +// Numeric limits from <stdint.h> +#define UINT32_MAX 4294967295 +#define INT32_MIN (-2147483648) +#define INT32_MAX 2147483647 + +// Numeric limits from <float.h> +#define DBL_DECIMAL_DIG 17 // # of decimal digits of rounding precision +#define DBL_DIG 15 // # of decimal digits of precision +#define DBL_EPSILON 2.2204460492503131e-016 // smallest such that 1.0+DBL_EPSILON != 1.0 +#define DBL_HAS_SUBNORM 1 // type does support subnormal numbers +#define DBL_MANT_DIG 53 // # of bits in mantissa +#define DBL_MAX 1.7976931348623158e+308 // max value +#define DBL_MAX_10_EXP 308 // max decimal exponent +#define
DBL_MAX_EXP 1024 // max binary exponent +#define DBL_MIN 2.2250738585072014e-308 // min normalized positive value +#define DBL_MIN_10_EXP (-307) // min decimal exponent +#define DBL_MIN_EXP (-1021) // min binary exponent +#define DBL_RADIX 2 // exponent radix +#define DBL_TRUE_MIN 4.9406564584124654e-324 // min positive (subnormal) value + +#define FLT_DECIMAL_DIG 9 // # of decimal digits of rounding precision +#define FLT_DIG 6 // # of decimal digits of precision +#define FLT_EPSILON 1.192092896e-07F // smallest such that 1.0+FLT_EPSILON != 1.0 +#define FLT_HAS_SUBNORM 1 // type does support subnormal numbers +#define FLT_GUARD 0 +#define FLT_MANT_DIG 24 // # of bits in mantissa +#define FLT_MAX 3.402823466e+38F // max value +#define FLT_MAX_10_EXP 38 // max decimal exponent +#define FLT_MAX_EXP 128 // max binary exponent +#define FLT_MIN 1.175494351e-38F // min normalized positive value +#define FLT_MIN_10_EXP (-37) // min decimal exponent +#define FLT_MIN_EXP (-125) // min binary exponent +#define FLT_NORMALIZE 0 +#define FLT_RADIX 2 // exponent radix +#define FLT_TRUE_MIN 1.401298464e-45F // min positive (subnormal) value + +// Numeric limits for half (IEEE754 binary16) +#define HLF_EPSILON 9.765625e-04F // smallest such that 1.0+HLF_EPSILON != 1.0 +#define HLF_HAS_SUBNORM 1 // type does support subnormal numbers +#define HLF_MANT_DIG 11 +#define HLF_MAX 6.5504e+4F // max value +#define HLF_MAX_EXP 16 // max binary exponent +#define HLF_MIN 6.103515625e-05F // min normalized positive value (2^-14); the previous 6.0975e-05 was the largest subnormal +#define HLF_MIN_EXP (-14) // min binary exponent +#define HLF_RADIX 2 +#define HLF_TRUE_MIN 5.960464477539063e-08F // min positive (subnormal) value + +#endif // __MATH_CONSTANTS_HLSLI__ \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/MathHelpers.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/MathHelpers.hlsli new file mode 100644 index 0000000000..4d999d2fbb --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/MathHelpers.hlsli @@ -0,0 +1,276 @@
+#ifndef __MATH_HELPERS_HLSLI__ +#define __MATH_HELPERS_HLSLI__ + +#include "Raytracing/Includes/MathConstants.hlsli" + +inline float Luminance(float3 rgb) +{ + return dot(rgb, float3(0.2126f, 0.7152f, 0.0722f)); +} + +inline float Average(float3 rgb) +{ + return (rgb.x+rgb.y+rgb.z) / 3.0; +} + +float Sqrt01(float x) +{ + return max(sqrt(saturate(x)), 1e-7); +} + +// Safe sqrt for x +float Sqrt0(float x) +{ + return sqrt(max(x, 1e-7)); +} + +float3 Sqrt0(float3 x) +{ + return sqrt(max(x, 1e-7)); +} + +float Atan2safe(float x, float y) +{ + return abs(x) + abs(y) < 1e-7 ? 0 : atan2(x, y); +} + +float I0(float x) +{ + float val = 0.f; + float x2i = 1.f; + float ifact = 1.f; + uint i4 = 1; + + [unroll] + for (uint i = 0; i < 10; i++) + { + if (i > 1) + ifact *= i; + val += x2i / (ifact * ifact * i4); + x2i *= x * x; + i4 *= 4; + } + return val; +} + +float LogI0(float x) +{ + if (x > 12) + { + return x + 0.5f * (-log(K_2PI) + log(1.f / x) + 0.125f / x); + } + else + { + return log(I0(x)); + } +} + +float PhiFunction(int p, float gammaI, float gammaT) +{ + return 2.f * p * gammaT - 2.f * gammaI + p * K_PI; +} + +float Logistic(float x, float s) +{ + x = abs(x); + float tmp = exp(-x / s); + return tmp / (s * (1.f + tmp) * (1.f + tmp)); +} + +float LogisticCDF(float x, float s) +{ + return 1.f / (1.f + exp(-x / s)); +} + +float TrimmedLogistic(float x, float s, float a, float b) +{ + return Logistic(x, s) / (LogisticCDF(b, s) - LogisticCDF(a, s)); +} + +float SampleTrimmedLogistic(float u, float s, float a, float b) +{ + float k = LogisticCDF(b, s) - LogisticCDF(a, s); + float x = -s * log(1.f / (u * k + LogisticCDF(a, s)) - 1.f); + return clamp(x, a, b); +} + +// 1D Gaussian distribution normalized over [-inf,inf] +float Gaussian1D(const float x, const float stddev) +{ + return exp(-x * x / (2.0f * stddev * stddev)) / (stddev * sqrt(2.0f * K_PI)); +} + +float PhiR(const float h) +{ + return -2.0 * asin(h); +} + +float PhiTT(const float h, const float a) // a = 1.0 / 
eta_prime +{ + return K_PI - 2.0 * asin(h) + 2.0 * asin(h * a); +} + +float PhiTRT(const float h, const float a) // a = 1.0 / eta_prime +{ + return -2.0 * asin(h) + 4.0 * asin(h * a); +} + +// sample from normal distribution (Box-Muller transform) +float RandomGaussian1D(const float xi1, const float xi2) +{ + return sqrt(2.0f) * cos(2.0f * K_PI * xi1) * Sqrt0(-log(1 - xi2)); +} + +float2 PolarToCartesian(float r, float theta) +{ + return r * float2(cos(theta), sin(theta)); +} + +void CreateCoordinateSystemFromZ(bool rightHand, float3 zAxis, out float3 xAxis, out float3 yAxis) +{ + float yz = -zAxis.y * zAxis.z; + yAxis = normalize(abs(zAxis.z) > 0.9999 ? float3(-zAxis.x * zAxis.y, 1.f - zAxis.y * zAxis.y, yz) : + float3(-zAxis.x * zAxis.z, yz, 1.f - zAxis.z * zAxis.z)); + xAxis = rightHand ? cross(yAxis, zAxis) : cross(zAxis, yAxis); +} + +// Spherical to Cartesian in the basis x, y, z +// z is up +float3 SphericalDirection(float sinTheta, float cosTheta, float phi, float3 x, float3 y, float3 z) +{ + return sinTheta * cos(phi) * x + sinTheta * sin(phi) * y + cosTheta * z; +} + +float3 CalculateDiskSamplePosition( + in const float rand, + in const float r, + in float3 centerPos, + in float3 tangent, + in float3 biTangent) +{ + // Sample Disk + const float theta = rand * K_2PI; + const float2 diskSample = PolarToCartesian(r, theta); + + return centerPos + tangent * diskSample.xxx + biTangent * diskSample.yyy; +} + +/** Generate a vector that is orthogonal to the input vector. + This can be used to invent a tangent frame for meshes that don't have real tangents/bitangents. + \param[in] u Unit vector. + \return v Unit vector that is orthogonal to u. +*/ +float3 perp_stark(float3 u) +{ + // TODO: Validate this and look at numerical precision etc. Are there better ways to do it? + float3 a = abs(u); + uint uyx = (a.x - a.y) < 0 ? 1 : 0; + uint uzx = (a.x - a.z) < 0 ? 1 : 0; + uint uzy = (a.y - a.z) < 0 ? 
1 : 0; + uint xm = uyx & uzx; + uint ym = (1 ^ xm) & uzy; + uint zm = 1 ^ (xm | ym); // 1 ^ (xm & ym) + float3 v = normalize(cross(u, float3(xm, ym, zm))); + return v; +} +// fp16 variant +half3 perp_stark(half3 u) +{ + // TODO: Validate this and look at numerical precision etc. Are there better ways to do it? + half3 a = abs(u); + uint uyx = (a.x - a.y) < 0 ? 1 : 0; + uint uzx = (a.x - a.z) < 0 ? 1 : 0; + uint uzy = (a.y - a.z) < 0 ? 1 : 0; + uint xm = uyx & uzx; + uint ym = (1 ^ xm) & uzy; + uint zm = 1 ^ (xm | ym); // 1 ^ (xm & ym) + half3 v = normalize(cross(u, half3(xm, ym, zm))); + return v; +} + +/** Uniform sampling of the unit disk using polar coordinates. + \param[in] u Uniform random number in [0,1)^2. + \return Sampled point on the unit disk. +*/ +float2 sample_disk(float2 u) +{ + float2 p; + float r = sqrt(u.x); + float phi = K_2PI * u.y; + p.x = r * cos(phi); + p.y = r * sin(phi); + return p; +} + +/** Uniform sampling of direction within a cone + \param[in] u Uniform random number in [0,1)^2. + \param[in] cosTheta Cosine of the cone half-angle + \return Sampled direction within the cone with (0,0,1) axis +*/ +float3 sample_cone(float2 u, float cosTheta) +{ + float z = u.x * (1.f - cosTheta) + cosTheta; + float r = sqrt(1.f - z*z); + float phi = K_2PI * u.y; + return float3(r * cos(phi), r * sin(phi), z); +} + +/** Uniform sampling of the unit sphere using spherical coordinates. + \param[in] u Uniform random numbers in [0,1)^2. + \return Sampled point on the unit sphere. +*/ +float3 sample_sphere(float2 u) +{ + float phi = K_2PI * u.y; + float cosTheta = 1.0f - 2.0f * u.x; + float sinTheta = sqrt(max(0.0f, 1.0f - cosTheta * cosTheta)); + return float3(sinTheta * cos(phi), sinTheta * sin(phi), cosTheta); +} + +/** Uniform sampling of the unit hemisphere using sphere sampling. + \param[in] u Uniform random numbers in [0,1)^2. + \return Sampled point on the unit hemisphere. 
+*/ +float3 sample_hemisphere(float2 u) +{ + float3 w = sample_sphere(u); + w.z = abs(w.z); + return w; +} + +/** Uniform sampling of the unit disk using Shirley's concentric mapping. + \param[in] u Uniform random numbers in [0,1)^2. + \return Sampled point on the unit disk. +*/ +float2 sample_disk_concentric(float2 u) +{ + u = 2.f * u - 1.f; + if (u.x == 0.f && u.y == 0.f) return u; + float phi, r; + if (abs(u.x) > abs(u.y)) + { + r = u.x; + phi = (u.y / u.x) * K_PI_4; + } + else + { + r = u.y; + phi = K_PI_2 - (u.x / u.y) * K_PI_4; + } + return r * float2(cos(phi), sin(phi)); +} + +/** Cosine-weighted sampling of the hemisphere using Shirley's concentric mapping. + \param[in] u Uniform random numbers in [0,1)^2. + \param[out] pdf Probability density of the sampled direction (= cos(theta)/pi). + \return Sampled direction in the local frame (+z axis up). +*/ +float3 sample_cosine_hemisphere_concentric(float2 u, out float pdf) +{ + float2 d = sample_disk_concentric(u); + float z = sqrt(max(0.f, 1.f - dot(d, d))); + pdf = z * K_1_PI; + return float3(d, z); +} + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/MonteCarlo.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/MonteCarlo.hlsli new file mode 100644 index 0000000000..b31f04fc1a --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/MonteCarlo.hlsli @@ -0,0 +1,417 @@ +#ifndef MONTE_CARLO_HLSL +#define MONTE_CARLO_HLSL + +#include "Common/BRDF.hlsli" +#include "Common/Math.hlsli" +#include "Raytracing/Includes/RT/CommonRT.hlsli" +#include "Raytracing/Includes/Surface.hlsli" + +namespace MonteCarlo +{ + struct BRDFWeight + { + float3 diffuse; + float3 specular; + float3 transmission; + + float3 total() + { + return diffuse + specular + transmission; + } + }; + + // The following functions below all come from NVIDIA + float CalcLuminance(float3 color) + { + return dot(color.xyz, float3(0.299f, 0.587f, 0.114f)); + } + + float2 Hammersley( uint
Index, uint NumSamples, uint2 Random ) + { + float E1 = frac( (float)Index / NumSamples + float( Random.x & 0xffff ) / (1<<16) ); + float E2 = float( reversebits(Index) ^ Random.y ) * 2.3283064365386963e-10; + return float2( E1, E2 ); + } + + float2 Hammersley16( uint Index, uint NumSamples, uint2 Random ) + { + float E1 = frac( (float)Index / NumSamples + float( Random.x ) * (1.0 / 65536.0) ); + float E2 = float( ( reversebits(Index) >> 16 ) ^ Random.y ) * (1.0 / 65536.0); + return float2( E1, E2 ); + } + + // It's got a license :( + // https://github.com/NVIDIA-RTX/RTXDI/blob/main/Samples/FullSample/Shaders/HelperFunctions.hlsli + float3 SampleGGX_VNDF(float3 Ve, float alpha, inout uint seed) + { + float3 Vh = normalize(float3(alpha * Ve.x, alpha * Ve.y, Ve.z)); + + float lensq = Square(Vh.x) + Square(Vh.y); + float3 T1 = lensq > 0.0 ? float3(-Vh.y, Vh.x, 0.0) / sqrt(lensq) : float3(1.0, 0.0, 0.0); + float3 T2 = cross(Vh, T1); + + float r1 = Random(seed); + float r2 = Random(seed); + + float r = sqrt(r1); + float phi = 2.0 * Math::PI * r2; + float t1 = r * cos(phi); + float t2 = r * sin(phi); + float s = 0.5 * (1.0 + Vh.z); + t2 = (1.0 - s) * sqrt(1.0 - Square(t1)) + s * t2; + + float3 Nh = t1 * T1 + t2 * T2 + sqrt(max(0.0, 1.0 - Square(t1) - Square(t2))) * Vh; + + // Tangent space H + return normalize(float3(alpha * Nh.x, alpha * Nh.y, max(0.0, Nh.z))); + } + + // Also got a license, but a permissive one + // https://github.com/NVIDIA-RTX/Donut/blob/main/include/donut/shaders/brdf.hlsli + float ImportanceSampleGGX_VNDF_PDF(float alpha, float3 N, float3 V, float3 L) + { + float3 H = normalize(L + V); + float NoH = saturate(dot(N, H)); + float VoH = saturate(dot(V, H)); + + float D = Square(alpha) / (Math::PI * Square(Square(NoH) * Square(alpha) + (1 - Square(NoH)))); + return (VoH > 0.0) ? 
D / (4.0 * VoH) : 0.0; + } + + // Keep this for alpha versions of GGX functions + float GGX_D(float alphaSquared, float NdotH) { + float b = ((alphaSquared - 1.0f) * saturate(NdotH * NdotH) + 1.0f); + b = max(b, 0.001f); + return alphaSquared / (Math::PI * b * b); + } + + float Smith_G1_GGX(float alpha, float NdotS, float alphaSquared, float NdotSSquared) { + return 2.0f / (sqrt(((alphaSquared * (1.0f - NdotSSquared)) + NdotSSquared) / NdotSSquared) + 1.0f); + } + + // PDF of sampling a reflection vector L using 'sampleGGXVNDF'. + // Note that PDF of sampling given microfacet normal is (G1 * D) when vectors are in local space (in the hemisphere around shading normal). + // Remaining terms (1.0f / (4.0f * NdotV)) are specific for reflection case, and come from multiplying PDF by jacobian of reflection operator + float SampleGGXVNDFReflectionPdf(float alpha, float alphaSquared, float NdotH, float NdotV, float LdotH) { + NdotH = max(0.00001f, NdotH); + NdotV = max(0.00001f, NdotV); + return (GGX_D(max(0.00001f, alphaSquared), NdotH) * Smith_G1_GGX(alpha, NdotV, alphaSquared, NdotV * NdotV)) / (4.0f * NdotV); + } + + float SpecularSampleWeightGGXVNDF(float alpha, float alphaSquared, float NdotL, float NdotV, float HdotL, float NdotH) { + return Smith_G1_GGX(alpha, NdotL, alphaSquared, NdotL * NdotL); + } + + float VisibleGGXPDF_aniso(float3 V, float3 H, float2 Alpha, bool bLimitVDNFToReflection = true) + { + float NoV = V.z; + float NoH = H.z; + float VoH = dot(V, H); + float a2 = Alpha.x * Alpha.y; + float3 Hs = float3(Alpha.y * H.x, Alpha.x * H.y, a2 * NoH); + float S = dot(Hs, Hs); + float D = (1.0f / Math::PI) * a2 * pow(a2 / S, 2); + float LenV = length(float3(V.x * Alpha.x, V.y * Alpha.y, NoV)); + float k = 1.0; + if (bLimitVDNFToReflection) + { + float a = saturate(min(Alpha.x, Alpha.y)); + float s = 1.0f + length(V.xy); + float ka2 = a * a, s2 = s * s; + k = (s2 - ka2 * s2) / (s2 + ka2 * V.z * V.z); // Eq. 
5 + } + float Pdf = (2 * D * VoH) / (k * NoV + LenV); + return Pdf; + } + + // PDF = G_SmithV * VoH * D / NoV / (4 * VoH) + // PDF = G_SmithV * D / (4 * NoV) + float4 ImportanceSampleVisibleGGX(float2 E, float2 Alpha, float3 V, bool bLimitVDNFToReflection = true) + { + // stretch + float3 Vh = normalize(float3(Alpha * V.xy, V.z)); + + // "Sampling Visible GGX Normals with Spherical Caps" + // Jonathan Dupuy & Anis Benyoub - High Performance Graphics 2023 + float Phi = (2 * Math::PI) * E.x; + float k = 1.0; + if (bLimitVDNFToReflection) + { + // If we know we will be reflecting the view vector around the sampled micronormal, we can + // tweak the range a bit more to eliminate some of the vectors that will point below the horizon + float a = saturate(min(Alpha.x, Alpha.y)); + float s = 1.0 + length(V.xy); + float a2 = a * a, s2 = s * s; + k = (s2 - a2 * s2) / (s2 + a2 * V.z * V.z); + } + float Z = lerp(1.0, -k * Vh.z, E.y); + float SinTheta = sqrt(saturate(1 - Z * Z)); + float X = SinTheta * cos(Phi); + float Y = SinTheta * sin(Phi); + float3 H = float3(X, Y, Z) + Vh; + + // unstretch + H = normalize(float3(Alpha * H.xy, max(0.0, H.z))); + + return float4(H, VisibleGGXPDF_aniso(V, H, Alpha, bLimitVDNFToReflection)); // forward the flag so the PDF uses the same k bound as the sampling above + } + + float Schlick_Fresnel(float F0, float VdotH) + { + return F0 + (1 - F0) * pow(max(1 - VdotH, 0), 5); + } + + float3 Schlick_Fresnel(float3 F0, float VdotH) + { + return F0 + (1 - F0) * pow(max(1 - VdotH, 0), 5); + } + + float G1_Smith(float alpha, float NdotL) + { + return 2.0 * NdotL / (NdotL + sqrt(Square(alpha) + (1.0 - Square(alpha)) * Square(NdotL))); + } + + // Compute GGX lobe Weight and Pdf (without Fresnel term) given a set of vectors in local space (Z up) + float2 GGXEvalReflection(float3 L, float3 V, float3 H, float2 Alpha, bool bLimitVDNFToReflection = true) + { + const float NoL = saturate(L.z); + const float NoV = saturate(V.z); + + if (NoL > 0 && NoV > 0) + { + const float D = BRDF::D_AnisoGGX(Alpha.x, Alpha.y, H.z, H.x, H.y); + // See implementation in
Vis_SmithJointAniso for G2/(4*NoV*NoL) + // We can simplify a bit further since we need both the weight G2/G1 and the pdf + const float LenL = length(float3(L.xy * Alpha, NoL)); + const float LenV = length(float3(V.xy * Alpha, NoV)); + float k = 1.0; + if (bLimitVDNFToReflection) + { + float a = saturate(min(Alpha.x, Alpha.y)); + float s = 1.0f + length(V.xy); + float a2 = a * a, s2 = s * s; + k = (s2 - a2 * s2) / (s2 + a2 * NoV * NoV); // Eq. 5 + } + const float Weight = NoL * (LenV + k * NoV) / (NoV * LenL + NoL * LenV); + const float Pdf = 0.5 * D * rcp(LenV + k * NoV); + + return float2(Weight, Pdf); + } + return 0; + } + + // https://github.com/NVIDIA-RTX/Streamline/blob/main/docs/ProgrammingGuideDLSS_RR.md#421-specular-albedo-generation + float3 EnvBRDFApprox2(float3 SpecularColor, float Alpha, float NoV) + { + NoV = abs(NoV); + // [Ray Tracing Gems, Chapter 32] + float4 X; + X.x = 1.f; + X.y = NoV; + X.z = NoV * NoV; + X.w = NoV * X.z; + float4 Y; + Y.x = 1.f; + Y.y = Alpha; + Y.z = Alpha * Alpha; + Y.w = Alpha * Y.z; + float2x2 M1 = float2x2(0.99044f, -1.28514f, 1.29678f, -0.755907f); + float3x3 M2 = float3x3(1.f, 2.92338f, 59.4188f, 20.3225f, -27.0302f, 222.592f, 121.563f, 626.13f, 316.627f); + float2x2 M3 = float2x2(0.0365463f, 3.32707, 9.0632f, -9.04756); + float3x3 M4 = float3x3(1.f, 3.59685f, -1.36772f, 9.04401f, -16.3174f, 9.22949f, 5.56589f, 19.7886f, -20.2123f); + float bias = dot(mul(M1, X.xy), Y.xy) * rcp(dot(mul(M2, X.xyw), Y.xyw)); + float scale = dot(mul(M3, X.xy), Y.xy) * rcp(dot(mul(M4, X.xzw), Y.xyw)); + // This is a hack for specular reflectance of 0 + bias *= saturate(SpecularColor.g * 50); + return mad(SpecularColor, max(0, scale), max(0, bias)); + } + + float D_GGXAlpha(float NoH, float alpha) + { + float a = NoH * alpha; + float k = alpha / (1.0 - NoH * NoH + a * a); + return k * k * (1.0 / Math::PI); + } + + float V_SmithGGXCorrelatedFast(float NoV, float NoL, float a) + { + float GGXV = NoL * (NoV * (1.0 - a) + a); + float GGXL = NoV 
* (NoL * (1.0 - a) + a); + return 0.5 / (GGXV + GGXL); + } + + // Calculates probability of selecting BRDF (specular or diffuse) using the approximate Fresnel term + float GetSpecularBrdfProbability(Surface surface, float3 viewVector, float3 shadingNormal) + { + // Evaluate Fresnel term using the shading normal + // Note: we use the shading normal instead of the microfacet normal (half-vector) for Fresnel term here. That's suboptimal for rough surfaces at grazing angles, but half-vector is yet unknown at this point + float specularF0 = CalcLuminance(surface.F0); + float diffuseReflectance = CalcLuminance(surface.DiffuseAlbedo); + + float fresnel = saturate(CalcLuminance(BRDF::F_Schlick(specularF0, BRDF::ShadowedF90(specularF0), max(0.0f, dot(viewVector, shadingNormal))))); + + // Approximate relative contribution of BRDFs using the Fresnel term + float specular = fresnel; + float diffuse = diffuseReflectance * (1.0f - fresnel); //< If diffuse term is weighted by Fresnel, apply it here as well + + // Return probability of selecting specular BRDF over diffuse BRDF + float probability = (specular / max(0.0001f, (specular + diffuse))); + + // Clamp probability to avoid undersampling of less prominent BRDF + return clamp(probability, 0.1f, 0.9f); + } + + // Helper functions for multiple importance sampling + + // Multiple importance sampling balance heuristic + // [Veach 1997, "Robust Monte Carlo Methods for Light Transport Simulation"] + float MISWeightBalanced(float Pdf, float OtherPdf) + { + // The straightforward implementation is prone to numerical overflow, divisions by 0 + // and does not work well with +inf inputs. 
+ // return Pdf / (Pdf + OtherPdf); + + // We want this function to have the following properties: + // 0 <= w(a,b) <= 1 for all possible positive floats a and b (including 0 and +inf) + // w(a, b) + w(b, a) == 1.0 + + // The formulation below is much more stable across the range of all possible inputs + // and guarantees the sum always adds up to 1.0. + + // Evaluate the expression using the ratio of the smaller value to the bigger one for greater + // numerical stability. The math would also work using the ratio of bigger to smaller value, + // which would underflow less but would make the weights asymmetric. Underflow to 0 is not a + // bad property to have in rendering application as it ensures more weights are exactly 0 + // which allows some evaluations to be skipped. + float X = min(Pdf, OtherPdf) / max(Pdf, OtherPdf); // This ratio is guaranteed to be in [0,1] + float Y = Pdf == OtherPdf ? 1.0 : X; // Guard against NaNs from 0/0 and Inf/Inf + float M = rcp(1.0 + Y); + return Pdf > OtherPdf ? M : 1.0 - M; // This ensures exchanging arguments will produce values that add back up to 1.0 exactly + } + + // Multiple importance sampling power heuristic of two functions with a power of two. + // [Veach 1997, "Robust Monte Carlo Methods for Light Transport Simulation"] + float MISWeightPower(float Pdf, float OtherPdf) + { + // Naive code (which can overflow, divide by 0, etc ..) + // return Pdf * Pdf / (Pdf * Pdf + OtherPdf * OtherPdf); + + // See function above for the explanation of how this works + float X = min(Pdf, OtherPdf) / max(Pdf, OtherPdf); // This ratio is guaranteed to be in [0,1] + float Y = Pdf == OtherPdf ? 1.0 : X; // Guard against NaNs from 0/0 and Inf/Inf + float M = rcp(1.0 + Y * Y); + return Pdf > OtherPdf ? 
M : 1.0 - M; // This ensures exchanging arguments will produce values that add back up to 1.0 exactly + } + + // Takes as input the sample weight and pdf for a certain lobe of a mixed model, together with the probability of picking that lobe + // This function then updates a running total Weight and Pdf value that represents the overall contribution of the BxDF + // This function should be called when a BxDF is made up of multiple lobes combined with a sum to correctly account for the probability + // of sampling directions via all lobes. + // NOTE: this function also contains special logic to handle cases with infinite pdfs cleanly + void AddLobeWithMIS(inout float3 Weight, inout float Pdf, float3 LobeWeight, float LobePdf, float LobeProb) + { + const float MinLobeProb = 1.1754943508e-38; // smallest normal float + if (LobeProb > MinLobeProb) + { + LobePdf *= LobeProb; + LobeWeight *= rcp(LobeProb); + Weight = lerp(Weight, LobeWeight, MISWeightBalanced(LobePdf, Pdf)); + Pdf += LobePdf; + } + } + + float3 DiffuseAO(float3 diffuseColor, float ao) + { + return Color::MultiBounceAO(diffuseColor, ao); + } + + float3 SpecularAO(float NdotV, float roughness, float ao, float3 f0) + { + float specularAO = Color::SpecularAOLagarde(NdotV, ao, roughness); + return Color::MultiBounceAO(f0, specularAO); + } + + // Horizon specular occlusion + // https://marmosetco.tumblr.com/post/81245981087 + float Horizon(float3 V, float3 N, float3 VN) + { + float3 R = reflect(-V, N); + float horizon = min(1.0 + dot(R, VN), 1.0); + + return horizon * horizon; + } + + // https://github.com/NVIDIA-RTX/RTXDI/blob/main/Samples/FullSample/Shaders/LightingPasses/BrdfRayTracing.hlsl + bool GGXBRDF(in Surface surface, in BRDFContext brdfContext, inout uint randomSeed, out float3 direction, out float3 BRDF_over_PDF) + { + bool isSpecularRay = false; + const bool isDeltaSurface = surface.Roughness == 0; + float specular_PDF; + float overall_PDF; + + { + float3 specularDirection; + float3 
specular_BRDF_over_PDF; + { + float3 Ve = float3( + dot(brdfContext.ViewDirection, surface.Tangent), + dot(brdfContext.ViewDirection, surface.Bitangent), + dot(brdfContext.ViewDirection, surface.Normal) + ); + + const float alpha = surface.Roughness * surface.Roughness; + + float3 He = SampleGGX_VNDF(Ve, alpha, randomSeed); + float3 H = isDeltaSurface ? surface.Normal : surface.Mul(He); + specularDirection = reflect(-brdfContext.ViewDirection, H); + + float HoV = saturate(dot(H, brdfContext.ViewDirection)); + float3 F = Schlick_Fresnel(surface.F0, HoV); + float G1 = isDeltaSurface ? 1.0 : (brdfContext.NdotV > 0) ? G1_Smith(alpha, brdfContext.NdotV) : 0; + specular_BRDF_over_PDF = F * G1; + } + + float3 diffuseDirection; + float diffuse_BRDF_over_PDF; + { + float3 localDirection = SampleCosineHemisphere(randomSeed); + diffuseDirection = surface.Mul(localDirection); + diffuse_BRDF_over_PDF = 1.0; + } + + specular_PDF = saturate(CalcLuminance(specular_BRDF_over_PDF) / + CalcLuminance(specular_BRDF_over_PDF + diffuse_BRDF_over_PDF * surface.DiffuseAlbedo)); + + isSpecularRay = Random(randomSeed) < specular_PDF; + + if (isSpecularRay) + { + direction = specularDirection; + BRDF_over_PDF = specular_BRDF_over_PDF / specular_PDF; + } + else + { + direction = diffuseDirection; + BRDF_over_PDF = diffuse_BRDF_over_PDF / (1.0 - specular_PDF); + } + + /*const float specularLobe_PDF = ImportanceSampleGGX_VNDF_PDF(roughness, N, V, direction); + const float diffuseLobe_PDF = saturate(dot(direction, N)) / Math::PI; + + // For delta surfaces, we only pass the diffuse lobe to ReSTIR GI, and this pdf is for that. + overall_PDF = isDeltaSurface ? 
diffuseLobe_PDF : lerp(diffuseLobe_PDF, specularLobe_PDF, specular_PDF);*/ + } + + return isSpecularRay; + } + + // When sampling from discrete CDFs, it can be convenient to re-use the random number by rescaling it + // This function assumes that RandVal is in the interval: [LowerBound, UpperBound) and returns a value in [0,1) + float RescaleRandomNumber(float RandVal, float LowerBound, float UpperBound) + { + const float OneMinusEpsilon = 0.99999994; // 32-bit float just before 1.0 + return min((RandVal - LowerBound) / (UpperBound - LowerBound), OneMinusEpsilon); + } +} + +#endif // MONTE_CARLO_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/PBR.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/PBR.hlsli new file mode 100644 index 0000000000..162c9eb6d5 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/PBR.hlsli @@ -0,0 +1,64 @@ +#ifndef PBR_HLSL +#define PBR_HLSL + +#include "Common/BRDF.hlsli" +#include "Common/Color.hlsli" +#include "Common/Math.hlsli" + +#include "Raytracing/Includes/Common.hlsli" + +namespace PBR +{ + namespace Flags + { + static const uint16_t HasEmissive = (1 << 0); + static const uint16_t HasDisplacement = (1 << 1); + static const uint16_t HasFeatureTexture0 = (1 << 2); + static const uint16_t HasFeatureTexture1 = (1 << 3); + static const uint16_t Subsurface = (1 << 4); + static const uint16_t TwoLayer = (1 << 5); + static const uint16_t ColoredCoat = (1 << 6); + static const uint16_t InterlayerParallax = (1 << 7); + static const uint16_t CoatNormal = (1 << 8); + static const uint16_t Fuzz = (1 << 9); + static const uint16_t HairMarschner = (1 << 10); + static const uint16_t Glint = (1 << 11); + static const uint16_t ProjectedGlint = (1 << 12); + } + + namespace Defaults + { + static const float Roughness = 1.0f; + static const float Metallic = 0.0f; + static const float3 F0 = float3(0.04f, 0.04f, 0.04f); + } + + namespace Constants + { + static const float MinRoughness 
= 0.04f; + static const float MaxRoughness = 1.0f; + static const float MinGlintDensity = 1.0f; + static const float MaxGlintDensity = 40.0f; + static const float MinGlintRoughness = 0.005f; + static const float MaxGlintRoughness = 0.3f; + static const float MinGlintDensityRandomization = 0.0f; + static const float MaxGlintDensityRandomization = 5.0f; + } + + float Roughness(float linearRoughness, float lower, float upper) + { + return clamp(Remap(linearRoughness, lower, upper), Constants::MinRoughness, Constants::MaxRoughness); + } + + float3 F0(float3 albedo, float metalness) + { + return saturate(lerp(Defaults::F0, albedo, metalness)); + } + + float3 F0(float3 specularLevel, float3 albedo, float metalness) + { + return saturate(lerp(specularLevel, albedo, metalness)); + } +} + +#endif // PBR_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT.hlsli new file mode 100644 index 0000000000..79cae7fce6 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT.hlsli @@ -0,0 +1,7 @@ +#ifndef RT_TYPES_HLSL +#define RT_TYPES_HLSL + +#include "Raytracing/Includes/RT/Payload.hlsli" +#include "Raytracing/Includes/RT/ShadowPayload.hlsli" + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/CommonRT.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT/CommonRT.hlsli new file mode 100644 index 0000000000..b6b0a6e85c --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/CommonRT.hlsli @@ -0,0 +1,131 @@ +#ifndef COMMONRT_HLSL +#define COMMONRT_HLSL + +#include "Common/Game.hlsli" +#include "Common/Math.hlsli" +#include "Common/Color.hlsli" +#include "Common/BRDF.hlsli" + +#include "Raytracing/Includes/Types.hlsli" + +#ifndef MAX_BOUNCES +#define MAX_BOUNCES (1) +#endif + +#ifndef MAX_SAMPLES +#define MAX_SAMPLES (1) +#endif + +#define SHADOW_MAX_DEPTH (1) + +#define DIFFUSE_RAY_HITGROUP_IDX 0 
+#define DIFFUSE_RAY_MISS_IDX 0 + +#define SHADOW_RAY_HITGROUP_IDX 1 +#define SHADOW_RAY_MISS_IDX 1 + +#define RAY_TMAX (1e10f) +#define SHADOW_RAY_TMAX (1e5f) + +#define GN_BIAS_MAX (0.5f) + +#define MIN_DIFFUSE_SHADOW (0.0001f) +#define MIN_RADIANCE (0.01f) +#define RR_MIN_BOUNCE (3) + +#define DIV_EPSILON (1e-4f) + +#define LAND_MIN_WEIGHT (0.01f) + +uint InitRandomSeed(uint2 coord, uint2 size, uint frameCount) +{ + return coord.x + coord.y * size.x + frameCount * 719393; +} + +uint PCGHash(uint seed) +{ + uint state = seed * 747796405u + 2891336453u; + uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; + return (word >> 22u) ^ word; +} + +float Random(inout uint seed) +{ + seed = PCGHash(seed); + return float(seed) / 4294967296.0; // Divide by 2^32. NOTE(review): float(seed) is inexact above 2^24, so seeds near 0xFFFFFFFF may round up to 2^32 and yield exactly 1.0 - confirm callers accept a closed [0, 1] range +} + +void CreateOrthonormalBasis(in float3 normal, out float3 tangent, out float3 bitangent) +{ + float3 up = abs(normal.z) < 0.999 ? float3(0, 0, 1) : float3(0, 1, 0); + + tangent = normalize(cross(up, normal)); + bitangent = cross(normal, tangent); +} + +float3 SampleCosineHemisphere(inout uint seed) +{ + float u1 = Random(seed); + float u2 = Random(seed); + + float r = sqrt(u1); + float theta = 2.0 * Math::PI * u2; + + float x = r * cos(theta); + float y = r * sin(theta); + float z = sqrt(1.0 - u1); + + return float3(x, y, z); +} + +float3 SampleCosineHemisphereScaled(inout uint randomSeed, in float scale) +{ + // Generate two uniform random numbers + float r1 = Random(randomSeed); + float r2 = Random(randomSeed); + + // Azimuthal angle + float phi = 2.0f * Math::PI * r1; + + // Maximum cone angle + float cosMax = cos(saturate(scale) * Math::PI / 2.0f); + + // Cosine of polar angle within cone + float cosTheta = lerp(cosMax, 1.0f, sqrt(1.0f - r2)); // cosine-weighted + float sinTheta = sqrt(max(0.0f, 1.0f - cosTheta * cosTheta)); + + // Convert to Cartesian coordinates + return float3( + cos(phi) * sinTheta, + sin(phi) * sinTheta, + cosTheta + ); +} + +float3 TangentToWorld(float3 normal, float3 
tangentSample) +{ + float3 tangent; + float3 bitangent; + CreateOrthonormalBasis(normal, tangent, bitangent); + + return tangent * tangentSample.x + + bitangent * tangentSample.y + + normal * tangentSample.z; +} + +float3 SampleConeUniform(inout uint randomSeed, in float cosMax) +{ + float r1 = Random(randomSeed); + float r2 = Random(randomSeed); + float phi = 2.0f * Math::PI * r1; + + float cosTheta = 1.0f - r2 * (1.0f - cosMax); + float sinTheta = sqrt(max(0.0f, 1.0f - cosTheta * cosTheta)); + return float3( + cos(phi) * sinTheta, + sin(phi) * sinTheta, + cosTheta + ); +} + +#endif // COMMONRT_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/Geometry.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT/Geometry.hlsli new file mode 100644 index 0000000000..e9aff60991 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/Geometry.hlsli @@ -0,0 +1,73 @@ +#ifndef GEOMETRY_HLSL +#define GEOMETRY_HLSL + +#include "Raytracing/Includes/Registers.hlsli" +#include "Raytracing/Includes/Types.hlsli" + +float3 GetBary(float2 barycentrics) +{ + return float3( + 1.0f - barycentrics.x - barycentrics.y, + barycentrics.x, + barycentrics.y + ); +} + +inline float Interpolate(half u, half v, half w, float3 uvw) +{ + return u * uvw.x + v * uvw.y + w * uvw.z; +} + +inline float2 Interpolate(half2 u, half2 v, half2 w, float3 uvw) +{ + return u * uvw.x + v * uvw.y + w * uvw.z; +} + +inline float3 Interpolate(float3 u, float3 v, float3 w, float3 uvw) +{ + return u * uvw.x + v * uvw.y + w * uvw.z; +} + +inline float3 Interpolate(half3 u, half3 v, half3 w, float3 uvw) +{ + return u * uvw.x + v * uvw.y + w * uvw.z; +} + +inline float4 Interpolate(half4 u, half4 v, half4 w, float3 uvw) +{ + return u * uvw.x + v * uvw.y + w * uvw.z; +} + +Instance GetInstance(uint instanceIdx) +{ + return Instances[instanceIdx]; +} + +Shape GetShape(in uint instanceIndex, in uint geometryIndex) +{ + Instance instance = 
GetInstance(instanceIndex); + return Shapes[instance.FirstGeometryID + geometryIndex]; +} + +Shape GetShape(in Payload payload, out Instance instance) +{ + instance = GetInstance(payload.InstanceIndex()); + return Shapes[instance.FirstGeometryID + payload.GeometryIndex()]; +} + +Triangle GetTriangle(in uint shapeIdx, in uint primitiveIdx) +{ + return Triangles[shapeIdx][primitiveIdx]; +} + +void GetVertices(in uint shapeIndex, in uint primitiveIndex, out Vertex v0, out Vertex v1, out Vertex v2) // reads the triangle's three indices, then fetches the matching vertices of the same shape +{ + Triangle geomTriangle = GetTriangle(shapeIndex, primitiveIndex); + + StructuredBuffer<Vertex> vertices = Vertices[shapeIndex]; // element type spelled out so indexing yields Vertex values for v0..v2 + v0 = vertices[geomTriangle.x]; + v1 = vertices[geomTriangle.y]; + v2 = vertices[geomTriangle.z]; +} + +#endif // GEOMETRY_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/Payload.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT/Payload.hlsli new file mode 100644 index 0000000000..558b49473a --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/Payload.hlsli @@ -0,0 +1,54 @@ +#ifndef PAYLOAD_HLSL +#define PAYLOAD_HLSL + +#include "Raytracing/Includes/Materials/TexLODHelpers.hlsli" + +uint PackUnorm2x16(float2 v) +{ + uint2 u = (uint2)round(saturate(v) * 65535.0f); + return u.x | (u.y << 16); +} + +float2 UnpackUnorm2x16(uint p) +{ + uint2 u = uint2(p & 0xFFFF, p >> 16); + return float2(u) * (1.0f / 65535.0f); +} + +struct Payload +{ + float hitDistance; + uint primitiveIndex; + uint barycentricsPacked; + uint instanceGeometryIndexPacked; + uint randomSeed; + + void PackBarycentrics(float2 barycentrics) + { + barycentricsPacked = PackUnorm2x16(barycentrics); + } + + float2 Barycentrics() + { + return UnpackUnorm2x16(barycentricsPacked); + } + + void PackInstanceGeometryIndex(uint instanceIndex, uint geometryIndex) + { + instanceGeometryIndexPacked = (instanceIndex & 0xFFFF) | ((geometryIndex & 0xFFFF) << 16); + } + + uint InstanceIndex() + { + return instanceGeometryIndexPacked & 
0xFFFF; + } + + uint GeometryIndex() + { + return instanceGeometryIndexPacked >> 16; + } + + bool Hit() { return hitDistance > 0.0f; } +}; + +#endif // PAYLOAD_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/Rays.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT/Rays.hlsli new file mode 100644 index 0000000000..cf5edb1033 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/Rays.hlsli @@ -0,0 +1,194 @@ +#ifndef RAYS_HLSL +#define RAYS_HLSL + +#include "Raytracing/Includes/Types.hlsli" +#include "Raytracing/Includes/Surface.hlsli" +#include "Raytracing/Includes/RT/CommonRT.hlsli" + +// https://github.com/NVIDIAGameWorks/dxvk-remix/blob/main/src/dxvk/shaders/rtx/concept/ray/ray_utilities.h + +/* +* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +* DEALINGS IN THE SOFTWARE. 
+*/ + +// This ray offsetting method is inspired by the "A Fast and Robust Method for Avoiding Self-Intersection" +// article from the Ray Tracing Gems book. The original implementation from that article was found to be +// flawed in multiple ways, but the logic behind it is solid. +// +// When we hit a triangle and compute a position on that triangle from the vertices and the barycentrics, +// the resulting position is inexact, it has some error. You can think of that as a "cloud" of points +// around the triangle plane, and the position can be anywhere inside that cloud, on either side of +// the triangle. In order to avoid self-intersection, we need to apply an offset along the triangle's +// geometric normal that is larger in magnitude than the thickness of this error cloud. +// The magnitude of this error depends primarily on the magnitude of the vertex positions, IOW, +// the further away our triangle is from the world origin, the larger the error will be. +// +// So, we take the maximum magnitude of the position and multiply it by some constant. +// This is different from the code in the RTG article which was dealing with position components +// independently, but that just distorts the normal direction for most triangles, and fails +// on triangles that are coplanar to one of the major planes like Y=0 in particular. +// +// The reason why dealing with per-component errors fails on triangles coplanar to a major plane +// is apparently in the ray intersection math that is happening inside the GPU. At least the approach +// documented in the Vulkan ray tracing spec (*) is based on transforming the primitive positions to +// ray space, which involves multiplying the positions by a matrix. That matrix propagates +// the error from reconstructing the X and Z positions (in case of Y=0 plane) into the ray T. 
+// * https://www.khronos.org/registry/vulkan/specs/1.1-khr-extensions/html/chap33.html#ray-intersection-candidate-determination + + +// Calculates the *scaled* approximate error of a float32 position. +// The error is scaled by the 1/kFloatULP constant defined in the CalculateRayOffset(...) function, +// which is moved there for efficiency: we don't want to do extra multiplications when combining +// errors from different positions. +// This function should be used on every position value in the chain of transforms, like so: +// +// float error = CalculatePositionError(objectSpacePosition); +// float3 worldSpacePosition = mul(objectToWorld, objectSpacePosition); +// error = max(error, CalculatePositionError(worldSpacePosition)); <-- update the error +// ... +// float3 offset = CalculateRayOffset(error, triangleNormal); +// float3 rayOrigin = worldSpacePosition + offset; +// +float CalculatePositionError(float3 p) +{ + const float maxAbsComponent = max(abs(p.x), max(abs(p.y), abs(p.z))); + return maxAbsComponent; +} + +// This function calculates a ray offset in the direction of the normal, given the error +// previously computed with the CalculatePositionError(p) function above. +// If the offset point is desired on the "inside" of a surface (for example when dealing +// with translucency or double sided geometry), invert the normal passed in beforehand. +float3 CalculateRayOffset(float positionError, float3 triangleNormal) +{ + // A single ULP (Unit in the Last Place, or Unit of Least Precision) of 32-bit floats, calculated as + // ((asfloat(asuint(x) + 1) - x) / x) + // The actual value is smaller for numbers that are not powers of 2, so we use the largest ULP. + // It can also be substantially larger for denormals, but we don't really care about them. 
+ const float kFloatULP = 0.00000011920928955078125; // pow(2.0, -23.0); + + // The original RTG article found that there is some "baseline" error coming from non-position sources, + // and we account for that by adding a max(kOrigin, ...) term where kOrigin is the point on the + // error plot in the article where the error switches from plateau to linear dependency on position. + const float kOrigin = 1.0f / 1024.0f; + + // The kOffsetScale value was determined experimentally as the smallest value that doesn't result in + // self-intersections in practice. The article claimed that the relative error is in the order of 10^-7, + // but that is hard to believe because that's just 1 ULP of float32. At the same time, the article + // was effectively multiplying the normal by 1 / 32768.0 with some cryptic integer math, and that + // is often too large. + const float kOffsetScale = 4.0; + + return (max(kOrigin, positionError) * (kFloatULP * kOffsetScale)) * triangleNormal; +} + +// The method above seems to cause some self-intersection issues in certain scenarios + +// Computes new ray origin based on hit position to avoid self-intersections. +// The function assumes that the hit position has been computed by barycentric interpolation, and not from the ray t which is less accurate. +// Described in Ray Tracing Gems, Chapter 6, "A Fast and Robust Method for Avoiding Self-Intersection" by Carsten Wächter and Nikolaus Binder. +// float3 OffsetRay(float3 worldPosition, float3 faceNormal, bool hasTransmission = false) // expects triangle faceNormal pointing towards the intended ray direction +// { +// if (hasTransmission) +// faceNormal = -faceNormal; +// const float origin = 1.f / 16.f; +// const float fScale = 3.f / 65536.f; +// const float iScale = 3 * 256.f; + +// // Per-component integer offset to bit representation of fp32 position. 
+// int3 iOff = int3(faceNormal * iScale); +// float3 iPos = asfloat(asint(worldPosition) + select(worldPosition < 0.f, -iOff, iOff)); + +// // Select per-component between small fixed offset or above variable offset depending on distance to origin. +// float3 fOff = faceNormal * fScale; +// return select(abs(worldPosition) < origin, worldPosition + fOff, iPos); +// } + +float3 OffsetRay(float3 position, float3 normal, bool hasTransmission = false) +{ + float3 offset = CalculateRayOffset(CalculatePositionError(position), normal); + if (hasTransmission) + offset = -offset; + return position + offset; +} + +float3 TraceRayShadow(RaytracingAccelerationStructure scene, Surface surface, float3 direction, inout uint randomSeed) +{ + RayDesc ray; + bool hasTransmission = any(surface.TransmissionColor > 0.0f) && dot(surface.FaceNormal, direction) < 0.0f; + ray.Origin = OffsetRay(surface.Position, surface.FaceNormal, hasTransmission); + ray.Direction = direction; + ray.TMin = 0.0f; + ray.TMax = SHADOW_RAY_TMAX; + + ShadowPayload shadowPayload; + shadowPayload.missed = 0.0f; + shadowPayload.randomSeed = randomSeed; + shadowPayload.transmission = float3(1.0f, 1.0f, 1.0f); + + TraceRay(scene, RAY_FLAG_SKIP_CLOSEST_HIT_SHADER | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES, 0xFF, SHADOW_RAY_HITGROUP_IDX, 0, SHADOW_RAY_MISS_IDX, ray, shadowPayload); + + randomSeed = shadowPayload.randomSeed; + return shadowPayload.transmission * shadowPayload.missed; +} + +float3 TraceRayShadowFinite(RaytracingAccelerationStructure scene, Surface surface, float3 direction, float tmax, inout uint randomSeed) +{ + RayDesc ray; + bool hasTransmission = any(surface.TransmissionColor > 0.0f) && dot(surface.FaceNormal, direction) < 0.0f; + ray.Origin = OffsetRay(surface.Position, surface.FaceNormal, hasTransmission); + ray.Direction = direction; + ray.TMin = 0.0f; + ray.TMax = tmax; + + ShadowPayload shadowPayload; + shadowPayload.missed = 0.0f; + shadowPayload.randomSeed = randomSeed; + 
shadowPayload.transmission = float3(1.0f, 1.0f, 1.0f); + + TraceRay(scene, RAY_FLAG_SKIP_CLOSEST_HIT_SHADER | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES, 0xFF, SHADOW_RAY_HITGROUP_IDX, 0, SHADOW_RAY_MISS_IDX, ray, shadowPayload); + + randomSeed = shadowPayload.randomSeed; + return shadowPayload.transmission * shadowPayload.missed; +} + +Payload SampleSubsurface(RaytracingAccelerationStructure scene, const float3 samplePosition, const float3 surfaceNormal, const float tmax, inout uint randomSeed) +{ + RayDesc ray; + ray.Origin = samplePosition; + ray.Direction = -surfaceNormal; // Shooting ray towards the surface + ray.TMin = 0.0f; + ray.TMax = tmax; + + Payload payload; + payload.hitDistance = -1.0f; + payload.primitiveIndex = 0; + payload.PackBarycentrics(float2(0.0f, 0.0f)); + payload.PackInstanceGeometryIndex(0, 0); + payload.randomSeed = randomSeed; + + TraceRay(scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, 0xFF, DIFFUSE_RAY_HITGROUP_IDX, 0, DIFFUSE_RAY_MISS_IDX, ray, payload); + randomSeed = payload.randomSeed; + + return payload; +} + +#endif // RAYS_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRC/HashGridCommon.h b/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRC/HashGridCommon.h new file mode 100644 index 0000000000..fcd1ef74f9 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRC/HashGridCommon.h @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ */ + +// Constants +#define HASH_GRID_POSITION_BIT_NUM 17 +#define HASH_GRID_POSITION_BIT_MASK ((1u << HASH_GRID_POSITION_BIT_NUM) - 1) +#define HASH_GRID_LEVEL_BIT_NUM 10 +#define HASH_GRID_LEVEL_BIT_MASK ((1u << HASH_GRID_LEVEL_BIT_NUM) - 1) +#define HASH_GRID_NORMAL_BIT_NUM 3 +#define HASH_GRID_NORMAL_BIT_MASK ((1u << HASH_GRID_NORMAL_BIT_NUM) - 1) +#define HASH_GRID_HASH_MAP_BUCKET_SIZE 16 +#define HASH_GRID_INVALID_HASH_KEY 0 +#define HASH_GRID_INVALID_CACHE_INDEX 0xFFFFFFFF + +// Tweakable parameters +#ifndef HASH_GRID_USE_NORMALS +# define HASH_GRID_USE_NORMALS 1 // account for the normal data in the hash key +#endif + +#ifndef HASH_GRID_POSITION_OFFSET +# define HASH_GRID_POSITION_OFFSET float3(0.0f, 0.0f, 0.0f) +#endif + +#ifndef HASH_GRID_POSITION_BIAS +# define HASH_GRID_POSITION_BIAS 1e-4f // may require adjustment for extreme scene scales +#endif + +#ifndef HASH_GRID_NORMAL_BIAS +# define HASH_GRID_NORMAL_BIAS 1e-3f +#endif + +#define HashGridIndex uint +#define HashGridKey uint64_t + +#ifndef HASH_GRID_LOOP_ATTR +# define HASH_GRID_LOOP_ATTR [loop] +#endif + +struct HashGridParameters +{ + float3 cameraPosition; + float logarithmBase; + float sceneScale; + float levelBias; +}; + +float HashGridLogBase(float x, float base) +{ + return log(x) / log(base); +} + +// http://burtleburtle.net/bob/hash/integer.html +uint HashGridHashJenkins32(uint a) +{ + a = (a + 0x7ed55d16) + (a << 12); + a = (a ^ 0xc761c23c) ^ (a >> 19); + a = (a + 0x165667b1) + (a << 5); + a = (a + 0xd3a2646c) ^ (a << 9); + a = (a + 0xfd7046c5) + (a << 3); + a = (a ^ 0xb55a4f09) ^ (a >> 16); + + return a; +} + +uint HashGridHash32(HashGridKey hashKey) +{ + return HashGridHashJenkins32(uint((hashKey >> 0) & 0xFFFFFFFF)) ^ HashGridHashJenkins32(uint((hashKey >> 32) & 0xFFFFFFFF)); +} + +uint HashGridGetBaseSlot(const HashGridKey hashKey, uint capacity) +{ + uint hash = HashGridHash32(hashKey); + uint slot = hash % capacity; + + return min(slot, capacity - HASH_GRID_HASH_MAP_BUCKET_SIZE); 
+} + +uint HashGridGetLevel(float3 samplePosition, HashGridParameters gridParameters) +{ + const float distance2 = dot(gridParameters.cameraPosition - samplePosition, gridParameters.cameraPosition - samplePosition); + + return uint(clamp(0.5f * HashGridLogBase(distance2, gridParameters.logarithmBase) + gridParameters.levelBias, 1.0f, float(HASH_GRID_LEVEL_BIT_MASK))); +} + +float HashGridGetVoxelSize(uint gridLevel, HashGridParameters gridParameters) +{ + return pow(gridParameters.logarithmBase, gridLevel) / (gridParameters.sceneScale * pow(gridParameters.logarithmBase, gridParameters.levelBias)); +} + +// Based on logarithmic caching by Johannes Jendersie +int4 HashGridCalculatePositionLog(float3 samplePosition, HashGridParameters gridParameters) +{ + samplePosition += float3(HASH_GRID_POSITION_BIAS, HASH_GRID_POSITION_BIAS, HASH_GRID_POSITION_BIAS); + + uint gridLevel = HashGridGetLevel(samplePosition, gridParameters); + float voxelSize = HashGridGetVoxelSize(gridLevel, gridParameters); + int3 gridPosition = int3(floor(samplePosition / voxelSize)); + + return int4(gridPosition.xyz, gridLevel); +} + +HashGridKey HashGridComputeSpatialHash(float3 samplePosition, float3 sampleNormal, HashGridParameters gridParameters) +{ + uint4 gridPosition = uint4(HashGridCalculatePositionLog(samplePosition, gridParameters)); + + HashGridKey hashKey = ((uint64_t(gridPosition.x) & HASH_GRID_POSITION_BIT_MASK) << (HASH_GRID_POSITION_BIT_NUM * 0)) | + ((uint64_t(gridPosition.y) & HASH_GRID_POSITION_BIT_MASK) << (HASH_GRID_POSITION_BIT_NUM * 1)) | + ((uint64_t(gridPosition.z) & HASH_GRID_POSITION_BIT_MASK) << (HASH_GRID_POSITION_BIT_NUM * 2)) | + ((uint64_t(gridPosition.w) & HASH_GRID_LEVEL_BIT_MASK) << (HASH_GRID_POSITION_BIT_NUM * 3)); + +#if HASH_GRID_USE_NORMALS + uint normalBits = + (sampleNormal.x + HASH_GRID_NORMAL_BIAS >= 0 ? 0 : 1) + + (sampleNormal.y + HASH_GRID_NORMAL_BIAS >= 0 ? 0 : 2) + + (sampleNormal.z + HASH_GRID_NORMAL_BIAS >= 0 ? 
0 : 4); + + hashKey |= (uint64_t(normalBits) << (HASH_GRID_POSITION_BIT_NUM * 3 + HASH_GRID_LEVEL_BIT_NUM)); +#endif // HASH_GRID_USE_NORMALS + + return hashKey; +} + +float3 HashGridGetPositionFromKey(const HashGridKey hashKey, HashGridParameters gridParameters) +{ + const int signBit = 1 << (HASH_GRID_POSITION_BIT_NUM - 1); + const int signMask = ~((1 << HASH_GRID_POSITION_BIT_NUM) - 1); + + int3 gridPosition; + gridPosition.x = int((hashKey >> (HASH_GRID_POSITION_BIT_NUM * 0)) & HASH_GRID_POSITION_BIT_MASK); + gridPosition.y = int((hashKey >> (HASH_GRID_POSITION_BIT_NUM * 1)) & HASH_GRID_POSITION_BIT_MASK); + gridPosition.z = int((hashKey >> (HASH_GRID_POSITION_BIT_NUM * 2)) & HASH_GRID_POSITION_BIT_MASK); + + // Fix negative coordinates + gridPosition.x = (gridPosition.x & signBit) != 0 ? gridPosition.x | signMask : gridPosition.x; + gridPosition.y = (gridPosition.y & signBit) != 0 ? gridPosition.y | signMask : gridPosition.y; + gridPosition.z = (gridPosition.z & signBit) != 0 ? gridPosition.z | signMask : gridPosition.z; + + uint gridLevel = uint((hashKey >> HASH_GRID_POSITION_BIT_NUM * 3) & HASH_GRID_LEVEL_BIT_MASK); + float voxelSize = HashGridGetVoxelSize(gridLevel, gridParameters); + float3 samplePosition = (gridPosition + 0.5f) * voxelSize; + + return samplePosition; +} + +struct HashMapData +{ + uint capacity; + + RW_STRUCTURED_BUFFER(hashEntriesBuffer, uint64_t); + +#if !HASH_GRID_ENABLE_64_BIT_ATOMICS + RW_STRUCTURED_BUFFER(lockBuffer, uint); +#endif // !HASH_GRID_ENABLE_64_BIT_ATOMICS +}; + +void HashMapAtomicCompareExchange(in HashMapData hashMapData, in uint dstOffset, in uint64_t compareValue, in uint64_t value, out uint64_t originalValue) +{ +#if HASH_GRID_ENABLE_64_BIT_ATOMICS +# if SHARC_ENABLE_GLSL + originalValue = InterlockedCompareExchange(BUFFER_AT_OFFSET(hashMapData.hashEntriesBuffer, dstOffset), compareValue, value); +# else // !SHARC_ENABLE_GLSL + InterlockedCompareExchange(BUFFER_AT_OFFSET(hashMapData.hashEntriesBuffer, dstOffset), 
compareValue, value, originalValue); +# endif // !SHARC_ENABLE_GLSL +#else // !HASH_GRID_ENABLE_64_BIT_ATOMICS + // ANY rearrangements to the code below lead to a device hang if fuse is unlimited + const uint cLock = 0xAAAAAAAA; + uint fuse = 0; + const uint fuseLength = 8; + bool busy = true; + while (busy && fuse < fuseLength) { + uint state; + InterlockedExchange(hashMapData.lockBuffer[dstOffset], cLock, state); + busy = state != 0; + + if (state != cLock) { + originalValue = BUFFER_AT_OFFSET(hashMapData.hashEntriesBuffer, dstOffset); + if (originalValue == compareValue) + BUFFER_AT_OFFSET(hashMapData.hashEntriesBuffer, dstOffset) = value; + InterlockedExchange(hashMapData.lockBuffer[dstOffset], state, fuse); + fuse = fuseLength; + } + ++fuse; + } +#endif // !HASH_GRID_ENABLE_64_BIT_ATOMICS +} + +bool HashMapInsert(in HashMapData hashMapData, const HashGridKey hashKey, out HashGridIndex cacheIndex) +{ + const uint baseSlot = HashGridGetBaseSlot(hashKey, hashMapData.capacity); + HASH_GRID_LOOP_ATTR + for (uint bucketOffset = 0; bucketOffset < HASH_GRID_HASH_MAP_BUCKET_SIZE; ++bucketOffset) { + HashGridKey prevHashGridKey; + HashMapAtomicCompareExchange(hashMapData, baseSlot + bucketOffset, HASH_GRID_INVALID_HASH_KEY, hashKey, prevHashGridKey); + + if (prevHashGridKey == HASH_GRID_INVALID_HASH_KEY || prevHashGridKey == hashKey) { + cacheIndex = baseSlot + bucketOffset; + return true; + } + } + + cacheIndex = hashMapData.capacity - 1; + + return false; +} + +bool HashMapFind(in HashMapData hashMapData, const HashGridKey hashKey, inout HashGridIndex cacheIndex, out uint bucketOffset) +{ + const uint baseSlot = HashGridGetBaseSlot(hashKey, hashMapData.capacity); + HASH_GRID_LOOP_ATTR + for (bucketOffset = 0; bucketOffset < HASH_GRID_HASH_MAP_BUCKET_SIZE; ++bucketOffset) { + HashGridKey storedHashKey = BUFFER_AT_OFFSET(hashMapData.hashEntriesBuffer, baseSlot + bucketOffset); + + if (storedHashKey == hashKey) { + cacheIndex = baseSlot + bucketOffset; + return true; + } + } 
+ + return false; +} + +HashGridIndex HashMapInsertEntry(in HashMapData hashMapData, float3 samplePosition, float3 sampleNormal, HashGridParameters gridParameters) +{ + HashGridIndex cacheIndex = HASH_GRID_INVALID_CACHE_INDEX; + const HashGridKey hashKey = HashGridComputeSpatialHash(samplePosition, sampleNormal, gridParameters); + bool successful = HashMapInsert(hashMapData, hashKey, cacheIndex); + + return cacheIndex; +} + +HashGridIndex HashMapFindEntry(in HashMapData hashMapData, float3 samplePosition, float3 sampleNormal, HashGridParameters gridParameters) +{ + HashGridIndex cacheIndex = HASH_GRID_INVALID_CACHE_INDEX; + const HashGridKey hashKey = HashGridComputeSpatialHash(samplePosition, sampleNormal, gridParameters); + uint hashCollisionsNum; + bool successful = HashMapFind(hashMapData, hashKey, cacheIndex, hashCollisionsNum); + + return cacheIndex; +} + +// Debug functions +float3 HashGridGetColorFromHash32(uint hash) +{ + float3 color; + color.x = ((hash >> 0) & 0x3ff) / 1023.0f; + color.y = ((hash >> 11) & 0x7ff) / 2047.0f; + color.z = ((hash >> 22) & 0x7ff) / 2047.0f; + + return color; +} + +// Debug visualization +float3 HashGridDebugColoredHash(float3 samplePosition, float3 sampleNormal, HashGridParameters gridParameters) +{ + HashGridKey hashKey = HashGridComputeSpatialHash(samplePosition, sampleNormal, gridParameters); + uint gridLevel = HashGridGetLevel(samplePosition, gridParameters); + float3 color = HashGridGetColorFromHash32(HashGridHash32(hashKey)) * HashGridGetColorFromHash32(HashGridHashJenkins32(gridLevel)).xyz; + + return color; +} + +float3 HashGridDebugOccupancy(uint2 pixelPosition, uint2 screenSize, HashMapData hashMapData) +{ + const uint elementSize = 7; + const uint borderSize = 1; + const uint blockSize = elementSize + borderSize; + + uint rowNum = screenSize.y / blockSize; + uint rowIndex = pixelPosition.y / blockSize; + uint columnIndex = pixelPosition.x / blockSize; + uint elementIndex = (columnIndex / 
HASH_GRID_HASH_MAP_BUCKET_SIZE) * (rowNum * HASH_GRID_HASH_MAP_BUCKET_SIZE) + rowIndex * HASH_GRID_HASH_MAP_BUCKET_SIZE + (columnIndex % HASH_GRID_HASH_MAP_BUCKET_SIZE); + + if (elementIndex < hashMapData.capacity && ((pixelPosition.x % blockSize) < elementSize && (pixelPosition.y % blockSize) < elementSize)) { + HashGridKey storedHashGridKey = BUFFER_AT_OFFSET(hashMapData.hashEntriesBuffer, elementIndex); + if (storedHashGridKey != HASH_GRID_INVALID_HASH_KEY) + return float3(0.0f, 1.0f, 0.0f); + } + + return float3(0.0f, 0.0f, 0.0f); +} + +float3 HashGridDebugHashCollisions(float3 samplePosition, float3 sampleNormal, HashGridParameters gridParameters, HashMapData hashMapData) +{ + HashGridKey hashKey = HashGridComputeSpatialHash(samplePosition, sampleNormal, gridParameters); + uint gridLevel = HashGridGetLevel(samplePosition, gridParameters); + + HashGridIndex cacheIndex = HASH_GRID_INVALID_CACHE_INDEX; + uint hashCollisionsNum; + HashMapFind(hashMapData, hashKey, cacheIndex, hashCollisionsNum); + + float3 debugColor; + if (hashCollisionsNum == 0) + debugColor = float3(0.0f, 0.0f, 1.0f); + else if (hashCollisionsNum == 1) + debugColor = float3(0.0f, 0.5f, 0.5f); + else if (hashCollisionsNum == 2) + debugColor = float3(0.0f, 1.0f, 0.0f); + else if (hashCollisionsNum == 3) + debugColor = float3(1.0f, 1.0f, 0.0f); + else if (hashCollisionsNum == 4) + debugColor = float3(0.75f, 0.25f, 0.0f); + else + debugColor = float3(1.0f, 0.0f, 0.0f); + + return debugColor; +} diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRC/SharcCommon.h b/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRC/SharcCommon.h new file mode 100644 index 0000000000..29c3c3605f --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRC/SharcCommon.h @@ -0,0 +1,499 @@ +/* + * Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved. 
+ * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + +// Version +#define SHARC_VERSION_MAJOR 1 +#define SHARC_VERSION_MINOR 6 +#define SHARC_VERSION_BUILD 3 +#define SHARC_VERSION_REVISION 0 + +// Constants +#define SHARC_ACCUMULATED_FRAME_NUM_BIT_OFFSET 0 +#define SHARC_ACCUMULATED_FRAME_NUM_BIT_NUM 16 +#define SHARC_ACCUMULATED_FRAME_NUM_BIT_MASK ((1 << SHARC_ACCUMULATED_FRAME_NUM_BIT_NUM) - 1) +#define SHARC_STALE_FRAME_NUM_BIT_OFFSET 16 +#define SHARC_STALE_FRAME_NUM_BIT_NUM 16 +#define SHARC_STALE_FRAME_NUM_BIT_MASK ((1 << SHARC_STALE_FRAME_NUM_BIT_NUM) - 1) +#define SHARC_GRID_LOGARITHM_BASE 2.0f +#define SHARC_BLEND_ADJACENT_LEVELS 1 // combine the data from adjacent levels on camera movement +#define SHARC_NORMALIZED_SAMPLE_NUM (1u << (SHARC_SAMPLE_NUM_BIT_NUM - 1)) +#define SHARC_ACCUMULATED_FRAME_NUM_MIN 1 // minimum number of frames to use for data accumulation +#define SHARC_ACCUMULATED_FRAME_NUM_MAX 1024 // maximum number of frames to use for data accumulation +#define SHARC_STALE_FRAME_NUM_MAX 1024 // maximum number of frames without new samples before the cache entry is evicted + +// Tweakable parameters +#ifndef SHARC_SAMPLE_NUM_THRESHOLD +# define SHARC_SAMPLE_NUM_THRESHOLD 0 // elements with sample count above this threshold will be used for early-out/resampling +#endif + +#ifndef SHARC_SEPARATE_EMISSIVE +# define SHARC_SEPARATE_EMISSIVE 0 // if enabled, emissive values must be provided separately during updates. 
For cache queries, you can either supply them directly or include them in the query result +#endif + +#ifndef SHARC_MATERIAL_DEMODULATION +# define SHARC_MATERIAL_DEMODULATION 0 // enable material demodulation to preserve material details +#endif + +#ifndef SHARC_PROPAGATION_DEPTH +# define SHARC_PROPAGATION_DEPTH 4 // controls the amount of vertices stored in memory for signal backpropagation +#endif + +#ifndef SHARC_LINEAR_PROBE_WINDOW_SIZE +# define SHARC_LINEAR_PROBE_WINDOW_SIZE 4 // size of the linear search window for probe lookups +#endif + +#ifndef SHARC_ENABLE_CACHE_RESAMPLING +# define SHARC_ENABLE_CACHE_RESAMPLING (SHARC_UPDATE && (SHARC_PROPAGATION_DEPTH > 1)) // resamples the cache during update step +#endif + +#ifndef SHARC_RESAMPLING_DEPTH_MIN +# define SHARC_RESAMPLING_DEPTH_MIN 1 // controls minimum path depth which can be used with cache resampling +#endif + +#ifndef SHARC_STALE_FRAME_NUM_MIN +# define SHARC_STALE_FRAME_NUM_MIN 8 // minimum number of frames to keep the element in the cache +#endif + +#ifndef SHARC_GRID_LEVEL_BIAS +# define SHARC_GRID_LEVEL_BIAS 0 // LOD bias - positive adds extra magnified levels, negative reduces levels +#endif + +#ifndef SHARC_USE_FP16 +# define SHARC_USE_FP16 0 // use fp16 for sample weights storage +#endif + +#ifndef RW_STRUCTURED_BUFFER +# define RW_STRUCTURED_BUFFER(name, type) RWStructuredBuffer<type> name // declares a read-write structured buffer named `name` whose elements are `type` +#endif + +#ifndef BUFFER_AT_OFFSET +# define BUFFER_AT_OFFSET(name, offset) name[offset] +#endif + +#if SHARC_USE_FP16 +# define SharcSampleWeight float16_t3 +#else // !SHARC_USE_FP16 +# define SharcSampleWeight float3 +#endif // SHARC_USE_FP16 + +/* + * RTXGI2 DIVERGENCE: + * Use SHARC_ENABLE_64_BIT_ATOMICS instead of SHARC_DISABLE_64_BIT_ATOMICS + * (Prefer 'enable' bools over 'disable' to avoid unnecessary mental gymnastics) + * Automatically set SHARC_ENABLE_64_BIT_ATOMICS if we're using DXC and it's not defined. 
+ */ +#if !defined(SHARC_ENABLE_64_BIT_ATOMICS) && defined(__DXC_VERSION_MAJOR) +// Use DXC macros to figure out if 64-bit atomics are possible from the current shader model +# if __SHADER_TARGET_MAJOR < 6 +# define SHARC_ENABLE_64_BIT_ATOMICS 0 +# elif __SHADER_TARGET_MAJOR > 6 +# define SHARC_ENABLE_64_BIT_ATOMICS 1 +# else +// 6.x +# if __SHADER_TARGET_MINOR < 6 +# define SHARC_ENABLE_64_BIT_ATOMICS 0 +# else +# define SHARC_ENABLE_64_BIT_ATOMICS 1 +# endif +# endif +#elif !defined(SHARC_ENABLE_64_BIT_ATOMICS) +// Not DXC, and SHARC_ENABLE_64_BIT_ATOMICS not defined +# error "Please define SHARC_ENABLE_64_BIT_ATOMICS as 0 or 1" +#endif + +#if SHARC_ENABLE_64_BIT_ATOMICS +# define HASH_GRID_ENABLE_64_BIT_ATOMICS 1 +#else +# define HASH_GRID_ENABLE_64_BIT_ATOMICS 0 +#endif + +#include "HashGridCommon.h" +#include "SharcTypes.h" + +struct SharcParameters +{ + HashGridParameters gridParameters; + HashMapData hashMapData; + float radianceScale; // quantization factor for atomic radiance accumulation (u32 per channel during SHARC_UPDATE). Start with 1e3f; reduce for large radiance values to prevent overflow + bool enableAntiFireflyFilter; + + RW_STRUCTURED_BUFFER(accumulationBuffer, SharcAccumulationData); + RW_STRUCTURED_BUFFER(resolvedBuffer, SharcPackedData); +}; + +struct SharcState +{ +#if SHARC_UPDATE + HashGridIndex cacheIndices[SHARC_PROPAGATION_DEPTH]; + SharcSampleWeight sampleWeights[SHARC_PROPAGATION_DEPTH]; + uint pathLength; +#endif // SHARC_UPDATE + uint placeholder; +}; + +struct SharcHitData +{ + float3 positionWorld; + float3 normalWorld; // geometry normal in world space. Shading or object-space normals should work, but are not generally recommended +#if SHARC_MATERIAL_DEMODULATION + float3 materialDemodulation; // demodulation factor used to preserve material details. 
Use > 0 when active; set to float3(1.0f, 1.0f, 1.0f) when unused +#endif // SHARC_MATERIAL_DEMODULATION +#if SHARC_SEPARATE_EMISSIVE + float3 emissive; // separate emissive improves behavior with dynamic lighting. Requires computing material emissive on each(even cached) hit +#endif // SHARC_SEPARATE_EMISSIVE +}; + +struct SharcVoxelData +{ + float3 accumulatedRadiance; + float accumulatedSampleNum; + uint accumulatedFrameNum; + uint staleFrameNum; + float luminanceM2; +}; + +struct SharcResolveParameters +{ + float3 cameraPositionPrev; // previous camera position + uint accumulationFrameNum; // maximum number of frames for the temporal accumulation window + uint staleFrameNumMax; // maximum number of frames without new samples before the cache entry is evicted + bool enableAntiFireflyFilter; // not used +}; + +SharcPackedData SharcPackVoxelData(float3 radiance, float sampleNum, uint accumulatedFrameNum, uint staleFrameNum) +{ + const float float16Max = 65504.0f; + + SharcPackedData packedData; + packedData.radianceData.x = float16_t(min(radiance.x, float16Max)); + packedData.radianceData.y = float16_t(min(radiance.y, float16Max)); + packedData.radianceData.z = float16_t(min(radiance.z, float16Max)); + packedData.radianceData.w = float16_t(min(sampleNum, float16Max)); + packedData.sampleData.x = accumulatedFrameNum | (staleFrameNum << SHARC_STALE_FRAME_NUM_BIT_OFFSET); + packedData.luminanceM2 = 0; // not used + + return packedData; +} + +SharcVoxelData SharcUnpackVoxelData(SharcPackedData packedData) +{ + SharcVoxelData voxelData; + voxelData.accumulatedRadiance.x = float(packedData.radianceData.x); + voxelData.accumulatedRadiance.y = float(packedData.radianceData.y); + voxelData.accumulatedRadiance.z = float(packedData.radianceData.z); + voxelData.accumulatedSampleNum = float(packedData.radianceData.w); + voxelData.accumulatedFrameNum = (packedData.sampleData >> SHARC_ACCUMULATED_FRAME_NUM_BIT_OFFSET) & SHARC_ACCUMULATED_FRAME_NUM_BIT_MASK; + 
voxelData.staleFrameNum = (packedData.sampleData >> SHARC_STALE_FRAME_NUM_BIT_OFFSET) & SHARC_STALE_FRAME_NUM_BIT_MASK; + voxelData.luminanceM2 = asfloat(packedData.luminanceM2); + + return voxelData; +} + +// Returns the unpacked cache entry stored at cacheIndex, or an all-zero entry when cacheIndex is HASH_GRID_INVALID_CACHE_INDEX. +SharcVoxelData SharcGetVoxelData(RW_STRUCTURED_BUFFER(voxelDataBuffer, SharcPackedData), HashGridIndex cacheIndex) +{ + SharcVoxelData voxelData; + voxelData.accumulatedRadiance = float3(0, 0, 0); + voxelData.accumulatedSampleNum = 0; + voxelData.accumulatedFrameNum = 0; + voxelData.staleFrameNum = 0; + voxelData.luminanceM2 = 0.0f; // keep the invalid-index result fully initialized + + if (cacheIndex == HASH_GRID_INVALID_CACHE_INDEX) + return voxelData; + + SharcPackedData packedData = BUFFER_AT_OFFSET(voxelDataBuffer, cacheIndex); + + return SharcUnpackVoxelData(packedData); +} + +void SharcAddVoxelData(in SharcParameters sharcParameters, HashGridIndex cacheIndex, float3 sampleValue, float3 sampleWeight, uint sampleData) +{ + if (cacheIndex == HASH_GRID_INVALID_CACHE_INDEX) + return; + + if (sharcParameters.enableAntiFireflyFilter) { + const float3 luma = float3(0.213f, 0.715f, 0.072f); + float scalarWeight = dot(sampleWeight, luma); + scalarWeight = max(scalarWeight, 1.0f); + + const float sampleWeightThreshold = 2.0f; + if (scalarWeight > sampleWeightThreshold) { + SharcPackedData dataPackedPrev = BUFFER_AT_OFFSET(sharcParameters.resolvedBuffer, cacheIndex); + float sampleNumPrev = float(dataPackedPrev.radianceData.w); + const float sampleConfidenceThreshold = 2.0f; + if (sampleNumPrev > sampleConfidenceThreshold) { + float luminancePrev = max(dot(float3(dataPackedPrev.radianceData.xyz), luma), 1.0f); + float luminanceCur = max(dot(sampleValue * sampleWeight, luma), 1.0f); + float t = saturate((sampleNumPrev - 2.0f) / 10.0f); + float confidenceScale = lerp(5.0f, 10.0f, t); + sampleWeight *= saturate(confidenceScale * luminancePrev / luminanceCur); + } else { + scalarWeight = pow(scalarWeight, 0.5f); + sampleWeight /= scalarWeight; + } + } + } + + uint3 scaledRadiance = uint3(sampleValue * sampleWeight * sharcParameters.radianceScale); + +
if (scaledRadiance.x != 0) + InterlockedAdd(BUFFER_AT_OFFSET(sharcParameters.accumulationBuffer, cacheIndex).data.x, scaledRadiance.x); + if (scaledRadiance.y != 0) + InterlockedAdd(BUFFER_AT_OFFSET(sharcParameters.accumulationBuffer, cacheIndex).data.y, scaledRadiance.y); + if (scaledRadiance.z != 0) + InterlockedAdd(BUFFER_AT_OFFSET(sharcParameters.accumulationBuffer, cacheIndex).data.z, scaledRadiance.z); + if (sampleData != 0) + InterlockedAdd(BUFFER_AT_OFFSET(sharcParameters.accumulationBuffer, cacheIndex).data.w, sampleData); +} + +void SharcInit(inout SharcState sharcState) +{ +#if SHARC_UPDATE + sharcState.pathLength = 0; +#endif // SHARC_UPDATE +} + +void SharcUpdateMiss(in SharcParameters sharcParameters, in SharcState sharcState, float3 radiance) +{ +#if SHARC_UPDATE + for (int i = 0; i < sharcState.pathLength; ++i) + SharcAddVoxelData(sharcParameters, sharcState.cacheIndices[i], radiance, sharcState.sampleWeights[i], 0); +#endif // SHARC_UPDATE +} + +bool SharcUpdateHit(in SharcParameters sharcParameters, inout SharcState sharcState, SharcHitData sharcHitData, float3 directLighting, float random) +{ + bool continueTracing = true; +#if SHARC_UPDATE + HashGridIndex cacheIndex = HashMapInsertEntry(sharcParameters.hashMapData, sharcHitData.positionWorld, sharcHitData.normalWorld, sharcParameters.gridParameters); + + float3 sharcRadiance = directLighting; + float3 materialDemodulation = float3(1.0f, 1.0f, 1.0f); +# if SHARC_MATERIAL_DEMODULATION + materialDemodulation = sharcHitData.materialDemodulation; +# endif // SHARC_MATERIAL_DEMODULATION + +# if SHARC_ENABLE_CACHE_RESAMPLING + uint resamplingDepth = uint(round(lerp(SHARC_RESAMPLING_DEPTH_MIN, SHARC_PROPAGATION_DEPTH - 1, random))); + if (resamplingDepth <= sharcState.pathLength) { + SharcVoxelData voxelData = SharcGetVoxelData(sharcParameters.resolvedBuffer, cacheIndex); + if (voxelData.accumulatedSampleNum > SHARC_SAMPLE_NUM_THRESHOLD) { + sharcRadiance = voxelData.accumulatedRadiance; + sharcRadiance 
*= materialDemodulation; + continueTracing = false; + } + } +# endif // SHARC_ENABLE_CACHE_RESAMPLING + + if (continueTracing) + SharcAddVoxelData(sharcParameters, cacheIndex, directLighting / materialDemodulation, float3(1.0f, 1.0f, 1.0f), 1); + +# if SHARC_SEPARATE_EMISSIVE + sharcRadiance += sharcHitData.emissive; +# endif // SHARC_SEPARATE_EMISSIVE + + uint i; + for (i = 0; i < sharcState.pathLength; ++i) + SharcAddVoxelData(sharcParameters, sharcState.cacheIndices[i], sharcRadiance, sharcState.sampleWeights[i], 0); + + for (i = sharcState.pathLength; i > 0; --i) { + sharcState.cacheIndices[i] = sharcState.cacheIndices[i - 1]; + sharcState.sampleWeights[i] = sharcState.sampleWeights[i - 1]; + } + + sharcState.cacheIndices[0] = cacheIndex; + sharcState.sampleWeights[0] = SharcSampleWeight(1.0f / materialDemodulation); + sharcState.pathLength = min(++sharcState.pathLength, SHARC_PROPAGATION_DEPTH - 1); +#endif // SHARC_UPDATE + return continueTracing; +} + +void SharcSetThroughput(inout SharcState sharcState, float3 throughput) +{ +#if SHARC_UPDATE + for (uint i = 0; i < sharcState.pathLength; ++i) + sharcState.sampleWeights[i] *= SharcSampleWeight(throughput); +#endif // SHARC_UPDATE +} + +bool SharcGetCachedRadiance(in SharcParameters sharcParameters, in SharcHitData sharcHitData, out float3 radiance, bool debug) +{ + if (debug) + radiance = float3(0, 0, 0); + const uint sampleThreshold = debug ? 
0 : SHARC_SAMPLE_NUM_THRESHOLD; + + HashGridIndex cacheIndex = HashMapFindEntry(sharcParameters.hashMapData, sharcHitData.positionWorld, sharcHitData.normalWorld, sharcParameters.gridParameters); + if (cacheIndex == HASH_GRID_INVALID_CACHE_INDEX) + return false; + + SharcVoxelData voxelData = SharcGetVoxelData(sharcParameters.resolvedBuffer, cacheIndex); + if (voxelData.accumulatedSampleNum > sampleThreshold) { + radiance = voxelData.accumulatedRadiance; +#if SHARC_MATERIAL_DEMODULATION + radiance *= sharcHitData.materialDemodulation; +#endif // SHARC_MATERIAL_DEMODULATION +#if SHARC_SEPARATE_EMISSIVE + radiance += sharcHitData.emissive; +#endif // SHARC_SEPARATE_EMISSIVE + + return true; + } + + return false; +} + +int SharcGetGridDistance2(int3 position) +{ + return position.x * position.x + position.y * position.y + position.z * position.z; +} + +HashGridKey SharcGetAdjacentLevelHashKey(HashGridKey hashKey, HashGridParameters gridParameters, float3 cameraPositionPrev) +{ + const int signBit = 1 << (HASH_GRID_POSITION_BIT_NUM - 1); + const int signMask = ~((1 << HASH_GRID_POSITION_BIT_NUM) - 1); + + int3 gridPosition; + gridPosition.x = int((hashKey >> HASH_GRID_POSITION_BIT_NUM * 0) & HASH_GRID_POSITION_BIT_MASK); + gridPosition.y = int((hashKey >> HASH_GRID_POSITION_BIT_NUM * 1) & HASH_GRID_POSITION_BIT_MASK); + gridPosition.z = int((hashKey >> HASH_GRID_POSITION_BIT_NUM * 2) & HASH_GRID_POSITION_BIT_MASK); + + // Fix negative coordinates + gridPosition.x = ((gridPosition.x & signBit) != 0) ? gridPosition.x | signMask : gridPosition.x; + gridPosition.y = ((gridPosition.y & signBit) != 0) ? gridPosition.y | signMask : gridPosition.y; + gridPosition.z = ((gridPosition.z & signBit) != 0) ? 
gridPosition.z | signMask : gridPosition.z; + + int level = int((hashKey >> (HASH_GRID_POSITION_BIT_NUM * 3)) & HASH_GRID_LEVEL_BIT_MASK); + + float voxelSize = HashGridGetVoxelSize(level, gridParameters); + int3 cameraGridPosition = int3(floor((gridParameters.cameraPosition + HASH_GRID_POSITION_OFFSET) / voxelSize)); + int3 cameraVector = cameraGridPosition - gridPosition; + int cameraDistance = SharcGetGridDistance2(cameraVector); + + int3 cameraGridPositionPrev = int3(floor((cameraPositionPrev + HASH_GRID_POSITION_OFFSET) / voxelSize)); + int3 cameraVectorPrev = cameraGridPositionPrev - gridPosition; + int cameraDistancePrev = SharcGetGridDistance2(cameraVectorPrev); + + if (cameraDistance < cameraDistancePrev) { + gridPosition = int3(floor(gridPosition / gridParameters.logarithmBase)); + level = min(level + 1, int(HASH_GRID_LEVEL_BIT_MASK)); + } else // this may be inaccurate + { + gridPosition = int3(floor(gridPosition * gridParameters.logarithmBase)); + level = max(level - 1, 1); + } + + HashGridKey modifiedHashGridKey = ((uint64_t(gridPosition.x) & HASH_GRID_POSITION_BIT_MASK) << (HASH_GRID_POSITION_BIT_NUM * 0)) | ((uint64_t(gridPosition.y) & HASH_GRID_POSITION_BIT_MASK) << (HASH_GRID_POSITION_BIT_NUM * 1)) | ((uint64_t(gridPosition.z) & HASH_GRID_POSITION_BIT_MASK) << (HASH_GRID_POSITION_BIT_NUM * 2)) | ((uint64_t(level) & HASH_GRID_LEVEL_BIT_MASK) << (HASH_GRID_POSITION_BIT_NUM * 3)); + +#if HASH_GRID_USE_NORMALS + modifiedHashGridKey |= hashKey & (uint64_t(HASH_GRID_NORMAL_BIT_MASK) << (HASH_GRID_POSITION_BIT_NUM * 3 + HASH_GRID_LEVEL_BIT_NUM)); +#endif // HASH_GRID_USE_NORMALS + + return modifiedHashGridKey; +} + +void SharcResolveEntry(uint entryIndex, SharcParameters sharcParameters, SharcResolveParameters resolveParameters) +{ + if (entryIndex >= sharcParameters.hashMapData.capacity) + return; + + HashGridKey hashKey = BUFFER_AT_OFFSET(sharcParameters.hashMapData.hashEntriesBuffer, entryIndex); + if (hashKey == HASH_GRID_INVALID_HASH_KEY) + return; + 
+ SharcAccumulationData accumulatedData = BUFFER_AT_OFFSET(sharcParameters.accumulationBuffer, entryIndex); + SharcPackedData resolvedData = BUFFER_AT_OFFSET(sharcParameters.resolvedBuffer, entryIndex); + SharcVoxelData sharcVoxelData = SharcUnpackVoxelData(resolvedData); + + float sampleNum = float(accumulatedData.data.w); + float sampleNumPrev = sharcVoxelData.accumulatedSampleNum; + uint accumulatedFrameNum = sharcVoxelData.accumulatedFrameNum + 1; + uint staleFrameNum = sharcVoxelData.staleFrameNum; + + staleFrameNum = (sampleNum != 0) ? 0 : staleFrameNum + 1; + uint staleFrameNumMax = clamp(resolveParameters.staleFrameNumMax, SHARC_STALE_FRAME_NUM_MIN, SHARC_STALE_FRAME_NUM_MAX); + bool isValidElement = (staleFrameNum < staleFrameNumMax) ? true : false; + + if (!isValidElement) { + SharcAccumulationData zeroAccumulationData; + zeroAccumulationData.data = uint4(0, 0, 0, 0); + + SharcPackedData zeroPackedData; + zeroPackedData.radianceData = float16_t4(0, 0, 0, 0); + zeroPackedData.sampleData = 0; + zeroPackedData.luminanceM2 = 0; + + BUFFER_AT_OFFSET(sharcParameters.hashMapData.hashEntriesBuffer, entryIndex) = HASH_GRID_INVALID_HASH_KEY; + BUFFER_AT_OFFSET(sharcParameters.accumulationBuffer, entryIndex) = zeroAccumulationData; + BUFFER_AT_OFFSET(sharcParameters.resolvedBuffer, entryIndex) = zeroPackedData; + return; + } else if (sampleNum == 0) { + InterlockedAdd(BUFFER_AT_OFFSET(sharcParameters.resolvedBuffer, entryIndex).sampleData, (1 << SHARC_ACCUMULATED_FRAME_NUM_BIT_OFFSET) | (1 << SHARC_STALE_FRAME_NUM_BIT_OFFSET)); + return; + } + + // Hash map lookup to find previous data if there were hash collisions during previous insertion and this frame a new empty slot got assigned + // This is a linear probe search with fixed window size + if (sampleNumPrev == 0) { + for (uint i = entryIndex + 1; i < min(entryIndex + 1 + SHARC_LINEAR_PROBE_WINDOW_SIZE, sharcParameters.hashMapData.capacity); ++i) { + HashGridKey hashKeyOld = 
BUFFER_AT_OFFSET(sharcParameters.hashMapData.hashEntriesBuffer, i); + if (hashKeyOld == hashKey) { + resolvedData = BUFFER_AT_OFFSET(sharcParameters.resolvedBuffer, i); + sharcVoxelData = SharcUnpackVoxelData(resolvedData); + sampleNumPrev = sharcVoxelData.accumulatedSampleNum; + accumulatedFrameNum = sharcVoxelData.accumulatedFrameNum + 1; + staleFrameNum = 0; + break; + } + } + } + + float3 accumulatedRadiance = float3(accumulatedData.data.xyz) * rcp(sharcParameters.radianceScale); + float3 accumulatedRadiancePrev = sharcVoxelData.accumulatedRadiance; + + uint accumulationFrameNum = clamp(resolveParameters.accumulationFrameNum, SHARC_ACCUMULATED_FRAME_NUM_MIN, SHARC_ACCUMULATED_FRAME_NUM_MAX); + if (accumulatedFrameNum > accumulationFrameNum) { + float normalizationScale = float(accumulationFrameNum) / float(accumulatedFrameNum); + accumulatedFrameNum = accumulationFrameNum; + sampleNumPrev *= normalizationScale; + } + + float sampleTotalInv = rcp(sampleNumPrev + sampleNum); + + accumulatedRadiance = accumulatedRadiance / max(sampleNum, 1e-6f); + accumulatedRadiance = sampleNumPrev * sampleTotalInv * accumulatedRadiancePrev + sampleNum * sampleTotalInv * accumulatedRadiance; + float accumulatedSampleNum = sampleNumPrev + sampleNum; + +#if SHARC_BLEND_ADJACENT_LEVELS + // Reproject sample from adjacent level + float3 cameraOffset = sharcParameters.gridParameters.cameraPosition.xyz - resolveParameters.cameraPositionPrev.xyz; + if ((dot(cameraOffset, cameraOffset) > 1e-6f) && (accumulatedFrameNum < resolveParameters.accumulationFrameNum)) { + HashGridKey adjacentLevelHashKey = SharcGetAdjacentLevelHashKey(hashKey, sharcParameters.gridParameters, resolveParameters.cameraPositionPrev); + + HashGridIndex cacheIndex = HASH_GRID_INVALID_CACHE_INDEX; + uint hashCollisionsNum; + if (HashMapFind(sharcParameters.hashMapData, adjacentLevelHashKey, cacheIndex, hashCollisionsNum)) { + SharcPackedData adjacentPackedDataPrev = BUFFER_AT_OFFSET(sharcParameters.resolvedBuffer, 
cacheIndex); + SharcVoxelData adjacentVoxelDataPrev = SharcUnpackVoxelData(adjacentPackedDataPrev); + float adjacentSampleNum = adjacentVoxelDataPrev.accumulatedSampleNum; + if (adjacentSampleNum > SHARC_SAMPLE_NUM_THRESHOLD) { + float blendWeight = rcp(adjacentSampleNum + accumulatedSampleNum); + accumulatedRadiance = adjacentSampleNum * blendWeight * adjacentVoxelDataPrev.accumulatedRadiance + accumulatedSampleNum * blendWeight * accumulatedRadiance.xyz; + accumulatedSampleNum += adjacentSampleNum; + } + } + } +#endif // SHARC_BLEND_ADJACENT_LEVELS + + BUFFER_AT_OFFSET(sharcParameters.resolvedBuffer, entryIndex) = SharcPackVoxelData(accumulatedRadiance, accumulatedSampleNum, accumulatedFrameNum, staleFrameNum); + + // Clear buffer entry for the next frame + SharcAccumulationData zeroAccumulationData; + zeroAccumulationData.data = uint4(0, 0, 0, 0); + BUFFER_AT_OFFSET(sharcParameters.accumulationBuffer, entryIndex) = zeroAccumulationData; +} diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRC/SharcTypes.h b/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRC/SharcTypes.h new file mode 100644 index 0000000000..ad2655ce1b --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRC/SharcTypes.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ */ + +#ifndef SHARC_TYPES_H +#define SHARC_TYPES_H + +struct SharcAccumulationData +{ + uint4 data; +}; + +struct SharcPackedData +{ + float16_t4 radianceData; + uint sampleData; + uint luminanceM2; +}; + +#if SHARC_ENABLE_GLSL +layout(buffer_reference, std430, buffer_reference_align = 16) buffer RWStructuredBuffer_SharcAccumulationData +{ + SharcAccumulationData data[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 16) buffer RWStructuredBuffer_SharcPackedData +{ + SharcPackedData data[]; +}; +#endif // SHARC_ENABLE_GLSL + +#endif // SHARC_TYPES_H diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRCHelper.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRCHelper.hlsli new file mode 100644 index 0000000000..ff0843b9e6 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/SHaRCHelper.hlsli @@ -0,0 +1,63 @@ +#ifdef SHARC + +# ifndef SHARC_HELPER_DEPENDENCY_HLSL +# define SHARC_HELPER_DEPENDENCY_HLSL + +#include "Common/Game.hlsli" +#include "Raytracing/Includes/Common.hlsli" + +uint Hash(uint2 idx) +{ + return (idx.x * 73856093u) ^ (idx.y * 19349663u); +} + +HashGridParameters GetSharcGridParameters() +{ + HashGridParameters gridParameters; + { + gridParameters.cameraPosition = Frame.Position; + gridParameters.sceneScale = Frame.SHaRC.SceneScale * M_TO_GAME_UNIT; + gridParameters.logarithmBase = SHARC_GRID_LOGARITHM_BASE; + gridParameters.levelBias = SHARC_GRID_LEVEL_BIAS; + } + + return gridParameters; +} + +SharcParameters GetSharcParameters() +{ + SharcParameters sharcParameters; + { + sharcParameters.gridParameters = GetSharcGridParameters(); + + sharcParameters.hashMapData.capacity = Frame.SHaRC.Capacity; + sharcParameters.hashMapData.hashEntriesBuffer = u_SharcHashEntriesBuffer; + +#if !SHARC_ENABLE_64_BIT_ATOMICS && !SHARC_RESOLVE + sharcParameters.hashMapData.lockBuffer = u_SharcLockBuffer; +#endif // !SHARC_ENABLE_64_BIT_ATOMICS + + sharcParameters.accumulationBuffer = 
u_SharcAccumulationBuffer; + sharcParameters.resolvedBuffer = u_SharcResolvedBuffer; + sharcParameters.radianceScale = Frame.SHaRC.RadianceScale; + sharcParameters.enableAntiFireflyFilter = Frame.SHaRC.AntifireflyFilter; // SharcAddVoxelData branches on this flag, so it must not be left unset + } + + return sharcParameters; +} + +SharcResolveParameters GetSharcResolveParameters() +{ + SharcResolveParameters resolveParameters; + { + resolveParameters.accumulationFrameNum = Frame.SHaRC.AccumFrameNum; + resolveParameters.staleFrameNumMax = Frame.SHaRC.StaleFrameNum; + resolveParameters.cameraPositionPrev = Frame.PositionPrev; + resolveParameters.enableAntiFireflyFilter = Frame.SHaRC.AntifireflyFilter; + } + + return resolveParameters; +} + +# endif // SHARC_HELPER_DEPENDENCY_HLSL + +#endif // SHARC \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/Shading.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT/Shading.hlsli new file mode 100644 index 0000000000..5c0f8ac84b --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/Shading.hlsli @@ -0,0 +1,336 @@ +#ifndef SHADING_HLSL +#define SHADING_HLSL + +#include "Common/Game.hlsli" +#include "Common/BRDF.hlsli" + +#include "Raytracing/Includes/AdvancedSettings.hlsli" + +#include "Raytracing/Includes/Types.hlsli" +#include "Raytracing/Includes/Registers.hlsli" +#include "Raytracing/Includes/Common.hlsli" +#include "Raytracing/Includes/ColorConversions.hlsli" +#include "Raytracing/Includes/RT/CommonRT.hlsli" +#include "Raytracing/Includes/RT/Rays.hlsli" +#include "Raytracing/Includes/MonteCarlo.hlsli" +#include "Raytracing/Includes/Surface.hlsli" + +#include "Raytracing/Includes/Materials/BSDF.hlsli" + +static const float ISL_SCALE = 0.8f; +static const float ISL_METRES_TO_UNITS = 70.f; +static const float ISL_METRES_TO_UNITS_SQ = ISL_METRES_TO_UNITS * ISL_METRES_TO_UNITS; +static const float ISL_SCALED_UNITS_SQ = ISL_SCALE * ISL_METRES_TO_UNITS_SQ; + +float2 EvalHemiUV(float3 dir) +{ + dir.z = max(dir.z, 0.0f); + + float r = sqrt(1.0f - dir.z); + float phi = atan2(dir.y, dir.x); + + float2 disk = 
float2(cos(phi), sin(phi)) * r; + return disk * 0.5f + 0.5f; +} + +// Samples the sky hemisphere texture based on the given direction +// Output is in true linear space +float3 SampleSky(float3 dir) +{ + float2 uv = EvalHemiUV(dir); + + float3 color = SkyHemisphere.SampleLevel(BaseSampler, uv, 0.0f).rgb; + + return LLGammaToTrueLinear(color); +} + +float EvalSkyOcclusion(float3 dir) +{ + float2 uv = EvalHemiUV(dir); + + return lerp(1.0f, 1.0f - SkyHemisphere.SampleLevel(BaseSampler, uv, 0.0f).a, Frame.CloudOpacity); +} + +float3 EvalDiffuse(in float3 l, in Surface surface, in BRDFContext brdfContext) +{ + float NdotL = saturate(dot(surface.Normal, l)); + + if (NdotL <= 0.0f) + return float3(0.0f, 0.0f, 0.0f); + + // Diffuse is meant to be very light (and used with DDGI), so I don't see much point in using a different diffuse or shading model here + return surface.DiffuseAlbedo * NdotL * BRDF::Diffuse_Lambert(); +} + +float3 EvalLight(in float3 l, in Material material, in Surface surface, in BRDFContext brdfContext, in StandardBSDF bsdf) +{ +#if LIGHTEVAL_MODE == LIGHTEVAL_MODE_DIFFUSE + return EvalDiffuse(l, surface, brdfContext); +#else + float4 bsdfEval = bsdf.Eval(brdfContext, material, surface, l); + return bsdfEval.xyz; +#endif +} + +void GetDirectionalLightIrradiance(out float3 irradiance, out float3 lr, inout uint randomSeed) +{ + irradiance = DirLightToLinear(Frame.Directional.Color) * EvalSkyOcclusion(Frame.Directional.Vector); + lr = Frame.Directional.Vector; + + // Sun angular radius is ~0.00465 radians (~0.266 degrees) + float cosSunDisk = cos(0.00465f); + lr = TangentToWorld(lr, SampleConeUniform(randomSeed, cosSunDisk)); +} + +float3 EvalDirectionalLight(in Material material, in Surface surface, in BRDFContext brdfContext, in StandardBSDF bsdf, inout uint randomSeed) +{ + float3 irradiance; + float3 lr; + GetDirectionalLightIrradiance(irradiance, lr, randomSeed); + float3 direct = EvalLight(lr, material, surface, brdfContext, bsdf) * irradiance; + 
[branch] + if (any(direct > MIN_DIFFUSE_SHADOW)) + { + direct *= TraceRayShadow(Scene, surface, lr, randomSeed); + } + else + { + direct = 0.0f; + } + + return direct; +} + +// Distance falloff for a light. dist is compared directly against light.Radius, so both must use the same units. +// For ISL lights lightSourceAngle is overwritten from the light's size bias; otherwise it is left untouched. +float GetAttenuation(Light light, float dist, inout float lightSourceAngle) +{ + float atten = 0.0f; + if ((light.Flags & LightFlags::ISL) != 0) + { + float invSq = ISL_SCALED_UNITS_SQ * rcp(dist * dist + light.SizeBias); + float t = saturate((light.Radius - dist) * light.FadeZone); + float fastSmoothstep = t * t * (3.0f - 2.0f * t); + atten = invSq * fastSmoothstep; + float size = sqrt((light.SizeBias * 2.0f) / ISL_SCALED_UNITS_SQ); // same value as the former literal (0.8 * 4900) + lightSourceAngle = atan2(size, dist); + } + else + { + float intensityFactor = saturate(dist * light.InvRadius); + atten = 1.0f - intensityFactor * intensityFactor; + } + return atten; +} + +// RIS target weight for a candidate light: attenuation times the brightest colour channel scaled by light.Fade. +float GetLightSampleWeight(Surface surface, Light light) +{ + float3 l = (light.Vector - surface.Position); + float dist = length(l); // unscaled, matching the distance GetPointLightIrradiance passes to GetAttenuation + float lightSourceAngle = 0.0f; + float atten = GetAttenuation(light, dist, lightSourceAngle); + float intensity = max(light.Color.r, max(light.Color.g, light.Color.b)) * light.Fade; + return atten * intensity; +} + +// Get irradiance for point light without BRDF evaluation +void GetPointLightIrradiance(in LightData lightData, in Surface surface, out float3 irradiance, out float3 lr, out float dist, inout uint randomSeed) +{ + if (lightData.Count == 0) + { + irradiance = float3(0, 0, 0); + lr = float3(0, 0, 0); + dist = 0.0f; + return; + } + + float lightWeight = float(lightData.Count); + +#if defined(RIS) + const uint candidateCount = min(RIS_MAX_CANDIDATES, lightData.Count); + uint selectedLightID = 0; + float totalWeight = 0.0f; + float selectedWeight = 0.0f; + + for (uint i = 0; i < candidateCount; i++) + { + uint lightIdx = min(uint(Random(randomSeed) * lightData.Count), lightData.Count - 1); + uint lightID = lightData.GetID(lightIdx); + Light testLight = Lights[lightID]; + const bool isTestLinear = (testLight.Flags & 
LightFlags::LinearLight) != 0; + testLight.Color = PointLightToLinear(testLight.Color, isTestLinear); + float weight = GetLightSampleWeight(surface, testLight); + totalWeight += weight; + + if (Random(randomSeed) * totalWeight < weight) + { + selectedLightID = lightID; + selectedWeight = weight; + } + } + if (totalWeight == 0.0f) + { + irradiance = float3(0, 0, 0); + lr = float3(0, 0, 0); + dist = 0.0f; // every out parameter must be written on this early-out, as on the Count == 0 path + return; + } + + float risWeight = (totalWeight / max(selectedWeight, 1e-7f)) / float(candidateCount); + + lightWeight *= risWeight; + + Light light = Lights[selectedLightID]; +#else + + uint lightIdx = min(uint(Random(randomSeed) * lightData.Count), lightData.Count - 1); + uint lightID = lightData.GetID(lightIdx); + Light light = Lights[lightID]; +#endif + + const bool isLinear = (light.Flags & LightFlags::LinearLight) != 0; + light.Color = PointLightToLinear(light.Color, isLinear); + + lr = (light.Vector - surface.Position); + dist = length(lr); + lr /= dist; + + float lightSourceAngle = 0.005f; + + float atten = GetAttenuation(light, dist, lightSourceAngle); + + irradiance = light.Color * light.Fade * atten * lightWeight; + lr = TangentToWorld(lr, SampleCosineHemisphereScaled(randomSeed, lightSourceAngle)); +} + +float3 EvalPointLight(in Material material, in Surface surface, in BRDFContext brdfContext, in LightData lightData, in StandardBSDF bsdf, inout uint randomSeed) +{ + float3 lightIrradiance; + float3 lr; + float dist; + GetPointLightIrradiance(lightData, surface, lightIrradiance, lr, dist, randomSeed); + + float3 direct = EvalLight(lr, material, surface, brdfContext, bsdf) * lightIrradiance; + + [branch] + if (any(direct > MIN_DIFFUSE_SHADOW)) + { + direct *= TraceRayShadowFinite(Scene, surface, lr, dist, randomSeed); + } + else + { + direct = 0.0f; + } + + return direct; +} + +float3 EvaluateDirectRadiance(in Material material, in Surface surface, in BRDFContext brdfContext, in Instance instance, in StandardBSDF bsdf, inout uint randomSeed) +{ + float3 radiance = 
EvalDirectionalLight(material, surface, brdfContext, bsdf, randomSeed); + radiance += EvalPointLight(material, surface, brdfContext, instance.LightData, bsdf, randomSeed); + + return radiance; +} + +void GetLightIrradianceMIS(in Instance instance, in Surface surface, out float3 irradiance, out float3 lr, out float distance, inout uint randomSeed) +{ + float3 directionalIrradiance; + float3 dirLr; + GetDirectionalLightIrradiance(directionalIrradiance, dirLr, randomSeed); + + float3 pointIrradiance; + float3 pointLr; + float pointDist; + GetPointLightIrradiance(instance.LightData, surface, pointIrradiance, pointLr, pointDist, randomSeed); + + float3 dirVisibility = TraceRayShadow(Scene, surface, dirLr, randomSeed); + + float pDirLight = Luminance(directionalIrradiance * dirVisibility); + float pPointLight = Luminance(pointIrradiance); + + float total = pDirLight + pPointLight; + if (total < 1e-6f) + { + irradiance = float3(0, 0, 0); + lr = float3(0, 0, 0); + distance = 0.0f; + return; + } + + float r = Random(randomSeed); + pDirLight /= total; + pPointLight /= total; + + if (r < pDirLight) + { + irradiance = directionalIrradiance / pDirLight; + lr = dirLr; + distance = SHADOW_RAY_TMAX; + } + else + { + irradiance = pointIrradiance / pPointLight; + lr = pointLr; + distance = pointDist; + } +} + +float3 EvaluateDirectRadianceMIS(in Material material, in Surface surface, in BRDFContext brdfContext, in Instance instance, in StandardBSDF bsdf, inout uint randomSeed) +{ + float3 lightIrradiance; + float3 lr; + float distance; + GetLightIrradianceMIS(instance, surface, lightIrradiance, lr, distance, randomSeed); + + float3 direct = EvalLight(lr, material, surface, brdfContext, bsdf) * lightIrradiance; + + return direct; +} + +bool ComputeTangentSpace(inout Surface surface, const bool ignoreTangent) +{ + // Check that tangent space exists and can be safely orthonormalized. + // Otherwise invent a tangent frame based on the normal. 
+ // We check that: + // - Tangent exists, this is indicated by a nonzero sign (w). + // - It has nonzero length. Zeros can occur due to interpolation or bad assets. + // - It is not parallel to the normal. This can occur due to normal mapping or bad assets. + // - It does not have NaNs. These will propagate and trigger the fallback. + + float NdotT = dot(surface.GeomTangent, surface.Normal); + bool nonParallel = abs(NdotT) < 0.9999f; + bool nonZero = dot(surface.GeomTangent, surface.GeomTangent) > 0.f; + + bool valid = nonZero && nonParallel; + if (!ignoreTangent && valid) + { + surface.Tangent = normalize(surface.GeomTangent - surface.Normal * NdotT); + surface.Bitangent = cross(surface.Normal, surface.Tangent); + } + else + { + surface.Tangent = perp_stark(surface.Normal); + surface.Bitangent = cross(surface.Normal, surface.Tangent); + } + + return valid; +} + +void AdjustShadingNormal(inout Surface surface, BRDFContext brdfContext, uniform bool recomputeTangentSpace, const bool ignoreTangent) +{ + float3 Ng = dot(brdfContext.ViewDirection, surface.FaceNormal) >= 0.f ? surface.FaceNormal : -surface.FaceNormal; + float signN = dot(surface.Normal, Ng) >= 0.f ? 1.f : -1.f; + float3 Ns = signN * surface.Normal; + + // Blend the shading normal towards the geometric normal at grazing angles. + // This is to avoid the view vector from becoming back-facing. 
const float kCosThetaThreshold = 0.1f; + float cosTheta = dot(brdfContext.ViewDirection, Ns); + if (cosTheta <= kCosThetaThreshold) + { + float t = saturate(cosTheta * (1.f / kCosThetaThreshold)); + surface.Normal = signN * normalize(lerp(Ng, Ns, t)); + } + if (cosTheta <= kCosThetaThreshold || recomputeTangentSpace) + ComputeTangentSpace(surface, ignoreTangent); +} + +#endif // SHADING_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/ShadowPayload.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT/ShadowPayload.hlsli new file mode 100644 index 0000000000..d01b80427d --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/ShadowPayload.hlsli @@ -0,0 +1,11 @@ +#ifndef SHADOWPAYLOAD_HLSL +#define SHADOWPAYLOAD_HLSL + +struct ShadowPayload +{ + float missed; + float3 transmission; + uint randomSeed; +}; + +#endif // SHADOWPAYLOAD_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/Sharc.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT/Sharc.hlsli new file mode 100644 index 0000000000..02f964e1b5 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/Sharc.hlsli @@ -0,0 +1,20 @@ +#ifdef SHARC + +# ifndef SHARC_DEPENDENCY_HLSL +# define SHARC_DEPENDENCY_HLSL + +# define SHARC_ENABLE_64_BIT_ATOMICS 0 + +# define SHARC_UPDATE 1 + +# ifndef SHARC_RESOLVE +# define SHARC_RESOLVE 0 +# endif + +# define SHARC_SEPARATE_EMISSIVE 1 + +# include "Raytracing/Includes/RT/SHaRC/SharcCommon.h" + +# endif // SHARC_DEPENDENCY_HLSL + +#endif // SHARC \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/RT/SubsurfaceShading.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/RT/SubsurfaceShading.hlsli new file mode 100644 index 0000000000..371f9f68c5 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/RT/SubsurfaceShading.hlsli @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2024-2025, NVIDIA 
CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + +#ifndef __SUBSURFACESHADING_HLSLI__ +#define __SUBSURFACESHADING_HLSLI__ + +#define USE_DIFFUSE_MEAN_FREE_PATH 1 + +#define SSS_TRANSMISSION_BSDF_SAMPLE_COUNT 1 +#define SSS_TRANSMISSION_PER_BSDF_SCATTERING_SAMPLE_COUNT 1 + +#include "Raytracing/Includes/RT/Shading.hlsli" +#include "Raytracing/Includes/RT/Rays.hlsli" + +#include "Raytracing/Includes/Materials/LobeType.hlsli" +#include "Raytracing/Includes/Materials/SubsurfaceScattering.hlsli" +#include "Raytracing/Includes/Materials/Transmission.hlsli" + +float3 evalSingleScatteringTransmission( + const Surface sourceSurface, + const BRDFContext sourceBRDFContext, + const Material sourceMaterial, + const Instance sourceInstance, + const SubsurfaceMaterialData subsurfaceMaterialData, + const SubsurfaceInteraction subsurfaceInteraction, + RayCone rayCone, + inout uint randomSeed) +{ + float3 radiance = float3(0.0f, 0.0f, 0.0f); + + const SubsurfaceMaterialCoefficients sssMaterialCoefficients = ComputeSubsurfaceMaterialCoefficients(subsurfaceMaterialData); + + for (int bsdfSampleIndex = 0; bsdfSampleIndex < SSS_TRANSMISSION_BSDF_SAMPLE_COUNT; ++bsdfSampleIndex) + { + // Trace rays for diffuse transmittance into the volume + const float3 refractedRayDirection = CalculateRefractionRay(subsurfaceInteraction, float2(Random(randomSeed), Random(randomSeed))); + const float3 hitPos = subsurfaceInteraction.centerPosition; + + float thickness = 0.0f; + float3 backPosition; + { + Payload payload; + payload.hitDistance = -1.0f; + payload.primitiveIndex = 0; + payload.PackBarycentrics(float2(0.0f, 0.0f)); + 
payload.PackInstanceGeometryIndex(0, 0); + payload.randomSeed = randomSeed; + + RayDesc transmissionRay; + transmissionRay.Origin = OffsetRay(hitPos, -sourceSurface.FaceNormal); + transmissionRay.Direction = refractedRayDirection; + transmissionRay.TMin = 0.0f; + transmissionRay.TMax = RAY_TMAX; + + const uint rayFlags = RAY_FLAG_FORCE_OPAQUE; + TraceRay(Scene, rayFlags, 0xFF, DIFFUSE_RAY_HITGROUP_IDX, 0, DIFFUSE_RAY_MISS_IDX, transmissionRay, payload); + randomSeed = payload.randomSeed; + + thickness = payload.hitDistance; + backPosition = transmissionRay.Origin + thickness * transmissionRay.Direction; + + if (payload.Hit()) + { + float3 localPosition = transmissionRay.Origin + refractedRayDirection * payload.hitDistance; + + Instance sampleInstance; + Material sampleMaterial; + Surface sampleSurface = Surface(localPosition, payload, refractedRayDirection, rayCone, sampleInstance, sampleMaterial); + + const float3 sampleGeometryNormal = sampleSurface.FaceNormal; + const float3 sampleShadingNormal = sampleSurface.Normal; + backPosition = OffsetRay(backPosition, sampleGeometryNormal, false); + + // Prepare data needed to evaluate the light + float3 incidentVector = 0.0f; + float lightDistance = 0.0f; + float3 irradiance = 0.0f; + GetLightIrradianceMIS(sampleInstance, sampleSurface, irradiance, incidentVector, lightDistance, randomSeed); + + const float3 vectorToLight = normalize(incidentVector); + + if (any(irradiance > MIN_DIFFUSE_SHADOW)) + { + const float3 lightVisibility = TraceRayShadowFinite(Scene, sampleSurface, vectorToLight, lightDistance, randomSeed); + if (any(lightVisibility > 0.0f)) + { + const float3 lightRadiance = irradiance * lightVisibility; + const float3 transmissionBsdf = EvaluateBoundaryTerm(sourceSurface.Normal, + vectorToLight, + refractedRayDirection, + sampleShadingNormal, + thickness, + sssMaterialCoefficients); + + // Li * bsdf * cosTheta / CosineLobePDF = Li * bsdf * cosTheta / (cosTheta / pi) = Li * bsdf * pi + radiance += lightRadiance 
* transmissionBsdf * K_PI; + } + } + } + } + + // Trace rays along the scattering ray + const float stepSize = thickness / (SSS_TRANSMISSION_PER_BSDF_SCATTERING_SAMPLE_COUNT + 1); + float accumulatedT = 0.0f; + float3 scatteringThroughput = float3(0.0f, 0.0f, 0.0f); + + for (int sampleIndex = 0; sampleIndex < SSS_TRANSMISSION_PER_BSDF_SCATTERING_SAMPLE_COUNT; ++sampleIndex) + { + // TODO: Important Sampling along the scattering ray direction + const float currentT = accumulatedT + stepSize; + accumulatedT = currentT; + + if (currentT >= thickness) + { + // TODO: Here should be continue if important sampling + break; + } + + const float3 samplePosition = hitPos + currentT * refractedRayDirection; + const float2 hgRnd = float2(Random(randomSeed), Random(randomSeed)); + const float3 scatteringDirection = SampleDirectionHenyeyGreenstein(hgRnd, subsurfaceMaterialData.g, refractedRayDirection); + + RayDesc scatteringRay; + scatteringRay.Origin = samplePosition; + scatteringRay.Direction = scatteringDirection; + scatteringRay.TMin = 0.0f; + scatteringRay.TMax = RAY_TMAX; + + Payload scatteringPayload; + scatteringPayload.hitDistance = -1.0f; + scatteringPayload.primitiveIndex = 0; + scatteringPayload.PackBarycentrics(float2(0.0f, 0.0f)); + scatteringPayload.PackInstanceGeometryIndex(0, 0); + scatteringPayload.randomSeed = randomSeed; + + const uint rayFlags = RAY_FLAG_FORCE_OPAQUE; + TraceRay(Scene, rayFlags, 0xFF, DIFFUSE_RAY_HITGROUP_IDX, 0, DIFFUSE_RAY_MISS_IDX, scatteringRay, scatteringPayload); + randomSeed = scatteringPayload.randomSeed; + + if (scatteringPayload.Hit()) + { + float3 scatterLocalPosition = scatteringRay.Origin + scatteringDirection * scatteringPayload.hitDistance; + + Instance scatterInstance; + Material scatterMaterial; + Surface scatterSurface = Surface(scatterLocalPosition, scatteringPayload, scatteringDirection, rayCone, scatterInstance, scatterMaterial); + + const float3 scatteringSampleGeometryNormal = scatterSurface.FaceNormal; + + float3 
scatteringBoundaryPosition = samplePosition + scatteringPayload.hitDistance * scatteringDirection; + scatteringBoundaryPosition = OffsetRay(scatteringBoundaryPosition, scatteringSampleGeometryNormal, false); + + // Prepare data needed to evaluate the light + float3 incidentVector = 0.0f; + float lightDistance = 0.0f; + float3 irradiance = 0.0f; + GetLightIrradianceMIS(scatterInstance, scatterSurface, irradiance, incidentVector, lightDistance, randomSeed); + + const float3 vectorToLight = normalize(incidentVector); + + if (any(irradiance > MIN_DIFFUSE_SHADOW)) + { + const float3 lightVisibility = TraceRayShadowFinite(Scene, scatterSurface, vectorToLight, lightDistance, randomSeed); + if (any(lightVisibility > 0.0f)) + { + const float3 lightRadiance = irradiance * lightVisibility; + const float totalScatteringDistance = currentT + scatteringPayload.hitDistance; + const float3 ssTransmissionBsdf = EvaluateSingleScattering(vectorToLight, + scatterSurface.Normal, + totalScatteringDistance, + sssMaterialCoefficients); + + scatteringThroughput += lightRadiance * ssTransmissionBsdf * stepSize; // Li * BSDF / PDF + } + } + } + } + + radiance += scatteringThroughput / SSS_TRANSMISSION_PER_BSDF_SCATTERING_SAMPLE_COUNT; + } + + radiance /= SSS_TRANSMISSION_BSDF_SAMPLE_COUNT; + + return radiance; +} + +float3 EvaluateSubsurfaceNEE( + const Surface surface, + const BRDFContext brdfContext, + const Material material, + const Instance instance, + const Payload initialPayload, + RayCone rayCone, + inout uint randomSeed) +{ + SubsurfaceMaterialData subsurfaceMaterialData = CreateDefaultSubsurfaceMaterialData(); + subsurfaceMaterialData.transmissionColor = surface.SubsurfaceData.TransmissionColor; + subsurfaceMaterialData.scatteringColor = surface.SubsurfaceData.ScatteringColor; + subsurfaceMaterialData.scale = surface.SubsurfaceData.Scale; + subsurfaceMaterialData.g = surface.SubsurfaceData.Anisotropy; + + if (Frame.SSSMaterialOverride) { + subsurfaceMaterialData.transmissionColor = 
Frame.OverrideSSSTransmissionColor; + subsurfaceMaterialData.scatteringColor = Frame.OverrideSSSScatteringColor; + subsurfaceMaterialData.scale = Frame.OverrideSSSScale; + subsurfaceMaterialData.g = Frame.OverrideSSSAnisotropy; + } + + const float3 geometryNormal = surface.FaceNormal; + const float3 shadingNormal = surface.Normal; + + const float3 tangentWorld = any(dot(surface.Tangent, surface.Tangent) > 1e-5f) ? + normalize(surface.Tangent) : + (dot(geometryNormal, float3(0.0f, 1.0f, 0.0f)) < 0.999f ? cross(geometryNormal, float3(0.0f, 1.0f, 0.0f)) : + cross(geometryNormal, float3(1.0f, 0.0f, 0.0f))); + + const float3 biTangentWorld = cross(tangentWorld, geometryNormal); + SubsurfaceInteraction subsurfaceInteraction = + CreateSubsurfaceInteraction(surface.Position, shadingNormal, tangentWorld, biTangentWorld); + + float3 radiance = float3(0.0f, 0.0f, 0.0f); + + float3 incidentVector; + float lightDistance; + float3 irradiance; + GetLightIrradianceMIS(instance, surface, irradiance, incidentVector, lightDistance, randomSeed); + const float3 vectorToLight = normalize(incidentVector); + const float3 lightVector = vectorToLight * lightDistance; + + if (any(irradiance) > MIN_DIFFUSE_SHADOW) + { + const float3 centerSpecularF0 = surface.F0; + const float3 diffuseAlbedo = surface.DiffuseAlbedo; + subsurfaceMaterialData.transmissionColor = Frame.SSSMaterialOverride ? 
subsurfaceMaterialData.transmissionColor : diffuseAlbedo; + + const float3 cameraUp = float3( + Frame.ViewInverse[0][0], + Frame.ViewInverse[1][0], + Frame.ViewInverse[2][0]); + + const float3 cameraDirection = float3( + Frame.ViewInverse[0][2], + Frame.ViewInverse[1][2], + Frame.ViewInverse[2][2]); + + if (Random(randomSeed) < 0.5f) + { + subsurfaceInteraction.normal = -cameraDirection; + subsurfaceInteraction.tangent = cameraUp; + subsurfaceInteraction.biTangent = cross(cameraUp, -cameraDirection); + } + + uint effectiveSample = 0; + + for (uint sssSampleIndex = 0; sssSampleIndex < Frame.SSSSampleCount; ++sssSampleIndex) + { + SubsurfaceSample subsurfaceSample; + + const float2 rand2 = float2(Random(randomSeed), Random(randomSeed)); + EvalBurleyDiffusionProfile(subsurfaceMaterialData, + subsurfaceInteraction, + Frame.SSSMaxSampleRadius, + Frame.EnableSssTransmission && false, // disable normalization + rand2, + subsurfaceSample); + + Payload samplePayload = SampleSubsurface(Scene, subsurfaceSample.samplePosition, subsurfaceInteraction.normal, RAY_TMAX, randomSeed); + + if (samplePayload.Hit() && samplePayload.InstanceIndex() == initialPayload.InstanceIndex()) + { + const float3 sampleLocalPosition = subsurfaceSample.samplePosition + samplePayload.hitDistance * (-subsurfaceInteraction.normal); + Instance sampleInstance; + Material sampleMaterial; + Surface sampleSurface = Surface(sampleLocalPosition, samplePayload, -subsurfaceInteraction.normal, rayCone, sampleInstance, sampleMaterial); + if (sampleSurface.SubsurfaceData.HasSubsurface == 0) + { + continue; + } + + const float3 sampleGeometryNormal = sampleSurface.FaceNormal; + const float3 sampleShadingNormal = sampleSurface.Normal; + const bool transition = dot(vectorToLight, sampleGeometryNormal) < 0.0f; + const float3 samplePosition = subsurfaceSample.samplePosition - subsurfaceInteraction.normal * samplePayload.hitDistance; + + float3 sampleShadowHitPos = OffsetRay(samplePosition, sampleGeometryNormal, 
transition); + + // Prepare data needed to evaluate the sample light + float3 sampleIncidentVector = float3(0.0f, 0.0f, 0.0f); + float sampleLightDistance = 0.0f; + float3 sampleLightIrradiance = 0.0f; + + GetLightIrradianceMIS(sampleInstance, sampleSurface, sampleLightIrradiance, sampleIncidentVector, sampleLightDistance, randomSeed); + + if (any(sampleLightIrradiance > MIN_DIFFUSE_SHADOW)) { + const float3 vectorToLight = normalize(sampleIncidentVector); + + // Cast shadow ray towards the selected light for current SSS sample + const float3 sampleLightVisibility = TraceRayShadowFinite(Scene, sampleSurface, vectorToLight, sampleLightDistance, randomSeed); + if (any(sampleLightVisibility > 0.0f)) + { + const float3 sampleLightRadiance = sampleLightIrradiance * sampleLightVisibility; + const float cosThetaI = min(max(0.00001f, dot(vectorToLight, sampleShadingNormal)), 1.0f); + radiance += max(EvalBssrdf(subsurfaceSample, sampleLightRadiance, cosThetaI), 0.0f); + + ++effectiveSample; + } + } + } + } + + radiance /= (float)Frame.SSSSampleCount; + } + + if (Frame.EnableSssTransmission) + { + radiance += max(evalSingleScatteringTransmission( + surface, + brdfContext, + material, + instance, + subsurfaceMaterialData, + subsurfaceInteraction, + rayCone, + randomSeed), 0.0f); + } + + // Evaluate microfacet specular reflection at the surface + { + const bool transition = dot(vectorToLight, geometryNormal) < 0.0f; + const float3 shadowHitPosOffset = surface.Position; + const float3 shadowV = brdfContext.ViewDirection; + // Cast shadow ray towards the selected light + const float3 lightVisibility = TraceRayShadowFinite(Scene, surface, vectorToLight, lightDistance, randomSeed); + + if (any(lightVisibility > 0.0f)) + { + const float3 lightRadiance = irradiance * lightVisibility; + const float alpha = max(surface.Roughness * surface.Roughness, 0.01f); + float3 bsdf = evalMicrofacet(brdfContext.ViewDirection, vectorToLight, surface.Normal, alpha); + float3 halfVector = 
normalize(brdfContext.ViewDirection + vectorToLight); + float VdotH = saturate(dot(brdfContext.ViewDirection, halfVector)); + float3 F = evalFresnelSchlick(surface.F0, VdotH); + bsdf *= F; + + radiance += max(bsdf * lightRadiance, 0.0f); + } + } + + return radiance; +} + +#endif // __SUBSURFACESHADING_HLSLI__ \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Registers.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Registers.hlsli new file mode 100644 index 0000000000..a3a7680e32 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Registers.hlsli @@ -0,0 +1,41 @@ +#ifndef REGISTERS_HLSL +#define REGISTERS_HLSL + +#include "Raytracing/Includes/Types.hlsli" +#include "Raytracing/Includes/RT/SHaRC.hlsli" + +ConstantBuffer Frame : register(b0); + +RWTexture2D OutputTexture : register(u0); +RWTexture2D DiffuseAlbedoPathTracing : register(u1); +RWTexture2D NormalRoughnessPathTracing : register(u2); +RWTexture2D SpecularAlbedo : register(u3); +RWTexture2D SpecularHitDist : register(u4); + +#ifdef SHARC +RWStructuredBuffer u_SharcHashEntriesBuffer : register(u5); +RWStructuredBuffer u_SharcLockBuffer : register(u6); +RWStructuredBuffer u_SharcAccumulationBuffer : register(u7); +RWStructuredBuffer u_SharcResolvedBuffer : register(u8); +#endif + +Texture2D MainTexture : register(t0, space0); // RENDER_TARGETS::kMAIN +Texture2D DepthTexture : register(t1, space0); // RENDER_TARGETS_DEPTHSTENCIL::kMAIN - R32 +Texture2D AlbedoTexture : register(t2, space0); // ALBEDO - True albedo (not modulated by metalness) +Texture2D NormalRoughnessTexture : register(t3, space0); // "NORMALROUGHNESS" - World normals and roughness - Processed from GBuffer encoded view normals and smoothness +Texture2D GNMAOTexture : register(t4, space0); // MASKS2 - Geometry normals (Encoded) + metalness/AO (Packed) + +RaytracingAccelerationStructure Scene : register(t5, space0); +Texture2D SkyHemisphere : register(t6, space0); +StructuredBuffer 
Lights : register(t7, space0); +StructuredBuffer Shapes : register(t8, space0); +StructuredBuffer Instances : register(t9, space0); + +StructuredBuffer Vertices[] : register(t0, space1); +StructuredBuffer Triangles[] : register(t0, space2); +Texture2D Textures[] : register(t0, space3); + +SamplerState BaseSampler : register(s0); +//SamplerState EffectSampler : register(s1); + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/SharedData.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/SharedData.hlsli new file mode 100644 index 0000000000..009cafa6ef --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/SharedData.hlsli @@ -0,0 +1,170 @@ +#ifndef SHARED_DATA_HLSL +#define SHARED_DATA_HLSL +// A lighter version of SharedData containing only the necessary structs for HLSL 6.0+ compatibility + +#ifndef __cplusplus +typedef bool BOOL; +#endif + +struct CPMSettings +{ + BOOL EnableComplexMaterial; + BOOL EnableParallax; + BOOL EnableTerrainParallax; + BOOL EnableHeightBlending; + BOOL EnableShadows; + BOOL ExtendShadows; + BOOL EnableParallaxWarpingFix; + uint pad0; +}; +#ifdef __cplusplus +static_assert(sizeof(CPMSettings) % 16 == 0); +#endif + +struct WetnessEffectsSettings +{ + #ifndef __cplusplus + row_major + #endif + float4x4 OcclusionViewProj; + + float Time; + float Raining; + float Wetness; + float PuddleWetness; + + BOOL EnableWetnessEffects; + float MaxRainWetness; + float MaxPuddleWetness; + float MaxShoreWetness; + + uint ShoreRange; + float PuddleRadius; + float PuddleMaxAngle; + float PuddleMinWetness; + + float MinRainWetness; + float SkinWetness; + float WeatherTransitionSpeed; + BOOL EnableRaindropFx; + + BOOL EnableSplashes; + BOOL EnableRipples; + uint EnableVanillaRipples; + float RaindropFxRange; + + float RaindropGridSizeRcp; + float RaindropIntervalRcp; + float RaindropChance; + float SplashesLifetime; + + float SplashesStrength; + float SplashesMinRadius; + float 
SplashesMaxRadius; + float RippleStrength; + + float RippleRadius; + float RippleBreadth; + float RippleLifetimeRcp; + float pad0; +}; +#ifdef __cplusplus +static_assert(sizeof(WetnessEffectsSettings) % 16 == 0); +#endif + +struct CloudShadowsSettings +{ + float Opacity; + float3 pad0; +}; +#ifdef __cplusplus +static_assert(sizeof(CloudShadowsSettings) % 16 == 0); +#endif + +struct HairSpecularSettings +{ + uint Enabled; + float HairGlossiness; + float SpecularMult; + float DiffuseMult; + uint EnableTangentShift; + float PrimaryTangentShift; + float SecondaryTangentShift; + float HairSaturation; + float SpecularIndirectMult; + float DiffuseIndirectMult; + float BaseColorMult; + float Transmission; + uint EnableSelfShadow; + float SelfShadowStrength; + float SelfShadowExponent; + float SelfShadowScale; + uint HairMode; // 0: Kajiya-Kay, 1: Marschner + uint pad0; + uint pad1; + uint pad2; +}; +#ifdef __cplusplus +static_assert(sizeof(HairSpecularSettings) % 16 == 0); +#endif + +struct ExtendedTranslucencySettings +{ + uint MaterialModel; // [0,1,2,3] The MaterialModel + float Reduction; // [0, 1.0] The factor to reduce the transparency to matain the average transparency [0,1] + float Softness; // [0, 2.0] The soft remap upper limit [0,2] + float Strength; // [0, 1.0] The inverse blend weight of the effect +}; +#ifdef __cplusplus +static_assert(sizeof(ExtendedTranslucencySettings) % 16 == 0); +#endif + +struct LinearLightingSettings +{ + uint enableLinearLighting; + uint enableGammaCorrection; + uint isDirLightLinear; + float dirLightMult; + float lightGamma; + float colorGamma; + float emitColorGamma; + float glowmapGamma; + float ambientGamma; + float fogGamma; + float fogAlphaGamma; + float effectGamma; + float effectAlphaGamma; + float skyGamma; + float waterGamma; + float vlGamma; + float vanillaDiffuseColorMult; + float directionalLightMult; // Computed based on interior/exterior + float pointLightMult; + float ambientMult; + float emitColorMult; + float 
glowmapMult; + float effectLightingMult; + float membraneEffectMult; + float bloodEffectMult; + float projectedEffectMult; + float deferredEffectMult; + float otherEffectMult; +}; +#ifdef __cplusplus +static_assert(sizeof(LinearLightingSettings) % 16 == 0); +#endif + +struct FeatureData +{ + CPMSettings ExtendedMaterial; + WetnessEffectsSettings WetnessEffects; + CloudShadowsSettings CloudShadows; + HairSpecularSettings HairSpecular; + ExtendedTranslucencySettings ExtendedTranslucency; + LinearLightingSettings LinearLighting; +}; +#ifdef __cplusplus +static_assert(sizeof(FeatureData) % 16 == 0); +#endif + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Surface.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Surface.hlsli new file mode 100644 index 0000000000..906d832278 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Surface.hlsli @@ -0,0 +1,720 @@ +#ifndef SURFACE_HLSL +#define SURFACE_HLSL + +#include "Raytracing/Includes/Common.hlsli" +#include "Raytracing/Includes/ColorConversions.hlsli" +#include "Raytracing/Includes/PBR.hlsli" +#include "Raytracing/Includes/Types.hlsli" +#include "Raytracing/Includes/RT/Geometry.hlsli" +#include "Raytracing/Includes/RT/CommonRT.hlsli" +#include "Raytracing/Includes/VanillaToPBR.hlsli" + +#include "Raytracing/Includes/Materials/TexLODHelpers.hlsli" + +#define HAIRSETTINGS Frame.Features.HairSpecular + +// Helpers + +float3 SafeNormalize(float3 input) +{ + float lenSq = dot(input,input); + return input * rsqrt(max( 1.175494351e-38, lenSq)); +} + +float3 FlipIfOpposite(float3 normal, float3 referenceNormal) +{ + return (dot(normal, referenceNormal)>=0)?(normal):(-normal); +} + +struct Subsurface +{ + float3 TransmissionColor; + float Scale; + float3 ScatteringColor; + float Anisotropy; + uint HasSubsurface; +}; + +#define Surface(...) 
static Surface ctor(__VA_ARGS__) +struct Surface +{ + float3 Position; + float3 GeomNormal; + float3 GeomTangent; + float3 Normal; + float3 Tangent; + float3 Bitangent; + float3 FaceNormal; + float3 Albedo; + float3 DiffuseAlbedo; + float Roughness; + float Metallic; + float3 Emissive; + float AO; + float3 F0; + float IOR; + float3 TransmissionColor; + Subsurface SubsurfaceData; + float DiffTrans; + float SpecTrans; + +#if defined(FULL_MATERIAL) + float3 SubsurfaceColor; + float Thickness; + float3 CoatColor; + float CoatStrength; + float CoatRoughness; + float3 CoatF0; + float3 FuzzColor; + float FuzzWeight; + float GlintScreenSpaceScale; + float GlintLogMicrofacetDensity; + float GlintMicrofacetRoughness; + float GlintDensityRandomization; + //Glints::GlintCachedVars GlintCache; + float Noise; +#endif + + float MipLevel; + + float3 Mul(float3 tangentSample) + { + return Tangent * tangentSample.x + + Bitangent * tangentSample.y + + Normal * tangentSample.z; + } + + float3 ToLocal(float3 v) + { + return float3( + dot(v, Tangent), + dot(v, Bitangent), + dot(v, Normal) + ); + } + + float3 FromLocal(float3 v) + { + return Mul(v); + } + + void FlipNormal() + { + Normal = -Normal; + GeomNormal = -GeomNormal; + FaceNormal = -FaceNormal; + } + + void DefaultMaterial(in Vertex v0, in Vertex v1, in Vertex v2, in float3 uvw, in float3 normalWS, in float3 tangentWS, in float3 bitangentWS, float3x3 objectToWorld3x3, in Material material) + { + float2 texCoord0 = material.TexCoord(Interpolate(v0.Texcoord0, v1.Texcoord0, v2.Texcoord0, uvw)); + TransmissionColor = float3(0.0f, 0.0f, 0.0f); + +#if defined(DEBUG_SHADERTYPE) + [branch] + if (material.ShaderType == ShaderType::TruePBR) { + Albedo = float3(1.0f, 0.0f, 0.0f); + } else if (material.ShaderType == ShaderType::Lighting) { + Albedo = float3(0.0f, 1.0f, 0.0f); + } else if (material.ShaderType == ShaderType::Effect) { + Albedo = float3(0.0f, 0.0f, 1.0f); + } else { + Albedo = float3(1.0f, 1.0f, 1.0f); + } +#elif 
defined(DEBUG_NOSAMPLING) + Albedo = float3(0.5f, 0.5f, 0.5f); +#else + Texture2D baseTexture = Textures[NonUniformResourceIndex(material.BaseTexture())]; + + float4 vertexColor = Interpolate(v0.Color.unpack(), v1.Color.unpack(), v2.Color.unpack(), uvw); + vertexColor = saturate(vertexColor / max(max(vertexColor.r, vertexColor.g), vertexColor.b)); + + const bool isWindows = (material.Feature == Feature::kGlowMap || material.PBRFlags & PBR::Flags::HasEmissive) && material.ShaderFlags & ShaderFlags::kAssumeShadowmask; + float3 windowAlpha = float3(0.0f, 0.0f, 0.0f); + + [branch] + if (material.ShaderType == ShaderType::TruePBR) + { + Texture2D rmaosTexture = Textures[NonUniformResourceIndex(material.RMAOSTexture())]; + Texture2D emissiveTexture = Textures[NonUniformResourceIndex(material.EmissiveTexture())]; + + float3 albedo = baseTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).rgb; + float4 rmaos = rmaosTexture.SampleLevel(BaseSampler, texCoord0, MipLevel); + float3 emissive = emissiveTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).rgb; + + if (isWindows) { + windowAlpha = emissive; + } + + Albedo = albedo * material.BaseColor().rgb * vertexColor.rgb; + Emissive = emissive * EmitColorToLinear(material.EffectColor().rgb) * material.EffectColor().a * Frame.Emissive * EmitColorMult(); + Roughness = saturate(rmaos.x * material.RoughnessScale()); + Metallic = saturate(rmaos.y); + AO = rmaos.z; + F0 = material.SpecularLevel() * rmaos.w; + + if ((material.PBRFlags & PBR::Flags::Subsurface) && !(material.ShaderFlags & ShaderFlags::kTwoSided)) { + Texture2D subsurfaceTexture = Textures[NonUniformResourceIndex(material.SubsurfaceTexture())]; + + float4 subsurfaceColor = subsurfaceTexture.SampleLevel(BaseSampler, texCoord0, MipLevel); + float thickness = subsurfaceColor.a * material.SubsurfaceScale(); + SubsurfaceData.ScatteringColor = subsurfaceColor.rgb * material.SubsurfaceScatteringColor().rgb; + SubsurfaceData.TransmissionColor = Albedo; + + TransmissionColor 
= SubsurfaceData.ScatteringColor; + + SubsurfaceData.Scale = 40.0f; + SubsurfaceData.Anisotropy = 0.0f; + + SubsurfaceData.HasSubsurface = any(SubsurfaceData.ScatteringColor) > 0.0f ? 1 : 0; + } else if ((material.PBRFlags & PBR::Flags::Subsurface) && (material.ShaderFlags & ShaderFlags::kTwoSided)) { + // Two sided subsurface - for leaves and thin objects + Texture2D subsurfaceTexture = Textures[NonUniformResourceIndex(material.SubsurfaceTexture())]; + // Just use simple diffuse transmission for thin objects + float4 subsurfaceColor = subsurfaceTexture.SampleLevel(BaseSampler, texCoord0, MipLevel); + float thickness = subsurfaceColor.a * material.SubsurfaceScale(); + TransmissionColor = subsurfaceColor.rgb * material.SubsurfaceScatteringColor().rgb; + DiffTrans = 1 - thickness; + } + } else if (material.ShaderType == ShaderType::Lighting) { + float3 diffuse = baseTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).rgb; + + Albedo = VanillaDiffuseColor(diffuse * vertexColor.rgb); + + if (material.Feature == Feature::kHairTint) { + float3 hairTint = material.BaseColor().rgb; + Albedo *= VanillaDiffuseColor(hairTint); + } + + [branch] + if (material.ShaderFlags & ShaderFlags::kSpecular) { + float3 specularColor = material.SpecularColor().rgb; + + [branch] + if (material.ShaderFlags & ShaderFlags::kModelSpaceNormals) { + Texture2D specularTexture = Textures[NonUniformResourceIndex(material.SpecularTexture())]; + specularColor *= specularTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).r; + } else { + Texture2D normalTexture = Textures[NonUniformResourceIndex(material.NormalTexture())]; + specularColor *= normalTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).a; + } + +#if defined(EXP_VANILLA_PBR_METAL) + float specularity = CalcSpecularity(specularColor, material.SpecularColor().a); + float roughnessFromShininess = material.RoughnessScale(); + + Metallic = CalcMetallic(Albedo, specularity, roughnessFromShininess); +#endif + + Roughness = 
material.RoughnessScale(); + + F0 = clamp(0.08f * specularColor * material.SpecularColor().a, 0.02f, 0.08f); + } + + [branch] + if (material.ShaderFlags & ShaderFlags::kEnvMap || material.ShaderFlags & ShaderFlags::kEyeReflect) { + Texture2D envTexture = Textures[NonUniformResourceIndex(material.EnvTexture())]; + Texture2D envMaskTexture = Textures[NonUniformResourceIndex(material.EnvMaskTexture())]; + + float3 envColor = ColorToLinear(envTexture.SampleLevel(BaseSampler, texCoord0, 15).rgb); + float envMask = envMaskTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).r; + + Albedo = lerp(Albedo, envColor, envMask); + Metallic = envMask; + } + + [branch] + if (material.Feature == Feature::kGlowMap) { + Texture2D glowTexture = Textures[NonUniformResourceIndex(material.GlowTexture())]; + float3 glow = glowTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).rgb; + + if (isWindows) { + windowAlpha = glow; + } + Emissive = GlowToLinear(glow) * EmitColorToLinear(material.EffectColor().rgb) * material.EffectColor().a * Frame.Emissive * EmitColorMult(); + } + else + { + Emissive = Albedo * EmitColorToLinear(material.EffectColor().rgb) * material.EffectColor().a * Frame.Emissive * EmitColorMult(); + } + + [branch] + if (material.Feature == Feature::kFaceGen) { + Texture2D detailTexture = Textures[NonUniformResourceIndex(material.DetailTexture())]; + float3 detailColor = detailTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).rgb; + detailColor = float3(3.984375, 3.984375, 3.984375) * (float3(0.00392156886, 0, 0.00392156886) + detailColor); + + Texture2D tintTexture = Textures[NonUniformResourceIndex(material.TintTexture())]; + float3 tintColor = tintTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).rgb; + tintColor = tintColor * Albedo * 2.0f; + tintColor = tintColor - tintColor * Albedo; + Albedo = (Albedo * Albedo + tintColor) * detailColor; + + } else if (material.Feature == Feature::kFaceGenRGBTint) { + float3 tintColor = material.BaseColor().rgb * Albedo * 
2.0f; + tintColor = tintColor - tintColor * Albedo; + Albedo = float3(1.01171875f, 0.99609375f, 1.01171875f) * (Albedo * Albedo + tintColor); + } + + [branch] + if (material.Feature == Feature::kFaceGen || material.Feature == Feature::kFaceGenRGBTint) { + F0 = 0.02776f; + Metallic = 0.0f; + SubsurfaceData.HasSubsurface = 1; + SubsurfaceData.Anisotropy = -0.5f; + + // Typical skin values + SubsurfaceData.ScatteringColor = float3(4.820f, 1.690f, 1.090f); + SubsurfaceData.TransmissionColor = Albedo; + SubsurfaceData.Scale = 1.f; + } + + [branch] + if (material.Feature == Feature::kEye) { + Roughness = 0.08f; + F0 = 0.02776f; + Metallic = 0.0f; + SubsurfaceData.HasSubsurface = 1; + SubsurfaceData.Anisotropy = -0.5f; + // Typical eye values + SubsurfaceData.ScatteringColor = float3(1.0f, 0.8f, 0.6f); + SubsurfaceData.TransmissionColor = Albedo; + SubsurfaceData.Scale = 1.f; + } + + } else if (material.ShaderType == ShaderType::Effect) { + float3 base = float3(1, 1, 1); + + [branch] + if (material.ShaderFlags & ShaderFlags::kGrayscaleToPaletteColor) + { + base *= baseTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).rgb; + } + + float3 baseColorMul = material.EffectColor().rgb; + + [branch] + if (material.ShaderFlags & ShaderFlags::kVertexColors && !(material.ShaderFlags & ShaderFlags::kProjectedUV)) + { + base *= vertexColor.rgb; + } + + float3 baseColor = base * baseColorMul; + + float baseColorScale = material.EffectColor().a; + + [branch] + if (material.ShaderFlags & ShaderFlags::kGrayscaleToPaletteColor) + { + Texture2D effectTexture = Textures[NonUniformResourceIndex(material.EffectTexture())]; + + float2 grayscaleToColorUv = float2(base.g, baseColorMul.x); + + baseColor = baseColorScale * effectTexture.SampleLevel(BaseSampler, grayscaleToColorUv, MipLevel).rgb; + } + + float3 baseColorLinear = EffectToLinear(baseColor); + + //Albedo = baseColorLinear; // This breaks sharc + Albedo = 0; + Emissive = baseColorLinear * Frame.Effect; + } + else + { + Albedo = 
float3(1.0f, 0.0f, 1.0f); + } + + [branch] + if (material.AlphaFlags == AlphaFlags::kAlphaBlend && !((material.Feature == Feature::kHairTint || material.Feature == Feature::kFaceGen || material.Feature == Feature::kFaceGenRGBTint || material.Feature == Feature::kEye))) { + float alpha = baseTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).a * material.BaseColor().a; + + [branch] + if ((material.ShaderFlags & ShaderFlags::kVertexAlpha) && !(material.ShaderFlags & ShaderFlags::kTreeAnim)) { + alpha *= vertexColor.a; + } + + TransmissionColor = lerp(float3(1.0f, 1.0f, 1.0f), Albedo, alpha); + Albedo *= alpha; + SpecTrans = 1.0f; + } + + [branch] + if (isWindows) { + TransmissionColor = windowAlpha; + Albedo *= 1.0f - windowAlpha; + Emissive *= 0; + Roughness = max(Roughness, 0.08f); // prevent delta transmission + SpecTrans = 1.0f; + } + + [branch] + if (material.ShaderFlags & ShaderFlags::kExternalEmittance) { + Emissive *= Frame.EmittanceColor; + } +#endif + +#if defined(DEBUG_NONORMALMAP) + Normal = normalWS; + Tangent = tangentWS; + Bitangent = bitangentWS; +#else + Texture2D normalTexture = Textures[NonUniformResourceIndex(material.NormalTexture())]; + float3 normal = normalTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).xyz; + + float handedness = (dot(cross(normalWS, tangentWS), bitangentWS) < 0.0f) ? 
-1.0f : 1.0f; + + NormalMap( + normal, + handedness, + normalWS, tangentWS, bitangentWS, + Normal, Tangent, Bitangent + ); +#endif + + // Hair flowmap processing +#if HAIR_MODE + [branch] + if (material.Feature == Feature::kHairTint && HAIRSETTINGS.Enabled) { + Roughness = 1.0f - saturate(HAIRSETTINGS.HairGlossiness * 0.01f); + Albedo = saturate(Albedo * HAIRSETTINGS.BaseColorMult); + [branch] + if (material.ShaderFlags & ShaderFlags::kBackLighting) { + Texture2D hairFlowMapTexture = Textures[NonUniformResourceIndex(material.SpecularTexture())]; + uint2 hairFlowDimensions; + hairFlowMapTexture.GetDimensions(hairFlowDimensions.x, hairFlowDimensions.y); + + [branch] + if (hairFlowDimensions.x > 32 && hairFlowDimensions.y > 32) { + float2 sampledHairFlow2D = hairFlowMapTexture.SampleLevel(BaseSampler, texCoord0, MipLevel).xy; + + [branch] + if (sampledHairFlow2D.x > 0.0 || sampledHairFlow2D.y > 0.0) { + float3 sampledHairFlow = float3(sampledHairFlow2D * 2.0f - 1.0f, 0.0f); + float3x3 tbn = float3x3(Tangent, Bitangent, Normal); + float3 hairRootDirection = normalize(mul(sampledHairFlow, tbn)); + + // Re-orthogonalize T and B to N and the new hair root direction + hairRootDirection = normalize(hairRootDirection - Normal * dot(hairRootDirection, Normal)); + Bitangent = hairRootDirection; + + float hairHandedness = (dot(cross(Normal, Tangent), Bitangent) < 0.0f) ? 
-1.0f : 1.0f; + Tangent = normalize(cross(Bitangent, Normal)) * hairHandedness; + } + } + } + } +#endif + } + + float4 BlendLandTexture(uint16_t textureIndex, float2 texcoord, float weight) + { + if (weight > LAND_MIN_WEIGHT) + { + Texture2D texture = Textures[NonUniformResourceIndex(textureIndex)]; + return texture.SampleLevel(BaseSampler, texcoord, MipLevel) * weight; + } + else + { + return float4(0.0f, 0.0f, 0.0f, 0.0f); + } + } + + void LandMaterial(in Vertex v0, in Vertex v1, in Vertex v2, float3 uvw, float3 normalWS, float3 tangentWS, float3 bitangentWS, in Material material) + { + float2 texCoord0 = material.TexCoord(Interpolate(v0.Texcoord0, v1.Texcoord0, v2.Texcoord0, uvw)); + + Texture2D overlayTexture = Textures[NonUniformResourceIndex(material.OverlayTexture())]; + Texture2D noiseTexture = Textures[NonUniformResourceIndex(material.NoiseTexture())]; + + float4 vertexColor = Interpolate(v0.Color.unpack(), v1.Color.unpack(), v2.Color.unpack(), uvw); + + float handedness = (dot(cross(normalWS, tangentWS), bitangentWS) < 0.0f) ? 
-1.0f : 1.0f; + + float4 landBlend0 = Interpolate(v0.LandBlend0.unpack(), v1.LandBlend0.unpack(), v2.LandBlend0.unpack(), uvw); + float4 landBlend1 = Interpolate(v0.LandBlend1.unpack(), v1.LandBlend1.unpack(), v2.LandBlend1.unpack(), uvw); + + // Normalise blend weights + float totalWeight = landBlend0.x + landBlend0.y + landBlend0.z + + landBlend0.w + landBlend1.x + landBlend1.y; + + landBlend0 /= totalWeight; + landBlend1.xy /= totalWeight; + + float3 baseColor = BlendLandTexture(material.Texture0, texCoord0, landBlend0.x).rgb + BlendLandTexture(material.Texture1, texCoord0, landBlend0.y).rgb + + BlendLandTexture(material.Texture2, texCoord0, landBlend0.z).rgb + BlendLandTexture(material.Texture3, texCoord0, landBlend0.w).rgb + + BlendLandTexture(material.Texture4, texCoord0, landBlend1.x).rgb + BlendLandTexture(material.Texture5, texCoord0, landBlend1.y).rgb; + + baseColor *= vertexColor.rgb; + + [branch] + if (material.ShaderType == ShaderType::TruePBR) + { + Albedo = baseColor; + + float4 rmaos = BlendLandTexture(material.Texture12, texCoord0, landBlend0.x) + BlendLandTexture(material.Texture13, texCoord0, landBlend0.y) + + BlendLandTexture(material.Texture14, texCoord0, landBlend0.z) + BlendLandTexture(material.Texture15, texCoord0, landBlend0.w) + + BlendLandTexture(material.Texture16, texCoord0, landBlend1.x) + BlendLandTexture(material.Texture17, texCoord0, landBlend1.y); + + Roughness = saturate(rmaos.x * 1.0f); // material.RoughnessScale() + Metallic = saturate(rmaos.y); + AO = rmaos.z; + F0 = PBR::Defaults::F0 * rmaos.w; //material.SpecularLevel() + } + else if (material.ShaderType == ShaderType::Lighting) + { + Albedo = baseColor; // GammaToTrueLinear looks wonky + } + +#if defined(DEBUG_NONORMALMAP) + Normal = normalWS; + Tangent = tangentWS; + Bitangent = bitangentWS; +#else + float3 normal = BlendLandTexture(material.Texture6, texCoord0, landBlend0.x).rgb + BlendLandTexture(material.Texture7, texCoord0, landBlend0.y).rgb + + 
BlendLandTexture(material.Texture8, texCoord0, landBlend0.z).rgb + BlendLandTexture(material.Texture9, texCoord0, landBlend0.w).rgb + + BlendLandTexture(material.Texture10, texCoord0, landBlend1.x).rgb + BlendLandTexture(material.Texture11, texCoord0, landBlend1.y).rgb; + + NormalMap( + normal, + handedness, + normalWS, tangentWS, bitangentWS, + Normal, Tangent, Bitangent + ); +#endif + } + + + void TestMaterial(in Vertex v0, in Vertex v1, in Vertex v2, in float3 uvw, in float3 normalWS, in float3 tangentWS, in float3 bitangentWS, in Material material) + { + Albedo = 0.18f; // Neutral grey + TransmissionColor = float3(0.0f, 0.0f, 0.0f); + + Normal = normalWS; + Tangent = tangentWS; + Bitangent = bitangentWS; + } + + float ComputeRayConeTriangleLODValue(in Vertex v0, in Vertex v1, in Vertex v2, float3x3 world) + { + float3 vertexPositions[3]; + vertexPositions[0] = v0.Position; + vertexPositions[1] = v1.Position; + vertexPositions[2] = v2.Position; + + float2 vertexTexcoords[3]; + vertexTexcoords[0] = v0.Texcoord0; + vertexTexcoords[1] = v1.Texcoord0; + vertexTexcoords[2] = v2.Texcoord0; + + return computeRayConeTriangleLODValue( + vertexPositions, + vertexTexcoords, + world + ); + } + + Surface(float3 position, Payload payload, float3 rayDir, RayCone rayCone, out Instance instance, out Material material) + { + Surface surface; + + surface.Position = position; + surface.SubsurfaceData = (Subsurface)0; + surface.DiffTrans = 0.0f; + surface.SpecTrans = 0.0f; + + Shape shape = GetShape(payload, instance); + + // Loads all geometry releated data + Vertex v0, v1, v2; + GetVertices(shape.GeometryIdx, payload.primitiveIndex, v0, v1, v2); + float3 uvw = GetBary(payload.Barycentrics()); + + material = shape.Material; + + float2 texCoord0 = material.TexCoord(Interpolate(v0.Texcoord0, v1.Texcoord0, v2.Texcoord0, uvw)); + + float3x3 objectToWorld3x3 = mul((float3x3) instance.Transform, (float3x3) shape.Transform); + + float coneTexLODValue = 
surface.ComputeRayConeTriangleLODValue(v0, v1, v2, objectToWorld3x3); + + float3 objectSpaceFlatNormal = SafeNormalize(cross( + v1.Position - v0.Position, + v2.Position - v0.Position)); + + float3 normal0 = FlipIfOpposite(v0.Normal, objectSpaceFlatNormal); + float3 normal1 = FlipIfOpposite(v1.Normal, objectSpaceFlatNormal); + float3 normal2 = FlipIfOpposite(v2.Normal, objectSpaceFlatNormal); + + float3 normalWS = SafeNormalize(mul(objectToWorld3x3, Interpolate(normal0, normal1, normal2, uvw))); + float3 tangentWS = SafeNormalize(mul(objectToWorld3x3, Interpolate(v0.Tangent, v1.Tangent, v2.Tangent, uvw))); + float3 bitangentWS = SafeNormalize(mul(objectToWorld3x3, Interpolate(v0.Bitangent, v1.Bitangent, v2.Bitangent, uvw))); + + surface.FaceNormal = SafeNormalize(mul(objectToWorld3x3, objectSpaceFlatNormal)); + + surface.MipLevel = rayCone.computeLOD(coneTexLODValue, rayDir, normalWS, true) + Frame.TexLODBias; + + Texture2D baseTextureForLod = Textures[NonUniformResourceIndex(material.BaseTexture())]; + uint baseTexWidth, baseTexHeight; + baseTextureForLod.GetDimensions(baseTexWidth, baseTexHeight); + surface.MipLevel += 0.5f * SafeLog2(max(1.0f, (float)baseTexWidth * (float)baseTexHeight)); + surface.GeomNormal = normalWS; + surface.GeomTangent = tangentWS; + + surface.Albedo = float3(1.0f, 1.0f, 1.0f); + surface.Emissive = float3(0.0f, 0.0f, 0.0f); + surface.TransmissionColor = float3(0.0f, 0.0f, 0.0f); + surface.Roughness = PBR::Defaults::Roughness; + surface.Metallic = PBR::Defaults::Metallic; + surface.AO = 1.0f; + surface.F0 = PBR::Defaults::F0; + + +#if defined(DEBUG_TESTMAT) + surface.TestMaterial(v0, v1, v2, uvw, normalWS, tangentWS, bitangentWS, material); +#else + if (material.Feature == Feature::kMultiTexLandLODBlend) + { +# if defined(DEBUG_LAND) + surface.Albedo = float3(1.0f, 0.0f, 0.0f); +# else + surface.LandMaterial(v0, v1, v2, uvw, normalWS, tangentWS, bitangentWS, material); +# endif + } + else + { + surface.DefaultMaterial(v0, v1, v2, uvw, 
normalWS, tangentWS, bitangentWS, objectToWorld3x3, material); + } +#endif + +#ifdef DEBUG_WHITE_FURNACE + surface.Albedo = float3(1.0f, 1.0f, 1.0f); + surface.TransmissionColor = float3(0.0f, 0.0f, 0.0f); +#endif + + surface.Roughness = PBR::Roughness(surface.Roughness, Frame.Roughness.x, Frame.Roughness.y); + surface.Metallic = Remap(surface.Metallic, Frame.Metalness.x, Frame.Metalness.y); + + surface.DiffuseAlbedo = surface.Albedo * (1.0f - surface.Metallic); + + surface.F0 = PBR::F0(surface.F0, surface.Albedo, surface.Metallic); + surface.IOR = F0toIOR(surface.F0); + +# ifdef DEBUG_GLASS + surface.TransmissionColor = 1.0f; + surface.Albedo = float3(0.0f, 0.0f, 0.0f); + surface.DiffuseAlbedo = float3(0.0f, 0.0f, 0.0f); + surface.Emissive = float3(0.0f, 0.0f, 0.0f); + surface.Metallic = 0.0f; + surface.Roughness = 0.1f; + surface.F0 = 0.04f; + surface.IOR = 1.5f; + surface.Normal = surface.GeomNormal; + return surface; +# endif + +# ifdef DEBUG_METAL + surface.TransmissionColor = 0.0f; + surface.Albedo = 0.18f; + surface.DiffuseAlbedo = float3(0.0f, 0.0f, 0.0f); + surface.Emissive = float3(0.0f, 0.0f, 0.0f); + surface.Metallic = 1.0f; + surface.Roughness = 0.1f; + surface.F0 = 0.04f; + surface.IOR = 1.5f; + // surface.Normal = surface.GeomNormal; + return surface; +# endif + +#if defined(FULL_MATERIAL) + surface.SubsurfaceColor = float3(0.0f, 0.0f, 0.0f); + surface.Thickness = 0.0f; + surface.CoatColor = float3(1.0f, 1.0f, 1.0f); + surface.CoatStrength = 0.0f; + surface.CoatRoughness = 0.0f; + surface.CoatF0 = float3(0.04f, 0.04f, 0.04f); + surface.FuzzColor = float3(0.0f, 0.0f, 0.0f); + surface.FuzzWeight = 0.0f; + surface.GlintScreenSpaceScale = 1.0f; + surface.GlintLogMicrofacetDensity = 0.0f; + surface.GlintMicrofacetRoughness = 0.0f; + surface.GlintDensityRandomization = 0.0f; + surface.Noise = 0.0f; +#endif + + return surface; + } + + Surface(float3 position, float3 geomNormal, float3 normal, float3 tangent, float3 bitangent, float3 albedo, float roughness, 
float metallic, float3 emissive, float ao) {
+        // Builds a Surface directly from caller-supplied shading inputs: no vertex fetch and no
+        // texture sampling happens here. Transmission and subsurface terms are zeroed.
+        Surface surface;
+        surface.SubsurfaceData = (Subsurface)0;
+        surface.DiffTrans = 0.0f;
+        surface.SpecTrans = 0.0f;
+
+        surface.Position = position;
+
+        surface.FaceNormal = geomNormal;
+
+        surface.MipLevel = 0.0f + Frame.TexLODBias;
+        surface.GeomNormal = geomNormal;
+        surface.GeomTangent = tangent;  // not needed for hybrid
+
+        surface.Normal = normal;
+        surface.Tangent = tangent;
+        surface.Bitangent = bitangent;
+
+#    ifdef DEBUG_WHITE_FURNACE
+        surface.Albedo = float3(1.0f, 1.0f, 1.0f);
+#    else
+        surface.Albedo = albedo;
+#    endif
+        surface.TransmissionColor = float3(0.0f, 0.0f, 0.0f);
+        surface.Emissive = emissive * Frame.Emissive;
+
+        // Roughness and metalness go through the per-frame remap ranges.
+        surface.Roughness = PBR::Roughness(roughness, Frame.Roughness.x, Frame.Roughness.y);
+        surface.Metallic = Remap(metallic, Frame.Metalness.x, Frame.Metalness.y);
+        surface.AO = ao;
+
+        surface.DiffuseAlbedo = surface.Albedo * (1.0f - surface.Metallic);
+
+        // Derive F0 from the same albedo/metallic that produced DiffuseAlbedo (after the
+        // white-furnace override and the Frame.Metalness remap) so the diffuse and specular
+        // terms stay consistent, as the payload-based constructor does with surface.Albedo
+        // and surface.Metallic.
+        surface.F0 = PBR::F0(surface.Albedo, surface.Metallic);
+        surface.IOR = F0toIOR(surface.F0);
+
+#if defined(FULL_MATERIAL)
+        // Extended material lobes are initialised to fixed defaults for this constructor.
+        surface.SubsurfaceColor = float3(0.0f, 0.0f, 0.0f);
+        surface.Thickness = 0.0f;
+        surface.CoatColor = float3(1.0f, 1.0f, 1.0f);
+        surface.CoatStrength = 0.0f;
+        surface.CoatRoughness = 0.0f;
+        surface.CoatF0 = float3(0.04f, 0.04f, 0.04f);
+        surface.FuzzColor = float3(0.0f, 0.0f, 0.0f);
+        surface.FuzzWeight = 0.0f;
+        surface.GlintScreenSpaceScale = 1.0f;
+        surface.GlintLogMicrofacetDensity = 0.0f;
+        surface.GlintMicrofacetRoughness = 0.0f;
+        surface.GlintDensityRandomization = 0.0f;
+        surface.Noise = 0.0f;
+#endif
+
+        return surface;
+    }
+};
+#define Surface(...) Surface::ctor(__VA_ARGS__)
+
+#define BRDFContext(...) 
static BRDFContext ctor(__VA_ARGS__) +struct BRDFContext { + float3 ViewDirection; + float NdotV; + + BRDFContext(Surface surface, float3 viewDirection) + { + BRDFContext brdfContext; + + brdfContext.ViewDirection = viewDirection; + brdfContext.NdotV = saturate(dot(surface.Normal, viewDirection)); + + return brdfContext; + } +}; +#define BRDFContext(...) BRDFContext::ctor(__VA_ARGS__) + +#endif // SURFACE_HLSL \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types.hlsli new file mode 100644 index 0000000000..172ed1469d --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Types.hlsli @@ -0,0 +1,17 @@ +#ifndef TYPES_HLSL +#define TYPES_HLSL + +#ifndef __cplusplus +typedef bool BOOL; +#endif + +#include "Raytracing/Includes/Types/Vertex.hlsli" +#include "Raytracing/Includes/Types/Triangle.hlsli" +#include "Raytracing/Includes/Types/Shape.hlsli" +#include "Raytracing/Includes/Types/Material.hlsli" +#include "Raytracing/Includes/Types/Instance.hlsli" +#include "Raytracing/Includes/Types/Light.hlsli" +#include "Raytracing/Includes/RT.hlsli" +#include "Raytracing/Includes/Types/FrameData.hlsli" + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/FrameData.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/FrameData.hlsli new file mode 100644 index 0000000000..909f7f66ba --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/FrameData.hlsli @@ -0,0 +1,72 @@ +#ifndef GI_FRAMEDATA_HLSL +#define GI_FRAMEDATA_HLSL + +#include "Raytracing/Includes/Types/Light.hlsli" +#include "Raytracing/Includes/SharedData.hlsli" + +struct +#ifdef __cplusplus +alignas(16) +#endif + SHaRCFrameData +{ + BOOL Enabled; + float SceneScale; + uint AccumFrameNum; + uint StaleFrameNum; + float RadianceScale; + BOOL AntifireflyFilter; + uint Capacity; + BOOL UpdatePass; +}; +#ifdef __cplusplus 
+static_assert(sizeof(SHaRCFrameData) % 4 == 0); +#endif + +struct +#ifdef __cplusplus +alignas(16) +#endif + FrameData +{ + float4x4 ViewInverse; + float4x4 ProjInverse; + float4 CameraData; + float4 NDCToView; + DirectionalLight Directional; + float3 Position; + uint FrameCount; + float3 PositionPrev; + BOOL RussianRoulette; + float2 Roughness; + float2 Metalness; + uint2 DispatchSize; + float Emissive; + float Effect; + float Sky; + float3 EmittanceColor; + SHaRCFrameData SHaRC; + FeatureData Features; + uint Lights; + float PixelConeSpreadAngle; + float TexLODBias; + float CloudOpacity; + int SSSSampleCount; + float SSSMaxSampleRadius; + BOOL SSSMaterialOverride; + BOOL EnableSssTransmission; + float3 OverrideSSSTransmissionColor; + float OverrideSSSScale; + float3 OverrideSSSScatteringColor; + float OverrideSSSAnisotropy; + float4 Pad1; + float4 Pad2; + float4x4 Pad3; + float4x4 Pad4; + float3x4 Pad5; +}; +#ifdef __cplusplus +static_assert(sizeof(FrameData) == 1024); +#endif + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/Instance.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/Instance.hlsli new file mode 100644 index 0000000000..2c345a1605 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/Instance.hlsli @@ -0,0 +1,76 @@ +#ifndef INSTANCE_HLSL +#define INSTANCE_HLSL + +struct LightData +{ + uint Count; + uint Data[4]; + + uint GetGroup(uint index) + { + return index >> 2; + } + + uint GetOffset(uint index) + { + return (index & 3) << 3; + } + + uint GetID(uint index) + { + uint group = GetGroup(index); + uint offset = GetOffset(index); + + return (Data[group] >> offset) & 0xFFu; + } + +#ifdef __cplusplus + LightData() = default; + + LightData(const eastl::vector& ids) + { + StoreIDs(ids); + } + + void SetID(uint index, uint val) + { + uint group = GetGroup(index); + uint offset = GetOffset(index); + uint mask = ~(0xFFu << offset); + Data[group] = (Data[group] & 
mask) | ((val & 0xFFu) << offset);
+    }
+
+    // Packs up to 16 light IDs into Data (four 8-bit IDs per uint) and records how many were
+    // stored in Count. Extra entries in `ids` are dropped and each ID is clamped to 255.
+    void StoreIDs(const eastl::vector& ids)
+    {
+        size_t count = std::min(ids.size(), static_cast(16));
+        Count = static_cast(count);
+
+        // SetID() read-modify-writes Data, so start from a known state. Without this the
+        // converting constructor would read uninitialised words, and slots that are not
+        // overwritten would keep garbage or stale IDs.
+        for (size_t i = 0; i < 4; ++i) {
+            Data[i] = 0;
+        }
+
+        for (size_t i = 0; i < count; ++i) {
+            uint32_t id = std::min(static_cast(ids[i]), 255u);
+            SetID(static_cast(i), id);
+        }
+    }
+#endif
+};
+
+
+#ifdef __cplusplus
+struct InstanceData
+#else
+struct Instance
+#endif
+{
+#ifndef __cplusplus
+    row_major
+#endif
+    float3x4 Transform;
+
+    LightData LightData;
+    uint FirstGeometryID;
+};
+
+#ifdef __cplusplus
+static_assert(sizeof(InstanceData) % 4 == 0);
+#endif
+
+#endif
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/Light.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/Light.hlsli
new file mode 100644
index 0000000000..93f4805dc7
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/Light.hlsli
@@ -0,0 +1,47 @@
+#ifndef LIGHT_HLSL
+#define LIGHT_HLSL
+
+#ifndef __cplusplus
+namespace LightFlags
+{
+    static const uint16_t ISL = (1 << 0);
+    static const uint16_t LinearLight = (1 << 1);
+}
+#endif
+
+struct
+#ifdef __cplusplus
+alignas(16)
+#endif
+    Light
+{
+    float3 Vector;
+    float Radius;
+    float3 Color;
+    float InvRadius;
+    float FadeZone;
+    float SizeBias;
+    float Fade;
+    uint16_t Type;
+    uint16_t Flags;
+};
+#ifdef __cplusplus
+static_assert(sizeof(Light) % 16 == 0);
+#endif
+
+struct
+#ifdef __cplusplus
+alignas(16)
+#endif
+    DirectionalLight
+{
+    float3 Vector;
+    float Pad0;
+    float3 Color;
+    float Pad1;
+};
+#ifdef __cplusplus
+static_assert(sizeof(DirectionalLight) % 16 == 0);
+#endif
+
+#endif
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/Material.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/Material.hlsli
new file mode 100644
index 0000000000..8069f56af6
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/Material.hlsli
@@ -0,0 +1,266 @@
+#ifndef MATERIAL_HLSL
+#define 
MATERIAL_HLSL + +#ifndef __cplusplus +namespace ShaderType +{ + static const uint16_t TruePBR = 0; + static const uint16_t Lighting = 1; + static const uint16_t Effect = 2; + static const uint16_t Grass = 3; + static const uint16_t Water = 4; + static const uint16_t BloodSplatter = 5; + static const uint16_t DistantTree = 6; + static const uint16_t Particle = 7; +} + +namespace ShaderFlags +{ + static const uint kSpecular = (1 << 0); + static const uint kTempRefraction = (1 << 1); + static const uint kVertexAlpha = (1 << 2); + static const uint kGrayscaleToPaletteColor = (1 << 3); + static const uint kGrayscaleToPaletteAlpha = (1 << 4); + static const uint kFalloff = (1 << 5); + static const uint kEnvMap = (1 << 6); + static const uint kFace = (1 << 7); + static const uint kModelSpaceNormals = (1 << 8); + static const uint kRefraction = (1 << 9); + static const uint kProjectedUV = (1 << 10); + static const uint kExternalEmittance = (1 << 11); + static const uint kVertexColors = (1 << 12); + static const uint kMultiTextureLandscape = (1 << 13); + static const uint kEyeReflect = (1 << 14); + static const uint kHairTint = (1 << 15); + static const uint kTwoSided = (1 << 16); + static const uint kAssumeShadowmask = (1 << 17); + static const uint kBackLighting = (1 << 18); + static const uint kTreeAnim = (1 << 19); +} + +namespace Feature +{ + static const uint16_t kDefault = 0; + static const uint16_t kEnvironmentMap = 1; + static const uint16_t kGlowMap = 2; + static const uint16_t kParallax = 3; + static const uint16_t kFaceGen = 4; + static const uint16_t kFaceGenRGBTint = 5; + static const uint16_t kHairTint = 6; + static const uint16_t kParallaxOcc = 7; + static const uint16_t kMultiTexLand = 8; + static const uint16_t kLODLand = 9; + static const uint16_t kUnknown = 10; + static const uint16_t kMultilayerParallax = 11; + static const uint16_t kTreeAnim = 12; + static const uint16_t kMultiIndexTriShapeSnow = 14; + static const uint16_t kLODObjectsHD = 15; + static 
const uint16_t kEye = 16; + static const uint16_t kCloud = 17; + static const uint16_t kLODLandNoise = 18; + static const uint16_t kMultiTexLandLODBlend = 19; +} + +namespace AlphaFlags +{ + static const uint16_t kOpaque = 0; + static const uint16_t kAlphaBlend = (1 << 0); + static const uint16_t kAlphaTest = (1 << 1); +} +#endif + +// DirectX 12 is very picky about buffer alignment, make sure all variable boundaries are properly aligned +// https://maraneshi.github.io/HLSL-ConstantBufferLayoutVisualizer/ +#ifdef __cplusplus +struct MaterialData +#else +struct Material +#endif +{ + half4 TexCoordOffsetScale0; + half4 TexCoordOffsetScale1; + + half4 Color0; + half4 Color1; + half4 Color2; + + half Scalar0; + half Scalar1; + half Scalar2; + half Scalar3; + + uint16_t AlphaFlags; + + // Textures + uint16_t Texture0; + uint16_t Texture1; + uint16_t Texture2; + uint16_t Texture3; + uint16_t Texture4; + uint16_t Texture5; + + uint16_t Texture6; + uint16_t Texture7; + uint16_t Texture8; + uint16_t Texture9; + uint16_t Texture10; + uint16_t Texture11; + + uint16_t Texture12; + uint16_t Texture13; + uint16_t Texture14; + uint16_t Texture15; + uint16_t Texture16; + uint16_t Texture17; + + uint16_t Texture18; + uint16_t Texture19; + + uint16_t ShaderType; + uint16_t Feature; + uint16_t PBRFlags; + uint32_t ShaderFlags; // Max 32 flags + + // Shared + half4 BaseColor() + { + return Color0; + } + + half4 EffectColor() + { + return Color1; + } + + uint16_t BaseTexture() + { + return Texture0; + } + + uint16_t NormalTexture() + { + return Texture1; + } + + uint16_t EffectTexture() + { + return Texture2; + } + + // Vanilla + half4 SpecularColor() + { + return Color2; + } + + uint16_t GlowTexture() + { + return Texture2; + } + + uint16_t SpecularTexture() + { + return Texture3; + } + + uint16_t EnvTexture() + { + return Texture4; + } + + uint16_t EnvMaskTexture() + { + return Texture4; + } + + // Vanilla - FaceGen + uint16_t TintTexture() + { + return Texture4; + } + + uint16_t 
DetailTexture() + { + return Texture5; + } + + // Landscape + half2 TexOffset() + { + return half2(Scalar0, Scalar1); + } + + half TexFade() + { + return Scalar2; + } + + half4 BlendParams() + { + return Color0; + } + + uint16_t OverlayTexture() + { + return Texture18; + } + + uint16_t NoiseTexture() + { + return Texture19; + } + + // True PBR + half RoughnessScale() + { + return Scalar0; + } + + half SpecularLevel() + { + return Scalar1; + } + + uint16_t EmissiveTexture() + { + return Texture2; + } + + uint16_t RMAOSTexture() + { + return Texture3; + } + + uint16_t SubsurfaceTexture() + { + return Texture6; + } + + half SubsurfaceScale() + { + return Scalar2; + } + + half4 SubsurfaceScatteringColor() + { + return Color2; + } + + half AlphaThreshold() + { + return Scalar3; + } + +#ifndef __cplusplus + float2 TexCoord(float2 texCoord) + { + return texCoord * TexCoordOffsetScale0.zw + TexCoordOffsetScale0.xy; + } +#endif +}; + +#ifdef __cplusplus +static_assert(sizeof(MaterialData) % 4 == 0); +#endif + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/ShadowsFrameData.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/ShadowsFrameData.hlsli new file mode 100644 index 0000000000..2c56c7f109 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/ShadowsFrameData.hlsli @@ -0,0 +1,22 @@ +#ifndef SHADOW_FRAMEDATA_HLSL +#define SHADOW_FRAMEDATA_HLSL + +struct +#ifdef __cplusplus +alignas(16) +#endif + ShadowsFrameData +{ + float4 CameraData; + float4 NDCToView; + float4x4 ViewInverse; + float4 Position; + float4 Direction; + float4x4 Pad0; + float4x4 Pad1; +}; +#ifdef __cplusplus +static_assert(sizeof(ShadowsFrameData) % 256 == 0); +#endif + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/Shape.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/Shape.hlsli new file mode 100644 index 0000000000..5e86076abd --- /dev/null +++ 
b/features/Raytracing/Shaders/Raytracing/Includes/Types/Shape.hlsli @@ -0,0 +1,27 @@ +#ifndef SHAPE_HLSL +#define SHAPE_HLSL + +#include "Raytracing/Includes/Types/Material.hlsli" + +#ifdef __cplusplus +struct ShapeData +{ + MaterialData Material; + uint GeometryIdx; + uint2 Pad0; + float3x4 Transform; +}; + +static_assert(sizeof(ShapeData) % 4 == 0); + +#else +struct Shape +{ + Material Material; + uint GeometryIdx; + uint2 Pad0; + row_major float3x4 Transform; +}; +#endif + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/Skinning.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/Skinning.hlsli new file mode 100644 index 0000000000..e91b965a26 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/Skinning.hlsli @@ -0,0 +1,40 @@ +#ifndef SKINNING_HLSL +#define SKINNING_HLSL + +struct Skinning +{ + half weight[4]; + +#ifdef __cplusplus + uint8_t bone[4]; +#else + uint bone; +#endif + +#ifdef __cplusplus + Skinning() = default; + + Skinning(eastl::vector weights, eastl::vector boneIds) + { + auto weightCount = weights.size(); + auto boneIdsCount = boneIds.size(); + + for (size_t i = 0; i < 4; i++) { + weight[i] = i < weightCount ? weights[i] : half(0.0f); + bone[i] = i < boneIdsCount ? 
boneIds[i] : 0; + } + } +#else + uint GetBone(uint idx) + { + uint shift = idx * 8; + return (bone >> shift) & 0xFF; + } +#endif +}; + +#ifdef __cplusplus +static_assert(sizeof(Skinning) % 4 == 0); +#endif + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/Triangle.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/Triangle.hlsli new file mode 100644 index 0000000000..0e3c2ad775 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/Triangle.hlsli @@ -0,0 +1,14 @@ +#ifndef TRIANGLE_HLSL +#define TRIANGLE_HLSL + +struct Triangle +{ + uint16_t x; + uint16_t y; + uint16_t z; +}; +#ifdef __cplusplus +static_assert(sizeof(Triangle) == 6); +#endif + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/Vertex.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/Vertex.hlsli new file mode 100644 index 0000000000..68b7328cb7 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/Vertex.hlsli @@ -0,0 +1,22 @@ +#ifndef VERTEX_HLSL +#define VERTEX_HLSL + +#include "Raytracing/Includes/Types/byte4.hlsli" + +struct Vertex +{ + float3 Position; + half2 Texcoord0; + half3 Normal; + half3 Tangent; + ubyte4f Color; // Color before bitangent fixes alignment + half3 Bitangent; + uint16_t Pad; // Padding to 4-byte boundary else things break + ubyte4f LandBlend0; + ubyte4f LandBlend1; +}; +#ifdef __cplusplus +static_assert(sizeof(Vertex) % 4 == 0); +#endif + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/VertexUpdate.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/VertexUpdate.hlsli new file mode 100644 index 0000000000..d62bcbcea0 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/VertexUpdate.hlsli @@ -0,0 +1,18 @@ +#ifndef VERTEX_UPDATE_HLSL +#define VERTEX_UPDATE_HLSL + +struct VertexUpdateData +{ + uint index; + uint updateFlags; + 
uint vertexCount; + uint boneOffset; + uint shapeFlags; + uint3 pad0; +}; + +#ifdef __cplusplus +static_assert(sizeof(VertexUpdateData) % 4 == 0); +#endif + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/Types/byte4.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/Types/byte4.hlsli new file mode 100644 index 0000000000..c88c229b09 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/Types/byte4.hlsli @@ -0,0 +1,33 @@ +#ifndef BYTE4_HLSL +#define BYTE4_HLSL + +struct ubyte4f +{ + uint packed; + + #ifndef __cplusplus + half4 unpack() + { + return half4( + (half)(packed & 0xFF) / 255.0h, + (half)((packed >> 8) & 0xFF) / 255.0h, + (half)((packed >> 16) & 0xFF) / 255.0h, + (half)(packed >> 24) / 255.0h + ); + } + #endif +}; + +struct byte4f +{ + ubyte4f packed; + + #ifndef __cplusplus + half4 unpack() + { + return packed.unpack() * 2.0h - 1.0h; + } + #endif +}; + +#endif \ No newline at end of file diff --git a/features/Raytracing/Shaders/Raytracing/Includes/VanillaToPBR.hlsli b/features/Raytracing/Shaders/Raytracing/Includes/VanillaToPBR.hlsli new file mode 100644 index 0000000000..54f52ac9d3 --- /dev/null +++ b/features/Raytracing/Shaders/Raytracing/Includes/VanillaToPBR.hlsli @@ -0,0 +1,27 @@ +#ifndef VANILA_TO_PBR_HLSLI +#define VANILA_TO_PBR_HLSLI + +#include "Common/Color.hlsli" + +float CalcSpecularity(float3 specularColor, float glossiness) +{ + return saturate(max(specularColor.r, max(specularColor.g, specularColor.b)) * glossiness); +} + +float RemappedSpecularity(float specularity) +{ + return 1.0f - (specularity * 0.75f + 0.25f); +} + +float CalcRoughness(float roughnessFromShininess, float specularity) +{ + return roughnessFromShininess * RemappedSpecularity(specularity); +} + +float CalcMetallic(float3 albedo, float specularity, float roughnessFromShininess) +{ + const float albedoLuminance = saturate(Color::RGBToLuminance(albedo)); + return (1.0f - roughnessFromShininess) * 
(specularity * specularity) * (1.0f - albedoLuminance);
+}
+
+#endif
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/ModelSpaceToTangent.hlsl b/features/Raytracing/Shaders/Raytracing/ModelSpaceToTangent.hlsl
new file mode 100644
index 0000000000..e44ff5e07d
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/ModelSpaceToTangent.hlsl
@@ -0,0 +1,59 @@
+struct VS_INPUT
+{
+    float4 Position : POSITION0;
+    float2 TexCoord0 : TEXCOORD0;
+    float4 Normal : NORMAL0;
+    float4 Tangent : TANGENT0;
+    float4 Color : COLOR0;
+    float4 Bitangent : BINORMAL0;
+};
+
+struct VS_OUTPUT
+{
+    float4 Position : SV_POSITION;
+    float2 TexCoord0 : TEXCOORD0;
+    float3 Normal : TEXCOORD1;
+    float3 Tangent : TEXCOORD2;
+    float3 Bitangent : TEXCOORD3;
+};
+
+// Vertex stage: positions each vertex at its texture coordinate instead of its mesh position.
+VS_OUTPUT vertex(VS_INPUT input)
+{
+    // Texcoords in [0, 1] become clip-space XY in [-1, 1], with Y negated.
+    const float2 clipXY = input.TexCoord0.xy * 2.0f - 1.0f;
+
+    VS_OUTPUT result;
+    result.Position = float4(clipXY.x, -clipXY.y, 1.0, 1.0);
+    result.TexCoord0 = input.TexCoord0.xy;
+
+    // The basis vectors are forwarded with their Y and Z components swapped.
+    result.Normal = input.Normal.xzy;
+    result.Tangent = input.Tangent.xzy;
+    result.Bitangent = input.Bitangent.xzy;
+
+    return result;
+}
+
+SamplerState MSNSampler : register(s0);
+Texture2D MSNormalMap : register(t0);
+
+// Pixel stage: projects the sampled model-space normal onto the interpolated tangent frame and
+// writes it remapped to [0, 1]; the sampled alpha is passed through unchanged.
+float4 pixel(VS_OUTPUT input) : SV_Target
+{
+    const float4 texel = MSNormalMap.SampleLevel(MSNSampler, input.TexCoord0, 0.0f);
+    const float3 modelNormal = normalize(texel.xyz * 2.0f - 1.0f);
+
+    const float3 n = normalize(input.Normal);
+    const float3 t = normalize(input.Tangent);
+    const float3 b = normalize(input.Bitangent);
+
+    // Matrix rows are tangent, bitangent, normal.
+    float3 tangentNormal = mul(float3x3(t, b, n), modelNormal - n);
+
+    // Z is rebuilt from XY; saturate keeps the square-root argument non-negative.
+    tangentNormal.z = sqrt(saturate(1.0f - dot(tangentNormal.xy, tangentNormal.xy)));
+
+    return float4(tangentNormal * 0.5f + 0.5f, texel.w);
+}
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/RTShadowsCS.hlsl b/features/Raytracing/Shaders/Raytracing/RTShadowsCS.hlsl
new file mode 100644
index 
0000000000..724d21cebd
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/RTShadowsCS.hlsl
@@ -0,0 +1,73 @@
+#include "Raytracing/Includes/Common.hlsli"
+#include "Raytracing/Includes/Types/ShadowsFrameData.hlsli"
+
+ConstantBuffer Frame : register(b0);
+
+RWTexture2D ShadowMask : register(u0);
+
+Texture2D DepthTexture : register(t0);
+RaytracingAccelerationStructure Scene : register(t1);
+
+// Builds a screen-space shadow mask with an inline ray query. For each pixel a ray is cast from the
+// reconstructed world position along Frame.Direction; the red channel is 0 when a triangle hit is
+// committed and 1 otherwise. Pixels rejected by the depth test are written as (1, 0, 1, 1).
+[numthreads(8, 8, 1)]
+void main(uint2 id : SV_DispatchThreadID)
+{
+    // Target size, read from the w components of Frame.Position and Frame.Direction.
+    const float2 size = float2(Frame.Position.w, Frame.Direction.w);
+
+    // Valid pixel indices are 0 .. size - 1, so id == size is already out of range.
+    if (any(id >= size))
+        return;
+
+    const float depth = DepthTexture[id];
+
+    const float depthView = ScreenToViewDepth(depth, Frame.CameraData);
+
+    if (depthView < FP_Z || depth >= SKY_Z)
+    {
+        ShadowMask[id] = float4(1.0f, 0.0f, 1.0f, 1.0f);
+        return;
+    }
+
+    // Sample position at the pixel centre.
+    float2 uv = (id + 0.5f) / size;
+
+    const float3 positionVS = ScreenToViewPosition(uv, depthView, Frame.NDCToView);
+    const float3 positionCS = ViewToWorldPosition(positionVS, Frame.ViewInverse);
+    const float3 positionWS = positionCS + Frame.Position.xyz;
+
+    RayQuery q;
+
+    float3 direction = normalize(Frame.Direction.xyz);
+
+    RayDesc ray;
+    // Origin is pushed along the ray, presumably to avoid hitting the originating surface.
+    ray.Origin = positionWS + direction * 0.1f;
+    ray.Direction = direction;
+    ray.TMin = 0.01f;
+    ray.TMax = 1e30;
+
+    q.TraceRayInline(
+        Scene,
+        RAY_FLAG_NONE,
+        0xFF,
+        ray);
+
+    // Proceed() returns true whenever traversal pauses on a candidate and still has work left.
+    // Run it until it returns false so CommittedStatus() describes the finished traversal;
+    // candidates surfaced on the way are never committed here and so do not count as hits.
+    while (q.Proceed())
+    {
+    }
+
+    if (q.CommittedStatus() == COMMITTED_TRIANGLE_HIT)
+    {
+        ShadowMask[id] = float4(0.0f, 0.0f, 0.0f, 1.0f);
+    }
+    else
+    {
+        ShadowMask[id] = float4(1.0f, 0.0f, 0.0f, 1.0f);
+    }
+}
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Shadows/Miss.hlsl b/features/Raytracing/Shaders/Raytracing/Shadows/Miss.hlsl
new file mode 100644
index 0000000000..6dea5556f2
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Shadows/Miss.hlsl
@@ -0,0 +1,7 @@
+#include "Raytracing/Shadows/Payload.hlsli"
+
+[shader("miss")]
+void main(inout Payload payload)
+{
+    payload.missed = 1.0f;
+}
diff --git 
a/features/Raytracing/Shaders/Raytracing/Shadows/Payload.hlsli b/features/Raytracing/Shaders/Raytracing/Shadows/Payload.hlsli
new file mode 100644
index 0000000000..da146cb732
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Shadows/Payload.hlsli
@@ -0,0 +1,4 @@
+struct Payload
+{
+	float missed;  // initialised to 0 by the ray-generation shader; the miss shader sets it to 1
+};
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/Shadows/RayGeneration.hlsl b/features/Raytracing/Shaders/Raytracing/Shadows/RayGeneration.hlsl
new file mode 100644
index 0000000000..dab1f32255
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/Shadows/RayGeneration.hlsl
@@ -0,0 +1,54 @@
+#include "Raytracing/Shadows/Payload.hlsli"
+#include "Raytracing/Includes/Common.hlsli"
+// NOTE(review): the field order and padding of this cbuffer must match what the CPU side uploads; that layout is not visible here - confirm, including the intent of Pad[35].
+cbuffer ShadowsCB: register(b0)
+{
+	float4 CameraData;
+	float2 Size;
+	float4 NDCToView;
+	float4x4 ViewInverse;
+	float3 Position;
+	float3 Direction;
+	uint Pad[35];
+};
+
+RWTexture2D ShadowMask : register(u0);
+
+Texture2D DepthTexture : register(t0);
+RaytracingAccelerationStructure Scene : register(t1);
+// Ray-generation entry: one shadow ray per dispatch index; ShadowMask.x receives payload.missed.
+[shader("raygeneration")]
+void main()
+{
+	uint2 idx = DispatchRaysIndex().xy;
+	uint2 size = DispatchRaysDimensions().xy;
+
+	const float depth = DepthTexture[idx];
+
+	const float depthView = ScreenToViewDepth(depth, CameraData);
+	// Pixels nearer than FP_Z or at/beyond SKY_Z are not traced; they receive the marker value (1, 0, 1, 1).
+	if (depthView < FP_Z || depth >= SKY_Z)
+	{
+		ShadowMask[idx] = float4(1.0f, 0.0f, 1.0f, 1.0f);
+		return;
+	}
+
+	float2 uv = (idx + 0.5f) / size;  // pixel centre
+
+	const float3 positionVS = ScreenToViewPosition(uv, depthView, NDCToView);
+	const float3 positionCS = ViewToWorldPosition(positionVS, ViewInverse);
+	const float3 positionWS = positionCS + Position.xyz;
+	// NOTE(review): Direction is used as-is here, whereas ShadowsRT.hlsl normalizes Frame.Direction - confirm it arrives normalized.
+	RayDesc ray;
+	ray.Origin = positionWS + Direction * 0.1f;
+	ray.Direction = Direction;
+	ray.TMin = 0.01f;
+	ray.TMax = 1e30;
+
+	Payload payload;
+	payload.missed = 0.0f;
+	// With the closest-hit shader skipped and non-opaque geometry culled, the payload keeps 0 unless the miss shader runs.
+	TraceRay(Scene, RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH | RAY_FLAG_SKIP_CLOSEST_HIT_SHADER | RAY_FLAG_CULL_NON_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES, 0xFF, 0, 0, 0, ray, payload);
+
+	ShadowMask[idx] = float4(payload.missed, 0.0f, 0.0f, 1.0f);
+}
diff --git a/features/Raytracing/Shaders/Raytracing/ShadowsRT.hlsl b/features/Raytracing/Shaders/Raytracing/ShadowsRT.hlsl
new file mode 100644
index 0000000000..1fecedeedf
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/ShadowsRT.hlsl
@@ -0,0 +1,64 @@
+#include "Raytracing/Includes/Common.hlsli"
+#include "Raytracing/Includes/Types/ShadowsFrameData.hlsli"
+
+ConstantBuffer Frame : register(b0);
+
+RWTexture2D ShadowMask : register(u0);
+
+Texture2D DepthTexture : register(t0);
+RaytracingAccelerationStructure Scene : register(t1);
+
+struct Payload
+{
+	float missed;
+};
+// Ray-generation entry: one shadow ray per dispatch index along the normalized Frame.Direction.
+[shader("raygeneration")]
+void RayGeneration()
+{
+	uint2 idx = DispatchRaysIndex().xy;
+	uint2 size = DispatchRaysDimensions().xy;
+
+	const float depth = DepthTexture[idx];
+
+	const float depthView = ScreenToViewDepth(depth, Frame.CameraData);
+
+	float3 direction = normalize(Frame.Direction.xyz);
+	// Pixels nearer than FP_Z or at/beyond SKY_Z are not traced; here they are written as (1, 0, 0, 1).
+	if (depthView < FP_Z || depth >= SKY_Z)
+	{
+		ShadowMask[idx] = float4(1.0, 0.0, 0.0, 1.0f);
+		return;
+	}
+
+	float2 uv = (idx + 0.5f) / size;  // pixel centre
+
+	const float3 positionVS = ScreenToViewPosition(uv, depthView, Frame.NDCToView);
+	const float3 positionCS = ViewToWorldPosition(positionVS, Frame.ViewInverse);
+	const float3 positionWS = positionCS + Frame.Position.xyz;
+
+	RayDesc ray;
+	ray.Origin = positionWS + direction * 0.1f;
+	ray.Direction = direction;
+	ray.TMin = 0.01f;
+	ray.TMax = 1e30;
+
+	Payload payload;
+	payload.missed = 0.0f;
+
+	TraceRay(Scene, RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH | RAY_FLAG_SKIP_CLOSEST_HIT_SHADER, 0xFF, 0, 0, 0, ray, payload);
+	// .x = miss flag, .y = the raw depth sample.
+	ShadowMask[idx] = float4(payload.missed, depth, 0.0f, 1.0f);
+}
+// Miss shader: marks the payload as "nothing was hit".
+[shader("miss")]
+void Miss(inout Payload payload)
+{
+	payload.missed = 1.0f;
+}
+// Closest-hit shader: resets the flag to 0. NOTE(review): the TraceRay call above passes RAY_FLAG_SKIP_CLOSEST_HIT_SHADER, so that call does not invoke it.
+[shader("closesthit")]
+void ClosestHit(inout Payload payload, in BuiltInTriangleIntersectionAttributes attribs)
+{
+	payload.missed = 0.0f;
+}
\ No newline at end of file
diff --git
a/features/Raytracing/Shaders/Raytracing/SharcResolveCS.hlsl b/features/Raytracing/Shaders/Raytracing/SharcResolveCS.hlsl
new file mode 100644
index 0000000000..c6947b30ee
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/SharcResolveCS.hlsl
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ *
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto. Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+#define SHARC
+#define SHARC_RESOLVE 1
+
+#include "Raytracing/Includes/RT/SHaRC.hlsli"
+#include "Raytracing/Includes/Types.hlsli"
+
+#define LINEAR_BLOCK_SIZE 256
+
+ConstantBuffer Frame : register(b0, space0);
+
+RWStructuredBuffer u_SharcHashEntriesBuffer : register(u0, space0);
+RWStructuredBuffer u_SharcAccumulationBuffer : register(u1, space0);
+RWStructuredBuffer u_SharcResolvedBuffer : register(u2, space0);
+// NOTE(review): included after the declarations above, presumably because the helper refers to them by name - confirm.
+#include "Raytracing/Includes/RT/SHaRCHelper.hlsli"
+// One thread per entry index (did.x): fetches both parameter structs from the helper and calls SharcResolveEntry for that index.
+[numthreads(LINEAR_BLOCK_SIZE, 1, 1)]
+void main(in uint2 did : SV_DispatchThreadID)
+{
+	SharcParameters sharcParameters = GetSharcParameters();
+	SharcResolveParameters resolveParameters = GetSharcResolveParameters();
+
+	SharcResolveEntry(did.x, sharcParameters, resolveParameters);
+}
\ No newline at end of file
diff --git a/features/Raytracing/Shaders/Raytracing/SkinningCS.hlsl b/features/Raytracing/Shaders/Raytracing/SkinningCS.hlsl
new file mode 100644
index 0000000000..d83ec12a4f
--- /dev/null
+++ b/features/Raytracing/Shaders/Raytracing/SkinningCS.hlsl
@@ -0,0 +1,124 @@
+#include "Raytracing/Includes/Types.hlsli"
+
+#include "Raytracing/Includes/Types/VertexUpdate.hlsli"
+#include "Raytracing/Includes/Types/Skinning.hlsli"
+
+#define MAX_BONES (255)  // not referenced anywhere else in this file
+
+struct BoneMatrix
+{
+	row_major float3x4 World;
+};
+
+RWStructuredBuffer OutputVertices[] : register(u0);
+
+StructuredBuffer UpdateData : register(t0, space0);
+StructuredBuffer BoneMatrices : register(t1, space0);
+
+StructuredBuffer DynamicVertices[] : register(t0, space1);
+
+StructuredBuffer Vertices[] : register(t0, space2);
+
+StructuredBuffer MeshSkinning[] : register(t0, space3);
+
+// Bit masks tested against VertexUpdateData::shapeFlags / updateFlags in main().
+namespace Flags
+{
+	static const uint Dynamic = (1 << 2);
+	static const uint Skinned = (1 << 3);
+}
+// Weighted blend of the four bone matrices after subtracting `pivot` from each matrix's fourth (translation) column. Not called within this file.
+float3x4 GetBoneTransformMatrix2(Skinning skinning, float3 pivot, uint boneOffset)
+{
+	float3x4 pivotMatrix = transpose(float4x3(0.0.xxx, 0.0.xxx, 0.0.xxx, pivot));
+
+	float3x4 boneMatrix1 = BoneMatrices[boneOffset + skinning.GetBone(0)].World;
+	float3x4 boneMatrix2 = BoneMatrices[boneOffset + skinning.GetBone(1)].World;
+	float3x4 boneMatrix3 = BoneMatrices[boneOffset + skinning.GetBone(2)].World;
+	float3x4 boneMatrix4 = BoneMatrices[boneOffset + skinning.GetBone(3)].World;
+
+	return (boneMatrix1 - pivotMatrix) * skinning.weight[0] +
+	       (boneMatrix2 - pivotMatrix) * skinning.weight[1] +
+	       (boneMatrix3 - pivotMatrix) * skinning.weight[2] +
+	       (boneMatrix4 - pivotMatrix) * skinning.weight[3];
+}
+// Weighted blend of the four bone matrices referenced by `skinning`; bone indices are relative to boneOffset.
+float3x4 GetBoneTransformMatrix(Skinning skinning, uint boneOffset)
+{
+	float3x4 boneMatrix1 = BoneMatrices[boneOffset + skinning.GetBone(0)].World;
+	float3x4 boneMatrix2 = BoneMatrices[boneOffset + skinning.GetBone(1)].World;
+	float3x4 boneMatrix3 = BoneMatrices[boneOffset + skinning.GetBone(2)].World;
+	float3x4 boneMatrix4 = BoneMatrices[boneOffset + skinning.GetBone(3)].World;
+
+	return boneMatrix1 * skinning.weight[0] +
+	       boneMatrix2 * skinning.weight[1] +
+	       boneMatrix3 * skinning.weight[2] +
+	       boneMatrix4 * skinning.weight[3];
+}
+// Same blend restricted to the upper-left 3x3 of each bone matrix. Not called within this file.
+float3x3 GetBoneRSMatrix(Skinning skinning, uint boneOffset)
+{
+	float3x4 boneMatrix1 = BoneMatrices[boneOffset + skinning.GetBone(0)].World;
+	float3x4 boneMatrix2 = BoneMatrices[boneOffset + skinning.GetBone(1)].World;
+	float3x4 boneMatrix3 = BoneMatrices[boneOffset + skinning.GetBone(2)].World;
+	float3x4 boneMatrix4 = BoneMatrices[boneOffset + skinning.GetBone(3)].World;
+
+	float3x3 rs1 = (float3x3)boneMatrix1;
+	float3x3 rs2 = (float3x3)boneMatrix2;
+	float3x3 rs3 = (float3x3)boneMatrix3;
+	float3x3 rs4 = (float3x3)boneMatrix4;
+
+	return rs1 * skinning.weight[0] +
+	       rs2 * skinning.weight[1] +
+	       rs3 * skinning.weight[2] +
+	       rs4 * skinning.weight[3];
+}
+// Entry point. Both variants produce (modelIndex, vertexIndex); only the mapping from thread ids differs.
+#if defined(OPTIMIZED_MAPPING)
+[numthreads(THREAD_GROUP_SIZE, 1, 1)]
+void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint3 GID : SV_GroupID)
+{
+	const uint modelIndex = GID.x;
+	const uint vertexIndex = GID.y * THREAD_GROUP_SIZE + GTid.x;
+#else
+[numthreads(1, THREAD_GROUP_SIZE, 1)]
+void main(uint3 DTid : SV_DispatchThreadID)
+{
+	const uint modelIndex = DTid.x;
+	const uint vertexIndex = DTid.y;
+#endif
+
+	VertexUpdateData updateData = UpdateData[modelIndex];
+	// Threads past the shape's vertex count have nothing to write.
+	if (vertexIndex >= updateData.vertexCount)
+		return;
+
+	uint shapeIndex = NonUniformResourceIndex(updateData.index);
+
+	Vertex vertex = Vertices[shapeIndex][vertexIndex];
+
+	float3 position = vertex.Position;
+
+	// Always fetch dynamic positions for dynamic shapes
+	if (updateData.shapeFlags & Flags::Dynamic)
+		position = DynamicVertices[shapeIndex][vertexIndex].xyz;
+	// Skinned shapes: transform the position by the blended bone matrix and the tangent frame by its 3x3 part, renormalizing each vector.
+	if (updateData.updateFlags & Flags::Skinned)
+	{
+		Skinning skinning = MeshSkinning[shapeIndex][vertexIndex];
+
+		float3x4 boneMatrix = GetBoneTransformMatrix(skinning, updateData.boneOffset);
+
+		position = mul(boneMatrix, float4(position, 1.0f));
+
+		float3x3 boneMatrixRot = (float3x3)boneMatrix;
+
+		vertex.Normal = (half3) normalize(mul(boneMatrixRot, vertex.Normal));
+		vertex.Tangent = (half3) normalize(mul(boneMatrixRot, vertex.Tangent));
+		vertex.Bitangent = (half3) normalize(mul(boneMatrixRot, vertex.Bitangent));
+	}
+
+	vertex.Position = position;
+
+	OutputVertices[shapeIndex][vertexIndex] = vertex;
+}
\ No newline at end of file
diff --git
a/features/Upscaling/Shaders/Upscaling/Streamline/nvngx_dlssd.dll b/features/Upscaling/Shaders/Upscaling/Streamline/nvngx_dlssd.dll new file mode 100644 index 0000000000..0939f3f602 Binary files /dev/null and b/features/Upscaling/Shaders/Upscaling/Streamline/nvngx_dlssd.dll differ diff --git a/features/Upscaling/Shaders/Upscaling/Streamline/sl.dlss_d.dll b/features/Upscaling/Shaders/Upscaling/Streamline/sl.dlss_d.dll new file mode 100644 index 0000000000..89e58ea1c1 Binary files /dev/null and b/features/Upscaling/Shaders/Upscaling/Streamline/sl.dlss_d.dll differ diff --git a/features/Wetness Effects/New Feature/Shaders/Features/NewFeature.ini b/features/Wetness Effects/New Feature/Shaders/Features/NewFeature.ini new file mode 100644 index 0000000000..19f01444dc --- /dev/null +++ b/features/Wetness Effects/New Feature/Shaders/Features/NewFeature.ini @@ -0,0 +1,2 @@ +[Info] +Version = 1-0-0 \ No newline at end of file diff --git a/features/Wetness Effects/New Feature/Shaders/NewFeature/nonexistent.cs.hlsl b/features/Wetness Effects/New Feature/Shaders/NewFeature/nonexistent.cs.hlsl new file mode 100644 index 0000000000..dcf508683c --- /dev/null +++ b/features/Wetness Effects/New Feature/Shaders/NewFeature/nonexistent.cs.hlsl @@ -0,0 +1,6 @@ +[numthreads(1, 1, 1)] +void main(int3 dtid : SV_DispatchThreadID) +{ + // It doesn't exist. 
+ // - Exist, the Yapper +} \ No newline at end of file diff --git a/include/PCH.h b/include/PCH.h index 654e026d28..dffa664c64 100644 --- a/include/PCH.h +++ b/include/PCH.h @@ -88,6 +88,17 @@ namespace stl DetourTransactionCommit(); } + + template + void detour_thunk(size_t address) + { + T::func = address; + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&T::func), reinterpret_cast(T::thunk)); + DetourTransactionCommit(); + } + template void detour_thunk_ignore_func(REL::RelocationID a_relId) { @@ -203,6 +214,7 @@ struct ankerl::unordered_dense::hash using float2 = DirectX::SimpleMath::Vector2; using float3 = DirectX::SimpleMath::Vector3; using float4 = DirectX::SimpleMath::Vector4; +using float3x4 = DirectX::XMFLOAT3X4; using float4x4 = DirectX::SimpleMath::Matrix; using uint = uint32_t; diff --git a/package/Shaders/Common/BRDF.hlsli b/package/Shaders/Common/BRDF.hlsli index 2cbad80112..f4f786e768 100644 --- a/package/Shaders/Common/BRDF.hlsli +++ b/package/Shaders/Common/BRDF.hlsli @@ -1,6 +1,7 @@ #ifndef __BRDF_DEPENDENCY_HLSL__ #define __BRDF_DEPENDENCY_HLSL__ +#include "Common/Color.hlsli" #include "Common/Math.hlsli" /** @@ -259,6 +260,20 @@ namespace BRDF return EnvBRDFApproxLazarov(roughness, NdotV); # endif } + + float ShadowedF90(float3 F0) { + // This scaler value is somewhat arbitrary, Schuler used 60 in his article. 
In here, we derive it from MIN_DIELECTRICS_F0 so + // that it takes effect for any reflectance lower than least reflective dielectrics + //const float t = 60.0f; + const float t = (1.0f / 0.04f); + return min(1.0f, t * Color::RGBToLuminance(F0)); + } + + float3 GGXEnergyConservationTerm(float3 F0, float roughness, float NdotV) + { + const float E = 1.0 - saturate(pow(roughness, NdotV / roughness) * ((roughness * NdotV + 0.0266916) / (NdotV + 0.466495))); + return 1 + F0 * ((1 - E) / E); + } } #endif // __BRDF_DEPENDENCY_HLSL__ \ No newline at end of file diff --git a/package/Shaders/Common/Game.hlsli b/package/Shaders/Common/Game.hlsli index 2e4a3be423..4bffb1157f 100644 --- a/package/Shaders/Common/Game.hlsli +++ b/package/Shaders/Common/Game.hlsli @@ -5,17 +5,17 @@ // Conversion constants #define GAME_UNIT_TO_CM 1.428f -#define GAME_UNIT_TO_M GAME_UNIT_TO_CM / 100.0f -#define GAME_UNIT_TO_FEET GAME_UNIT_TO_CM / 30.48f -#define GAME_UNIT_TO_INCHES GAME_UNIT_TO_CM / 2.54f +#define GAME_UNIT_TO_M (GAME_UNIT_TO_CM / 100.0f) +#define GAME_UNIT_TO_FEET (GAME_UNIT_TO_CM / 30.48f) +#define GAME_UNIT_TO_INCHES (GAME_UNIT_TO_CM / 2.54f) // Wind speed conversions -#define WIND_RAW_TO_NORMALIZED 1.0f / 255.0f -#define WIND_RAW_TO_PERCENT 100.0f / 255.0f +#define WIND_RAW_TO_NORMALIZED (1.0f / 255.0f) +#define WIND_RAW_TO_PERCENT (100.0f / 255.0f) // Direction conversions -#define DIR_RAW_TO_DEGREES 360.0f / 256.0f -#define DIR_RANGE_TO_DEGREES 180.0f / 256.0f -#define RADIANS_TO_DEGREES 180.0f / Math::PI +#define DIR_RAW_TO_DEGREES (360.0f / 256.0f) +#define DIR_RANGE_TO_DEGREES (180.0f / 256.0f) +#define RADIANS_TO_DEGREES (180.0f / Math::PI) #endif // __GAME_HLSLI__ \ No newline at end of file diff --git a/package/Shaders/Common/SharedData.hlsli b/package/Shaders/Common/SharedData.hlsli index 8ba6d4c5dc..498d6ddce4 100644 --- a/package/Shaders/Common/SharedData.hlsli +++ b/package/Shaders/Common/SharedData.hlsli @@ -238,6 +238,14 @@ namespace SharedData uint3 _padding; }; 
+ struct RaytracingSettings + { + float InteriorDirectional; + float Ambient; + float EnvMap; + uint Albedo; + }; // had to add this here to pass test + cbuffer FeatureData : register(b6) { GrassLightingSettings grassLightingSettings; @@ -255,6 +263,7 @@ namespace SharedData ExtendedTranslucencySettings extendedTranslucencySettings; LinearLightingSettings linearLightingSettings; TerrainBlendingSettings terrainBlendingSettings; + RaytracingSettings raytracingSettings; }; Texture2D DepthTexture : register(t17); diff --git a/package/Shaders/DistantTree.hlsl b/package/Shaders/DistantTree.hlsl index 86236d35f1..6624c4d999 100644 --- a/package/Shaders/DistantTree.hlsl +++ b/package/Shaders/DistantTree.hlsl @@ -242,6 +242,11 @@ PS_OUTPUT main(PS_INPUT input) float3 normal = -normalize(cross(ddx, ddy)); float3 directionalAmbientColor = max(0, Color::Ambient(mul(SharedData::DirectionalAmbient, float4(normal, 1.0)))); + +# if defined(RT) + directionalAmbientColor *= SharedData::raytracingSettings.Ambient; +# endif + # if defined(IBL) float3 iblColor = 0; if (SharedData::iblSettings.EnableDiffuseIBL) { diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index e01129f82a..f53c1fd65e 100644 --- a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -11,6 +11,8 @@ #include "Common/SharedData.hlsli" #include "Common/Skinned.hlsli" +#include "Raytracing/Includes/VanillaToPBR.hlsli" + #if defined(FACEGEN) || defined(FACEGEN_RGB_TINT) # define SKIN #endif @@ -344,6 +346,8 @@ struct PS_OUTPUT float4 Masks : SV_Target6; # if defined(SNOW) float4 Parameters : SV_Target7; +# elif defined(RT) + float4 GeomNormalMetalnessAO : SV_Target7; # endif }; #else @@ -2115,6 +2119,10 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) Glints::PrecomputeGlints(glintNoise, uvOriginal, ddx(uvOriginal), ddy(uvOriginal), material.GlintScreenSpaceScale, material.GlintCache); # endif +# if defined(RT) + float3 trueBaseColor = baseColor.xyz; +# endif + 
baseColor.xyz *= 1 - material.Metallic; material.BaseColor = baseColor.xyz; @@ -2171,12 +2179,16 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) material.FuzzWeight = lerp(material.FuzzWeight, 0, projectedMaterialWeight); } # endif -# else +# else // TRUE_PBR material.BaseColor = baseColor.xyz; -# if defined(SPECULAR) +# if defined(SPECULAR) || defined(LANDSCAPE) material.Shininess = shininess; material.Glossiness = glossiness; +# if defined(LANDSCAPE) + material.SpecularColor = 1; +# else material.SpecularColor = SpecularColor.xyz; +# endif // LANDSCAPE # else material.Shininess = 0; material.Glossiness = 0; @@ -2188,7 +2200,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) # if defined(BACK_LIGHTING) material.backLightColor = backLightColor.xyz; # endif -# endif // TRUE_PBR +# endif // TRUE_PBR # if defined(CS_HAIR) && defined(HAIR) if (SharedData::hairSpecularSettings.Enabled) { @@ -2207,6 +2219,10 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) # if defined(ENVMAP) || defined(MULTI_LAYER_PARALLAX) || defined(EYE) float envMask = EnvmapData.x * MaterialData.x; +# if defined(RT) + envMask *= SharedData::raytracingSettings.EnvMap; +# endif + float viewNormalAngle = dot(worldNormal.xyz, viewDirection); float3 envSamplingPoint = (viewNormalAngle * 2) * worldNormal.xyz - viewDirection; @@ -2280,7 +2296,6 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) envColor = envColorBase.xyz * envMask; } } - # endif // defined (ENVMAP) || defined (MULTI_LAYER_PARALLAX) || defined(EYE) float porosity = 1.0; @@ -2390,7 +2405,12 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) float llDirLightMult = SharedData::linearLightingSettings.enableLinearLighting && !SharedData::linearLightingSettings.isDirLightLinear && (inWorld || inReflection) && !SharedData::InInterior ? 
SharedData::linearLightingSettings.dirLightMult : 1.0f; float3 dirLightColor = Color::DirectionalLight(DirLightColor.xyz / max(llDirLightMult, 1e-5), SharedData::linearLightingSettings.isDirLightLinear) * llDirLightMult; + +# if defined(RT) + float3 dirLightColorMultiplier = SharedData::InInterior ? SharedData::raytracingSettings.InteriorDirectional : 1; +# else float3 dirLightColorMultiplier = 1; +# endif # if defined(WATER_EFFECTS) dirLightColorMultiplier *= WaterEffects::ComputeCaustics(waterData, input.WorldPosition.xyz, eyeIndex); @@ -2761,6 +2781,10 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) } # endif +# if defined(RT) + directionalAmbientColor *= SharedData::raytracingSettings.Ambient; +# endif + # if defined(SKYLIGHTING) float skylightingDiffuse = 1; float skylightingFadeOutFactor = 1.0; @@ -3135,6 +3159,9 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) # endif // ANISOTROPIC_ALPHA psout.Diffuse.w = alpha; +# if !defined(DEFERRED) && defined(RT) + psout.Diffuse.w = SharedData::raytracingSettings.EnvMap; +# endif # endif # if defined(LIGHT_LIMIT_FIX) && defined(LLFDEBUG) @@ -3169,7 +3196,12 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) psout.MotionVectors.zw = float2(0.0, psout.Diffuse.w); psout.Specular = float4(specularColor, psout.Diffuse.w); + +# if defined(TRUE_PBR) && defined(RT) + psout.Albedo = float4(SharedData::raytracingSettings.Albedo ? 
trueBaseColor * vertexColor : outputAlbedo, psout.Diffuse.w); +# else psout.Albedo = float4(outputAlbedo, psout.Diffuse.w); +# endif # if defined(WETNESS_EFFECTS) indirectLobeWeights.specular += wetnessReflectance; @@ -3180,7 +3212,18 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) # endif psout.Reflectance = float4(indirectLobeWeights.specular, psout.Diffuse.w); - psout.NormalGlossiness = float4(GBuffer::EncodeNormal(screenSpaceNormal), saturate(1.0 - material.Roughness), psout.Diffuse.w); + +# if defined(TRUE_PBR) || !defined(RT) + const float roughness = material.Roughness; + const float metallic = material.Metallic; +# else + const float specularity = CalcSpecularity(material.SpecularColor, glossiness); + const float roughnessFromShininess = ShininessToRoughness(material.Shininess); + const float roughness = CalcRoughness(roughnessFromShininess, specularity); + const float metallic = CalcMetallic(outputAlbedo, specularity, roughnessFromShininess); +# endif + + psout.NormalGlossiness = float4(GBuffer::EncodeNormal(screenSpaceNormal), saturate(1.0 - roughness), psout.Diffuse.w); # if defined(SNOW) # if defined(TRUE_PBR) @@ -3200,7 +3243,27 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) float stochasticBlend = (screenNoise * screenNoise) < psout.Diffuse.w ? 
1.0 : 0.0;
 	psout.NormalGlossiness.w = stochasticBlend;
-#	endif
+
+#		if defined(RT)
+#			if !defined(SNOW)
+	// Geometric normal for GeomNormalMetalnessAO: derived from screen-space derivatives of the world position for MODELSPACENORMALS, otherwise the vertex normal.
+	float3 worldGeomNormal;
+
+#				if defined(MODELSPACENORMALS)
+	float3 dd_x = ddx(input.WorldPosition.xyz);
+	float3 dd_y = ddy(input.WorldPosition.xyz);
+
+	worldGeomNormal = -normalize(cross(dd_x, dd_y));
+#				else
+	worldGeomNormal = vertexNormal;
+#				endif
+
+	float3 screenGeomNormal = normalize(FrameBuffer::WorldToView(worldGeomNormal, false, eyeIndex));
+
+	psout.GeomNormalMetalnessAO = float4(GBuffer::EncodeNormal(screenGeomNormal), metallic, psout.Diffuse.w);
+#			endif  // !defined(SNOW)
+#		endif      // defined(RT)
+#	endif          // DEFERRED
 	if ((!inWorld && !inReflection) && SharedData::linearLightingSettings.enableLinearLighting && !(Permutation::PixelShaderDescriptor & Permutation::LightingFlags::DefShadow)) {
 		psout.Diffuse.xyz = Color::TrueLinearToGamma(psout.Diffuse.xyz);
diff --git a/package/Shaders/RunGrass.hlsl b/package/Shaders/RunGrass.hlsl
index f9b2f6c16e..57e9d9600b 100644
--- a/package/Shaders/RunGrass.hlsl
+++ b/package/Shaders/RunGrass.hlsl
@@ -744,7 +744,16 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace)
 #		if defined(TRUE_PBR)
 	float3 indirectDiffuseLobeWeight, indirectSpecularLobeWeight;
+	// RT builds: albedo mode outputs the raw base colour, otherwise control falls through to the PBR lobe weights; non-RT builds always compute the PBR lobe weights.
+#			if defined(RT)
+	if (SharedData::raytracingSettings.Albedo)
+	{
+		indirectDiffuseLobeWeight = baseColor.xyz;
+		indirectSpecularLobeWeight = 0;
+	} else
+#			endif
 	PBR::GetIndirectLobeWeights(indirectDiffuseLobeWeight, indirectSpecularLobeWeight, normal, normal, viewDirection, baseColor.xyz, pbrSurfaceProperties);
+
 	diffuseColor.xyz += transmissionColor;
 	specularColor.xyz += specularColorPBR;
@@ -754,6 +763,10 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace)
 	float3 directionalAmbientColor = Color::Ambient(max(0, mul(SharedData::DirectionalAmbient, float4(normal, 1.0))));
+
+#		if defined(RT)
+	directionalAmbientColor *= SharedData::raytracingSettings.Ambient;
+#		endif
+
 #		if defined(IBL)
 	if
(SharedData::iblSettings.EnableDiffuseIBL && (!SharedData::InInterior || SharedData::iblSettings.EnableInterior)) { directionalAmbientColor *= SharedData::iblSettings.DALCAmount; diff --git a/src/Deferred.cpp b/src/Deferred.cpp index 896d7b4882..f54b581c5c 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -8,6 +8,7 @@ #include "Features/DynamicCubemaps.h" #include "Features/IBL.h" +#include "Features/Raytracing.h" #include "Features/ScreenSpaceGI.h" #include "Features/Skylighting.h" #include "Features/SubsurfaceScattering.h" @@ -33,12 +34,13 @@ struct BlendStates } }; -void SetupRenderTarget(RE::RENDER_TARGET target, D3D11_TEXTURE2D_DESC texDesc, D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc, D3D11_RENDER_TARGET_VIEW_DESC rtvDesc, D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc, DXGI_FORMAT format, uint bindFlags) +void SetupRenderTarget(RE::RENDER_TARGET target, D3D11_TEXTURE2D_DESC texDesc, D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc, D3D11_RENDER_TARGET_VIEW_DESC rtvDesc, D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc, DXGI_FORMAT format, uint bindFlags, uint miscFlags = 0) { auto renderer = globals::game::renderer; auto device = globals::d3d::device; texDesc.BindFlags = bindFlags; + texDesc.MiscFlags = miscFlags; texDesc.Format = format; srvDesc.Format = format; rtvDesc.Format = format; @@ -98,16 +100,30 @@ void Deferred::SetupResources() // TEMPORAL_AA_WATER_1 // TEMPORAL_AA_WATER_2 + auto& rt = globals::features::raytracing; + + uint miscFlags = 0; + + if (rt.loaded) { + miscFlags = D3D11_RESOURCE_MISC_SHARED | D3D11_RESOURCE_MISC_SHARED_NTHANDLE; + } + // Albedo - SetupRenderTarget(ALBEDO, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R10G10B10A2_UNORM, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); + SetupRenderTarget(ALBEDO, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R10G10B10A2_UNORM, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE, miscFlags); // Specular SetupRenderTarget(SPECULAR, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R11G11B10_FLOAT, 
D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); // Reflectance - SetupRenderTarget(REFLECTANCE, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R8G8B8A8_UNORM, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); + SetupRenderTarget(REFLECTANCE, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R8G8B8A8_UNORM, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE, miscFlags); // Normal + Roughness SetupRenderTarget(NORMALROUGHNESS, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R10G10B10A2_UNORM, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); // Masks SetupRenderTarget(MASKS, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R11G11B10_FLOAT, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE); + + // Masks2 (Geometry normals + Metallic/AO) + if (rt.loaded) { + SetupRenderTarget(MASKS2, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R16G16B16A16_UNORM, D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE, miscFlags); + rt.SetupSharedRT(); + } } { @@ -157,28 +173,6 @@ void Deferred::SetupResources() copyShadowCS = static_cast(Util::CompileShader(L"Data\\Shaders\\CopyShadowDataCS.hlsl", {}, "cs_5_0")); } - - { - D3D11_TEXTURE2D_DESC texDesc; - auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; - mainTex.texture->GetDesc(&texDesc); - - texDesc.Format = DXGI_FORMAT_R11G11B10_FLOAT; - texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; - - D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { - .Format = texDesc.Format, - .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, - .Texture2D = { - .MostDetailedMip = 0, - .MipLevels = 1 } - }; - D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = { - .Format = texDesc.Format, - .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, - .Texture2D = { .MipSlice = 0 } - }; - } } void Deferred::CopyShadowData() @@ -315,7 +309,7 @@ void Deferred::StartDeferred() SPECULAR, REFLECTANCE, MASKS, - RE::RENDER_TARGET::kNONE + MASKS2 }; for (uint i = 2; i < 8; 
i++) { @@ -353,6 +347,15 @@ void Deferred::StartDeferred() void Deferred::DeferredPasses() { + auto& rt = globals::features::raytracing; + + if (rt.Active() && rt.settings.GlobalIllumination) { + rt.DrawRTGI(); + + if (rt.settings.PathTracing) + return; + } + ZoneScoped; TracyD3D11Zone(globals::state->tracyCtx, "Deferred"); @@ -532,8 +535,7 @@ void Deferred::OverrideBlendStates() blendDesc.RenderTarget[i].RenderTargetWriteMask = blendDesc.RenderTarget[0].RenderTargetWriteMask; } - // Normals and motion vectors must use alpha blending - for (int i = 1; i < 3; i++) { + auto setAlphaBlending = [&](int i) { blendDesc.RenderTarget[i].BlendEnable = blendDesc.RenderTarget[0].BlendEnable; blendDesc.RenderTarget[i].SrcBlend = D3D11_BLEND_SRC_ALPHA; blendDesc.RenderTarget[i].DestBlend = D3D11_BLEND_INV_SRC_ALPHA; @@ -542,7 +544,12 @@ void Deferred::OverrideBlendStates() blendDesc.RenderTarget[i].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA; blendDesc.RenderTarget[i].BlendOpAlpha = D3D11_BLEND_OP_ADD; blendDesc.RenderTarget[i].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; - } + }; + + // Normals, motion vectors and geometry normals must use alpha blending + setAlphaBlending(1); + setAlphaBlending(2); + setAlphaBlending(7); DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[a][b][c][d])); } else { @@ -617,6 +624,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainComposite() if (globals::features::ibl.loaded) defines.push_back({ "IBL", nullptr }); + if (globals::features::raytracing.loaded) + defines.push_back({ "RT", nullptr }); + if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); @@ -642,6 +652,9 @@ ID3D11ComputeShader* Deferred::GetComputeMainCompositeInterior() if (globals::features::ibl.loaded) defines.push_back({ "IBL", nullptr }); + if (globals::features::raytracing.loaded) + defines.push_back({ "RT", nullptr }); + if (REL::Module::IsVR()) defines.push_back({ "FRAMEBUFFER", nullptr }); diff --git a/src/Feature.cpp 
b/src/Feature.cpp
index 923981a161..e310c93b66 100644
--- a/src/Feature.cpp
+++ b/src/Feature.cpp
@@ -16,6 +16,7 @@
 #include "Features/LightLimitFix.h"
 #include "Features/LinearLighting.h"
 #include "Features/PerformanceOverlay.h"
+#include "Features/Raytracing.h"
 #include "Features/RenderDoc.h"
 #include "Features/ScreenSpaceGI.h"
 #include "Features/ScreenSpaceShadows.h"
@@ -235,7 +236,8 @@ const std::vector& Feature::GetFeatureList()
 		&globals::features::renderDoc,
 		&globals::features::weatherEditor,
 		&globals::features::linearLighting,
-		&globals::features::unifiedWater
+		&globals::features::unifiedWater,
+		&globals::features::raytracing
 	};
 	if (REL::Module::IsVR()) {
diff --git a/src/FeatureBuffer.cpp b/src/FeatureBuffer.cpp
index 5bf884053d..9c4e057915 100644
--- a/src/FeatureBuffer.cpp
+++ b/src/FeatureBuffer.cpp
@@ -10,6 +10,7 @@
 #include "Features/LODBlending.h"
 #include "Features/LightLimitFix.h"
 #include "Features/LinearLighting.h"
+#include "Features/Raytracing.h"
 #include "Features/Skylighting.h"
 #include "Features/TerrainBlending.h"
 #include "Features/TerrainShadows.h"
@@ -51,5 +52,6 @@ std::pair GetFeatureBufferData(bool a_inWorld)
 		globals::features::ibl.settings,
 		globals::features::extendedTranslucency.GetCommonBufferData(),
 		globals::features::linearLighting.GetCommonBufferData(),
-		globals::features::terrainBlending.settings);
+		globals::features::terrainBlending.settings,
+		globals::features::raytracing.GetCommonBufferData());
 }
\ No newline at end of file
diff --git a/src/Features/Raytracing.cpp b/src/Features/Raytracing.cpp
new file mode 100644
index 0000000000..8b6b067336
--- /dev/null
+++ b/src/Features/Raytracing.cpp
@@ -0,0 +1,4530 @@
+#include "Raytracing.h"
+#include "InverseSquareLighting.h"
+#include "TerrainBlending.h"
+
+#include "Globals.h"
+#include "Raytracing/ShaderUtils.h"
+#include "ShaderCache.h"
+#include "State.h"
+
+#include "Deferred.h"
+#include
+#include
+#include
+
+#include "Utils/PerfUtils.h"
+
+#include "Menu.h"
+
+#include "Features/CloudShadows.h"
+#include "Features/ExtendedMaterials.h"
+#include "Features/ExtendedTranslucency.h"
+#include "Features/HairSpecular.h"
+#include "Features/LinearLighting.h"
+#include "Features/WetnessEffects.h"
+#include "Features/Upscaling.h"
+
+#include
+
+// WhiteFurnace here just so 'else' RAYTRACING_EXTRA_FIELDS is not empty
+#ifdef DLSS_RR
+#	define RAYTRACING_EXTRA_FIELDS WhiteFurnace, DLSSRR
+#else
+#	define RAYTRACING_EXTRA_FIELDS WhiteFurnace
+#endif
+// JSON (de)serialization of Raytracing::Settings; with the WITH_DEFAULT variant, keys missing from the JSON keep the value of a default-constructed Settings.
+NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(
+	Raytracing::Settings,
+	Enabled,
+	GlobalIllumination,
+	AdvancedSettings,
+	TraceMode,
+	Denoiser,
+	Resolution,
+	Bounces,
+	SamplesPerPixel,
+	Roughness,
+	Metalness,
+	Emissive,
+	Effect,
+	Sky,
+	Directional,
+	Point,
+	TexLODBias,
+	LodDimmer,
+	RaytracedShadows,
+	PathTracing,
+	CullShadows,
+	RussianRoulette,
+	ConvertToGamma,
+	PerformanceOverlay,
+	DebugOutput,
+	EnablePIXCapture,
+	PIXCaptureLocation,
+	EnableDebugDevice,
+	SHaRC,
+	SVGFDiffuse,
+	SVGFSpecular,
+	DisableSkinned,
+	InteriorSun,
+	RAYTRACING_EXTRA_FIELDS)
+
+////////////////////////////////////////////////////////////////////////////////////
+// Resets settings to a value-initialized Settings and flags a shader recompile.
+void Raytracing::RestoreDefaultSettings()
+{
+	settings = {};
+
+	recompileReason |= RecompileReason::RestoreDefaultsSettings;
+}
+// Replaces settings with the contents of o_json and flags a shader recompile.
+void Raytracing::LoadSettings(json& o_json)
+{
+	settings = o_json;
+
+	recompileReason |= RecompileReason::LoadSettings;
+}
+// Writes the current settings into o_json.
+void Raytracing::SaveSettings(json& o_json)
+{
+	o_json = settings;
+}
+// Returns the constraints this feature places on other features' settings; empty unless the feature is loaded and enabled.
+std::vector Raytracing::GetActiveConstraints() const
+{
+	std::vector constraints;
+
+	// Only impose constraints when the feature is loaded and enabled
+	if (!loaded || !settings.Enabled) {
+		return constraints;
+	}
+
+	// Constrain Upscaling's "upscaleMethod" setting to kNONE; the reason is given by the message string below
+	constraints.push_back({ { "Upscaling", "upscaleMethod" },
+		static_cast(Upscaling::UpscaleMethod::kNONE),
+		"Upscaling also creates a DirectX12 device which results in a crash.",
+		true });
+
+	return
constraints; +} + +static void DrawFloat2(const char* label, float2& v, float min = 0.0f, float max = 1.0f) +{ + float floats[2] = { v.x, v.y }; + if (ImGui::SliderFloat2(label, floats, min, max)) { + v = { floats[0], floats[1] }; + v.Clamp({ min, min }, { max, max }); + } +} + +template + requires std::is_enum_v +static bool DrawEnumRadio(const char* label, T& variable, const char* tooltip = nullptr, const char* const* tooltips = nullptr) +{ + ImGui::PushID(label); + + auto variablePrev = variable; + + int denoiser = static_cast(variable); + ImGui::TextUnformatted(label); + + if (tooltip != nullptr) + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("%s", tooltip); + + ImGui::SameLine(); + ImGui::Dummy(ImVec2(25, 0)); + + auto i = 0; + + for (auto& [value, name] : magic_enum::enum_entries()) { + ImGui::SameLine(); + ImGui::RadioButton(name.data(), &denoiser, static_cast(value)); + + if (tooltips != nullptr) + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("%s", tooltips[i]); + + i++; + } + + ImGui::PopID(); + + variable = static_cast(denoiser); + + return variable != variablePrev; +} + +template + requires std::is_enum_v +static bool DrawEnumCombo(const char* label, T& variable, const char* tooltip = nullptr, const char* const* tooltips = nullptr) +{ + ImGui::PushID(label); + + auto variablePrev = variable; + + if (ImGui::BeginCombo(label, magic_enum::enum_name(variable).data())) { + auto i = 0; + + for (auto& value : magic_enum::enum_values()) { + bool isSelected = (variable == value); + + if (ImGui::Selectable(magic_enum::enum_name(value).data(), isSelected)) + variable = value; + + if (tooltips != nullptr) + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("%s", tooltips[i]); + + if (isSelected) + ImGui::SetItemDefaultFocus(); + + i++; + } + + ImGui::EndCombo(); + } else if (tooltip != nullptr) { + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("%s", tooltip); + } + + ImGui::PopID(); + + return variable != variablePrev; 
+} + +// Draws the settings tab bar (General / Advanced / Debug), then recompiles the RTGI and composite shaders if any control recorded a recompile reason. +void Raytracing::DrawSettings() +{ + if (ImGui::BeginTabBar("Settings")) { + DrawGeneralSettings(); + DrawAdvancedSettings(); + DrawDebugSettings(); + + ImGui::EndTabBar(); + } + + if (recompileReason != RecompileReason::None) { + CompileRTGIShaders(); + CompileCompositeShader(); + recompileReason = RecompileReason::None; + accumulatedFrames = 0; // Reset accumulation on recompile/settings change + } +} + +// Draws the SHaRC options; does nothing unless the trace mode is SHaRC. Each value is clamped after editing. +void Raytracing::DrawSHaRCSettings() +{ + if (settings.TraceMode != TraceMode::SHaRC) + return; + + if (ImGui::CollapsingHeader("SHaRC")) { + auto& sharcSettings = settings.SHaRC; + + ImGui::DragFloat("Scale", &sharcSettings.SceneScale, 0.001f, 0.1f, 10.0f); + sharcSettings.SceneScale = std::clamp(sharcSettings.SceneScale, 0.1f, 10.0f); + + ImGui::InputInt("Accumulation Frames", &sharcSettings.AccumFrameNum); + sharcSettings.AccumFrameNum = std::clamp(sharcSettings.AccumFrameNum, 5, 100); + + ImGui::InputInt("Stale Frames", &sharcSettings.StaleFrameNum); + sharcSettings.StaleFrameNum = std::clamp(sharcSettings.StaleFrameNum, 8, 128); + + ImGui::Checkbox("Antifirefly Filter", &sharcSettings.AntifireflyFilter); + } +} +//SVGFDiffuse + +// Draws the SVGF denoiser options; does nothing unless the SVGF denoiser is selected. One tab per pipeline (diffuse, specular). +void Raytracing::DrawSVGFSettings() +{ + if (settings.Denoiser != Denoiser::SVGF) + return; + + // Shameless word by word copy of jiaye's settings + if (ImGui::CollapsingHeader("SVGF")) { + if (ImGui::BeginTabBar("svgf_tabbar")) { + DrawSVGFInternalSettings("Diffuse", settings.SVGFDiffuse); + DrawSVGFInternalSettings("Specular", settings.SVGFSpecular); + + ImGui::EndTabBar(); + } + } +} + +// Draws one tab named `name` holding the tuning controls for `svgfSettings`, each with a hover tooltip. +void Raytracing::DrawSVGFInternalSettings(const char* name, SVGFPipeline::Settings& svgfSettings) +{ + if (ImGui::BeginTabItem(name)) { + ImGui::SliderInt("Alpha Frames", (int*)&svgfSettings.AlphaFrames, 1, 64, "%d",
ImGuiSliderFlags_AlwaysClamp); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Temporal feedback frames for color."); + + ImGui::SliderInt("Moments Alpha Frames", (int*)&svgfSettings.MomentsAlphaFrames, 1, 64, "%d", ImGuiSliderFlags_AlwaysClamp); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Temporal feedback frames for moments."); + + ImGui::SliderInt("À Trous Iterations", (int*)&svgfSettings.AtrousIterations, 1, 5, "%d", ImGuiSliderFlags_AlwaysClamp); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Number of À Trous wavelet filter iterations. More iterations yield smoother results but may blur details and have a higher computational cost."); + + ImGui::SliderFloat("Color Phi", &svgfSettings.ColorPhi, 0.01f, 32.0f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Controls sensitivity to color differences in the À Trous filter. Lower values preserve more detail but may retain noise."); + + ImGui::SliderFloat("Normal Phi", &svgfSettings.NormalPhi, 1.0f, 1024.0f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Controls sensitivity to normal differences in the À Trous filter. Higher values preserve more detail but may retain noise."); + + ImGui::SliderFloat("Depth Phi", &svgfSettings.DepthPhi, 0.001f, 0.2f, "%.3f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Controls sensitivity to depth differences in the À Trous filter. Higher values preserve more detail but may retain noise."); + + ImGui::SliderFloat("Depth Threshold", &svgfSettings.DepthThreshold, 0.0f, 1.0f, "%.3f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Depth rejection difference. Lower values are more aggressive."); + + ImGui::SliderInt("Normal Threshold", (int*)&svgfSettings.NormalThreshold, 0, 90, "%dº", ImGuiSliderFlags_AlwaysClamp); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Normal rejection difference in degrees. Lower values are more aggressive."); + + ImGui::SliderInt("History Threshold", (int*)&svgfSettings.HistoryThreshold, 0, 8, "%d", ImGuiSliderFlags_AlwaysClamp); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Minimal accumulated frames before applying variance filter.
Lower is faster but has less filtering."); + + ImGui::Checkbox("Variance", &svgfSettings.Variance); + + ImGui::Checkbox("Spatial", &svgfSettings.Spatial); + + ImGui::EndTabItem(); + } +} + +#ifdef DLSS_RR +// Draws the DLSS Ray Reconstruction options; does nothing unless that denoiser is selected. +void Raytracing::DrawDLSSRRSettings() +{ + if (settings.Denoiser != Denoiser::DLSSRR) + return; + + if (ImGui::CollapsingHeader("DLSS RR")) { + auto& dlssrrSettings = settings.DLSSRR; + + DrawEnumCombo("Quality Mode", dlssrrSettings.QualityMode); + DrawEnumRadio("Preset", dlssrrSettings.Preset); + } +} +#endif + +// Draws the options of whichever denoiser is active; for path-traced accumulation it shows the accumulated frame count and a camera-movement notice. +void Raytracing::DrawDenoiserSettings() +{ + DrawSVGFSettings(); +#ifdef DLSS_RR + DrawDLSSRRSettings(); +#endif + if (settings.Denoiser == Denoiser::Accumulation && settings.PathTracing) { + if (ImGui::CollapsingHeader("Accumulation")) { + ImGui::Text("Accumulated Frames: %d", accumulatedFrames); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Number of frames accumulated for denoising.\nAccumulation resets when camera moves."); + } + + if (cameraHasMoved) { + ImGui::TextColored({ 1.0f, 0.5f, 0.0f, 1.0f }, "Camera is moving - resetting accumulation"); + } + } + } +} +//ResolutionMode + +// Draws the ray tracing resolution selector; the selector is shown disabled while DLSS Ray Reconstruction is the active denoiser. +void Raytracing::DrawResolutionSettings() +{ + bool disabled = false; + + // DLSS RR manages RT resolution itself +#ifdef DLSS_RR + if (settings.Denoiser == Denoiser::DLSSRR) { + ImGui::TextColored({ 1.0f, 0.0f, 0.0f, 1.0f }, "*DLSS Ray Reconstruction manages resolution via the 'Quality Mode' setting*"); + disabled |= true; + } +#endif + + if (disabled) + ImGui::BeginDisabled(); + + DrawEnumRadio("Resolution", settings.Resolution, "Controls the Ray Tracing resolution.
Lower resolutions cast fewer rays, which improves performance but greatly reduces detail and clarity."); + + if (disabled) + ImGui::EndDisabled(); +} + +// Draws the "Lighting" section: emissive, effect and sky strengths (each kept non-negative after editing), followed by the per-light settings. +void Raytracing::DrawLightingSettings() +{ + if (ImGui::CollapsingHeader("Lighting")) { + if (ImGui::DragFloat("Emissive Strength", &settings.Emissive, 0.001f)) + settings.Emissive = std::max(0.0f, settings.Emissive); + + if (ImGui::DragFloat("Effect Strength", &settings.Effect, 0.001f)) + settings.Effect = std::max(0.0f, settings.Effect); + + if (ImGui::DragFloat("Sky Strength", &settings.Sky, 0.001f)) + settings.Sky = std::max(0.0f, settings.Sky); + + DrawLightSettings(); + } +} + +// Draws the "Lights" section: directional/point strengths (kept non-negative after editing) and the shadow-related toggles. +void Raytracing::DrawLightSettings() +{ + if (ImGui::CollapsingHeader("Lights")) { + if (ImGui::TreeNodeEx("Direct Lights", ImGuiTreeNodeFlags_DefaultOpen)) { + if (ImGui::DragFloat("Directional Strength", &settings.Directional, 0.001f)) + settings.Directional = std::max(0.0f, settings.Directional); + + if (ImGui::DragFloat("Point Strength", &settings.Point, 0.001f)) + settings.Point = std::max(0.0f, settings.Point); + + ImGui::Checkbox("Lod Dimmer", &settings.LodDimmer); + + ImGui::Checkbox("Raytraced Shadows", &settings.RaytracedShadows); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Replaces directional light shadowmaps.\n"); + } + + ImGui::Checkbox("Cull Shadows", &settings.CullShadows); + + ImGui::TreePop(); + } + } +} + +// Draws the "General" tab. Changing the trace mode, denoiser, bounce count, samples per pixel or the path tracing toggle records RecompileReason::General. +void Raytracing::DrawGeneralSettings() +{ + if (!ImGui::BeginTabItem("General")) + return; + + ImGui::PushID("GeneralSettings"); + + ImGui::Checkbox("Enabled", &settings.Enabled); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Enable Ray-Traced Global Illumination."); + } + + ImGui::Checkbox("Global Illumination", &settings.GlobalIllumination); + + if (DrawEnumRadio("TraceMode", settings.TraceMode, nullptr, TraceModeTooltips)) + recompileReason |= RecompileReason::General; + + if (DrawEnumRadio("Denoiser", settings.Denoiser)) + recompileReason |= RecompileReason::General; + +
DrawResolutionSettings(); + + // Bounces + { + // Remember the previous value so a recompile is only requested when it actually changed + int bounces = settings.Bounces; + + if (ImGui::SliderInt("Bounces", &settings.Bounces, 1, 32)) + settings.Bounces = std::clamp(settings.Bounces, 1, 32); + + if (bounces != settings.Bounces) + recompileReason |= RecompileReason::General; + } + + // Samples Per Pixel + { + // Same change detection as for the bounce count above + int samples = settings.SamplesPerPixel; + + if (ImGui::SliderInt("Samples Per Pixel", &settings.SamplesPerPixel, 1, 32)) + settings.SamplesPerPixel = std::clamp(settings.SamplesPerPixel, 1, 32); + + if (samples != settings.SamplesPerPixel) + recompileReason |= RecompileReason::General; + } + + DrawFloat2("Roughness", settings.Roughness); + DrawFloat2("Metalness", settings.Metalness); + + DrawSHaRCSettings(); + + DrawDenoiserSettings(); + + DrawLightingSettings(); + + if (ImGui::Checkbox("Path Tracing", &settings.PathTracing)) { + recompileReason |= RecompileReason::General; + } + + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Experimental Path Tracing mode.\n"); + } + + /*ImGui::Checkbox("Recompress Textures", &settings.DebugShare); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Some texture formats cannot be shared between APIs, enabling this option ensures they'll be recompressed in a lower quality yet compatible format.\n"); + }*/ + + ImGui::Checkbox("Russian Roulette", &settings.RussianRoulette); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Enable Russian Roulette termination for ray paths to improve performance at the cost of some variance.\n"); + } + + ImGui::Checkbox("Convert To Gamma", &settings.ConvertToGamma); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Convert the final raytraced output to gamma space.\n"); + } + + ImGui::PopID(); + + ImGui::EndTabItem(); +} + +// Draws the subsurface scattering controls. Toggling the feature records RecompileReason::Advanced; the detailed controls are only shown while it is enabled. +void Raytracing::DrawSSSSettings() +{ + auto& sssSettings = settings.AdvancedSettings.SSSSettings; + + if (ImGui::Checkbox("Enable Subsurface Scattering", &sssSettings.Enabled)) + recompileReason |=
RecompileReason::Advanced; + + if (!sssSettings.Enabled) + return; + + if (ImGui::CollapsingHeader("Subsurface Scattering")) { + // NOTE(review): this check is redundant - the early return above already covers the disabled case + if (sssSettings.Enabled) { + ImGui::SliderInt("Sample Count", &sssSettings.SampleCount, 1, 16); + ImGui::SliderFloat("Max Sample Radius", &sssSettings.MaxSampleRadius, 0.01f, 64.0f, "%.2f"); + ImGui::Checkbox("Enable Transmission", &sssSettings.EnableTransmission); + ImGui::Checkbox("Material Override", &sssSettings.MaterialOverride); + + if (sssSettings.MaterialOverride) { + if (ImGui::TreeNodeEx("Subsurface Scattering", ImGuiTreeNodeFlags_DefaultOpen)) { + ImGui::ColorEdit3("Override Transmission Color", reinterpret_cast(&sssSettings.OverrideTransmissionColor), ImGuiColorEditFlags_Float); + ImGui::ColorEdit3("Override Scattering Color", reinterpret_cast(&sssSettings.OverrideScatteringColor), ImGuiColorEditFlags_Float); + ImGui::SliderFloat("Override Scale", &sssSettings.OverrideScale, 0.01f, 1000.0f, "%.2f"); + ImGui::SliderFloat("Override Anisotropy", &sssSettings.OverrideAnisotropy, -0.99f, 0.99f); + + ImGui::TreePop(); + } + } + } + } +} + +// Draws the "Advanced" tab: culling, update rate, RIS, texture LOD bias, BSDF/BRDF selection and light evaluation. Controls that need new shaders record RecompileReason::Advanced. +void Raytracing::DrawAdvancedSettings() +{ + if (!ImGui::BeginTabItem("Advanced")) + return; + + ImGui::PushID("AdvancedSettings"); + + auto& advSettings = settings.AdvancedSettings; + + if (ImGui::TreeNodeEx("Culling", ImGuiTreeNodeFlags_DefaultOpen)) { + DrawEnumRadio("Culling", advSettings.Culling.Mode, nullptr, CullingModeTooltips); + + ImGui::SliderInt("Minimal Radius", &advSettings.Culling.MinRadius, 0, 10, "%d", ImGuiSliderFlags_AlwaysClamp); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Nodes with a radius lower than this value are culled when outside the view.\n"); + } + + DrawEnumRadio("Distance Culling Mode", advSettings.Culling.DistanceMode, nullptr, CullingDistanceModeTooltips); + + if (advSettings.Culling.DistanceMode == CullingDistanceMode::Minimal) { + ImGui::InputInt("Minimal Distance", &advSettings.Culling.MinDistance); + if (auto _tt =
Util::HoverTooltipWrapper()) { + ImGui::Text("Distance to cull when outside the view regardless of radius.\n"); + } + } else { + ImGui::InputInt("Starting Distance", &advSettings.Culling.StartDistance); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Minimal distance to start modulating radius.\n"); + } + + ImGui::SliderFloat("Distance Ratio", &advSettings.Culling.DistanceRatio, 0.1f, 1.0f, "%.2f", ImGuiSliderFlags_AlwaysClamp); + } + + ImGui::TreePop(); + } + + ImGui::Checkbox("Variable Update Rate", &advSettings.VariableUpdateRate); + + + if (ImGui::Checkbox("Resampled Importance Sampling", &advSettings.RIS.Enabled)) + recompileReason |= RecompileReason::Advanced; + + ImGui::SliderInt("RIS Max Candidates", &advSettings.RIS.MaxCandidates, 2, 16); + + ImGui::SliderFloat("Texture LOD Bias", &settings.TexLODBias, -4.0f, 4.0f, "%.1f"); + + if (ImGui::Checkbox("GGX Energy Conservation", &advSettings.GGXEnergyConservation)) + recompileReason |= RecompileReason::Advanced; + + if (DrawEnumCombo("Hair BSDF", advSettings.HairBSDF)) + recompileReason |= RecompileReason::Advanced; + + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Best with hair specular feature enabled.\n"); + } + + DrawSSSSettings(); + + if (DrawEnumCombo("Diffuse BRDF", advSettings.DiffuseBRDF)) + recompileReason |= RecompileReason::Advanced; + + if (DrawEnumRadio("Light Evaluation Mode", advSettings.LightEvalMode, nullptr, LightEvalModeTooltips)) + recompileReason |= RecompileReason::Advanced; + + if (DrawEnumRadio("Lighting Mode", advSettings.LightingMode, nullptr, LightingModeTooltips)) + recompileReason |= RecompileReason::Advanced; + + ImGui::Checkbox("Interior Sun", &settings.InteriorSun); + + ImGui::PopID(); + + ImGui::EndTabItem(); +} + +// Draws the "Debug" tab: skinning options, extra shader defines with a manual recompile button, the debug output selector, PIX capture controls, the normal-map and sky-hemisphere visualizations and a statistics tree. +void Raytracing::DrawDebugSettings() +{ + if (!ImGui::BeginTabItem("Debug")) + return; + + ImGui::PushID("DebugSettings"); + + if (ImGui::TreeNodeEx("Skinning and DynamicTriShapes", ImGuiTreeNodeFlags_DefaultOpen)) { +
ImGui::Checkbox("Disable Skinning", &settings.DisableSkinned); + + if (ImGui::Checkbox("Use Optimized Mapping", &skinningPipeline->settings.OptimizedMapping)) + skinningPipeline->recompile = true; + + if (ImGui::SliderInt("Thread Group Size", (int*)&skinningPipeline->settings.ThreadGroupSize, + SkinningPipeline::MIN_THREAD_GROUP_SIZE, SkinningPipeline::MAX_THREAD_GROUP_SIZE, "%d", ImGuiSliderFlags_AlwaysClamp)) + skinningPipeline->recompile = true; + + ImGui::TreePop(); + } + + ImGui::Checkbox("Disable Texture Sharing", &debugDisableTextureSharing); + + ImGui::InputText("Shader Defines", &debugDefines); + + ImGui::SameLine(); + + if (ImGui::Button("Recompile")) + recompileReason |= RecompileReason::Debug; + + if (ImGui::Checkbox("White Furnace", &settings.WhiteFurnace)) + recompileReason |= RecompileReason::Debug; + + // Debug display mode + if (ImGui::BeginCombo("Debug Output", magic_enum::enum_name(settings.DebugOutput).data())) { + for (auto& value : magic_enum::enum_values()) { + bool isSelected = (settings.DebugOutput == value); + + if (ImGui::Selectable(magic_enum::enum_name(value).data(), isSelected)) + settings.DebugOutput = value; + + if (isSelected) + ImGui::SetItemDefaultFocus(); + } + + ImGui::EndCombo(); + } + + ImGui::Checkbox("Enable PIX Capture", &settings.EnablePIXCapture); + { + if (settings.EnablePIXCapture) { + if (ImGui::TreeNodeEx("Pix Capture", ImGuiTreeNodeFlags_DefaultOpen)) { + //Pix Capture Location + { + int pixCapLocation = static_cast(settings.PIXCaptureLocation); + ImGui::TextUnformatted("PIX Capture"); + + ImGui::SameLine(); + ImGui::Dummy(ImVec2(25, 0)); + + for (auto& [value, name] : magic_enum::enum_entries()) { + ImGui::SameLine(); + ImGui::RadioButton(name.data(), &pixCapLocation, static_cast(value)); + } + + settings.PIXCaptureLocation = static_cast(pixCapLocation); + } + + if (ImGui::Button("Single Frame Capture")) { + pixCapture = true; + pixCaptureStarted = false; + } + + if (ImGui::Button("Start MultiFrame Capture")) { +
pixCapture = true; + pixCaptureStarted = false; + pixMultiFrame = true; + } + + if (pixCapture && pixCaptureStarted && pixMultiFrame && ImGui::Button("End MultiFrame Capture")) { + pixMultiFrame = false; + } + + // Label corrected from "TRD" so it matches the pixTDR flag this button sets + if (ImGui::Button("Start TDR Capture")) { + pixCapture = true; + pixCaptureStarted = false; + pixTDR = true; + } + + ImGui::TreePop(); + } + } + } + + ImGui::Checkbox("Enabled Debug Device", &settings.EnableDebugDevice); + + ImGui::Checkbox("MSN Visualization", &debugNormalMap); + + if (debugNormalMap) { + if (normalMaps.empty()) { + ImGui::Text("No normal maps converted."); + } else { + eastl::vector> normalMapVector; + + for (auto& [msNormal, convertedNormal] : normalMaps) { + normalMapVector.emplace_back(msNormal, convertedNormal.get()); + } + + auto normalMapsCount = static_cast(normalMapVector.size()); + + // Clamp to the last valid index; the list is non-empty in this branch. Clamping to the count itself would let the .at() below throw once the list shrinks. + if (debugNormalMapIndex >= normalMapsCount) + debugNormalMapIndex = normalMapsCount - 1; + + if (ImGui::BeginCombo("NormalMap", std::to_string(debugNormalMapIndex).c_str())) { + for (uint i = 0; i < normalMapsCount; i++) { + bool isSelected = debugNormalMapIndex == i; + + auto& [msNormal, convertedNormal] = normalMapVector.at(i); + + // Check the pointer itself before reading any of its members + if (!convertedNormal) + continue; + + if (!convertedNormal->OriginalSRV) + continue; + + if (!convertedNormal->converted) + continue; + + if (!convertedNormal->Texture || !convertedNormal->Texture->srv || !convertedNormal->Texture->srv.get()) + continue; + + if (ImGui::Selectable(std::to_string(i).c_str(), isSelected)) + debugNormalMapIndex = i; + + if (isSelected) + ImGui::SetItemDefaultFocus(); + } + + ImGui::EndCombo(); + } + + auto& [msNormal, convertedNormal] = normalMapVector.at(debugNormalMapIndex); + + if (convertedNormal && convertedNormal->converted && convertedNormal->OriginalSRV && convertedNormal->Texture && convertedNormal->Texture->srv && convertedNormal->Texture->srv.get()) { + ImGui::Image(convertedNormal->OriginalSRV, ImVec2(256, 256)); + ImGui::SameLine(); + ImGui::Image(convertedNormal->Texture->srv.get(), ImVec2(256,
256)); + } + } + } + + ImGui::Checkbox("Sky Hemisphere Visualization", &debugSkyHemi); + + if (debugSkyHemi) + ImGui::Image(skyHemisphere->srv, ImVec2(512, 512)); + + if (ImGui::TreeNodeEx("Statistics", ImGuiTreeNodeFlags_DefaultOpen)) { + // Already-formatted strings go through TextUnformatted so a '%' in the data (e.g. a file name) is never parsed as a printf specifier + ImGui::TextUnformatted(std::format("Lights: {}", lights.size()).c_str()); + + ImGui::TextUnformatted(std::format("Used Textures: {}, Shared: {}", textureRegisters.UsedCount(), textures.size()).c_str()); + ImGui::TextUnformatted(std::format("Used Shapes: {}", shapeRegisters.UsedCount()).c_str()); + ImGui::TextUnformatted(std::format("Models: {}", models.size()).c_str()); + + auto instanceCount = instances.size(); + + if (ImGui::TreeNodeEx(std::format("Instances: {}", instanceCount).c_str())) { + for (auto& [root, instance] : instances) { + ImGui::TextUnformatted(std::format("{}, Detached: {}", std::string_view{ instance.filename }, instance.IsDetached()).c_str()); + } + + // An opened tree node has to be popped again; this call was missing + ImGui::TreePop(); + } + + if (settings.GlobalIllumination) { + auto blasInstancesCount = blasInstances.size(); + ImGui::TextUnformatted(std::format("GI Unculled: {}, Culled: {}", blasInstancesCount, instanceCount - blasInstancesCount).c_str()); + } + + if (RaytracedShadows()) { + auto blasInstancesCount = blasShadowInstances.size(); + ImGui::TextUnformatted(std::format("Shadow Unculled: {}, Culled: {}", blasInstancesCount, instanceCount - blasInstancesCount).c_str()); + } + + ImGui::TreePop(); + } + + ImGui::PopID(); + + ImGui::EndTabItem(); +} + +void Raytracing::DrawOverlay() +{ + auto* menu = Menu::GetSingleton(); + + if (!globals::state || !menu) + return; + + // Set window flags - no decoration and only movable when ShowBorder is true + ImGuiWindowFlags windowFlags = ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_AlwaysAutoResize; + + // Only allow mouse interaction when the main menu is open + if (!menu->IsEnabled) { + windowFlags |= ImGuiWindowFlags_NoInputs; + } + + ImGui::Begin("Raytracing Overlay", NULL, windowFlags); + + auto DrawRow = [](const char* label, size_t instances, float cpums, float gpums, [[maybe_unused]] double frameTime = 0.0f) { +
ImGui::TableNextRow(); + + ImGui::TableNextColumn(); + // NOTE(review): label is used as a printf format string here; harmless only because the callers below pass literals without '%' + ImGui::Text(label); + + ImGui::TableNextColumn(); + ImGui::Text("%zu", instances); + + ImGui::TableNextColumn(); + ImGui::Text("%g ms", cpums); + + ImGui::TableNextColumn(); + ImGui::Text("%g ms", gpums); + }; + + // One row per effect: name, instance count, CPU time, GPU time + if (ImGui::BeginTable("Effects", 4, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) { + ImGui::TableSetupColumn("Effect"); + ImGui::TableSetupColumn("Instances"); + ImGui::TableSetupColumn("CPU"); + ImGui::TableSetupColumn("GPU"); + ImGui::TableHeadersRow(); + + if (RaytracedShadows()) + DrawRow("Shadows", blasShadowInstances.size(), shadowsCPUTime, shadowsGPUTime); + + // GI/PT + DrawRow(settings.PathTracing ? "Path Tracing" : "Global Illumination", blasInstances.size(), mainCPUTime, mainGPUTime); + + // Denoiser + //DrawRow(settings.PathTracing ? "Denoiser", blasInstances.size(), 0); + + ImGui::EndTable(); + } + + if (settings.PathTracing && settings.Denoiser == Denoiser::Accumulation) { + ImGui::Separator(); + ImGui::Text("Accumulation Frames: %d", accumulatedFrames); + } + + ImGui::End(); +} + +// (Re)creates the textures sized by renderSize together with their descriptors in giHeap, hands the size to the SVGF denoiser and refreshes the render-resolution constant buffer. +void Raytracing::SetupOutputRT() +{ + logger::info("[RT] SetupOutputRT - RenderSize: {}x{}", renderSize.x, renderSize.y); + + // Helper: drops any previous texture, creates a UAV-capable one of the given format, names it, writes its UAV into `slot` and transitions it to UNORDERED_ACCESS + auto createRT = [&](eastl::unique_ptr& texture, DXGI_FORMAT format, GIHeapDef::Slot slot, LPCWSTR name) { + if (texture) + texture.reset(); + + texture = eastl::make_unique(d3d12Device.get(), renderSize.x, renderSize.y, format, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + texture->SetName(name); + texture->CreateUAV(giHeap->CPUHandle(slot)); + texture->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + }; + + // u0 - Output texture + createRT(outputTexture, DXGI_FORMAT_R16G16B16A16_FLOAT, GIHeap::Slot::Output, L"Output texture"); + + // u1 - Diffuse Albedo Path Tracing texture + createRT(diffuseAlbedoPathTracingTexture, DXGI_FORMAT_R8G8B8A8_UNORM, GIHeap::Slot::DiffuseAlbedoPathTracing, L"Diffuse Albedo Path Tracing texture"); + + // u2 - Normal
Roughness Path Tracing texture + createRT(normalRoughnessPathTracingTexture, DXGI_FORMAT_R16G16B16A16_SNORM, GIHeap::Slot::NormalRoughnessPathTracing, L"Normal Roughness Path Tracing texture"); + + // u3 - Reflectance texture + // Created from a D3D11 description with both devices instead of through createRT (see the commented-out call at the end of this scope) + { + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = renderSize.x; + texDesc.Height = renderSize.y; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + specularAlbedoTexture = eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + DX::ThrowIfFailed(specularAlbedoTexture->resource->SetName(L"Specular Albedo texture")); + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uavDesc.Format = texDesc.Format; + + d3d12Device->CreateUnorderedAccessView(specularAlbedoTexture->resource.get(), nullptr, &uavDesc, giHeap->CPUHandle(GIHeap::Slot::Reflectance)); + + // NOTE(review): the barrier assumes the resource currently is in NON_PIXEL_SHADER_RESOURCE state - confirm against the texture wrapper's initial state + const auto& barrier = CD3DX12_RESOURCE_BARRIER::Transition(specularAlbedoTexture->resource.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + commandList->ResourceBarrier(1, &barrier); + + //createRT(specularAlbedoTexture, DXGI_FORMAT_R16G16B16A16_FLOAT, GIHeap::Slot::Reflectance, L"Reflectance texture"); + } + + // u4 - Specular Hit Distance texture + createRT(specularHitDistanceTexture, DXGI_FORMAT_R32_FLOAT, GIHeap::Slot::SpecularHitDist, L"Specular Hit Distance texture"); + + // Motion vector + { + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = renderSize.x; + texDesc.Height = renderSize.y; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R16G16_FLOAT; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + motionVectorsTexture =
eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + DX::ThrowIfFailed(motionVectorsTexture->resource->SetName(L"Motion Vectors Texture")); + } + + // Normal Roughness + { + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = renderSize.x; + texDesc.Height = renderSize.y; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R16G16B16A16_SNORM; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + normalRoughnessTexture = eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + DX::ThrowIfFailed(normalRoughnessTexture->resource->SetName(L"Normal Roughness Texture")); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = texDesc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = texDesc.MipLevels; + srvDesc.Texture2D.PlaneSlice = 0; + srvDesc.Texture2D.ResourceMinLODClamp = 0.0f; + + d3d12Device->CreateShaderResourceView(normalRoughnessTexture->resource.get(), &srvDesc, giHeap->CPUHandle(GIHeap::Slot::NormalRoughness)); + } + + // Diffuse (Metallic modulated albedo) + { + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = renderSize.x; + texDesc.Height = renderSize.y; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R10G10B10A2_UNORM; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + diffuseAlbedoTexture = eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + // NOTE(review): the debug name repeats "Texture" - probably meant "Diffuse Albedo Texture" + DX::ThrowIfFailed(diffuseAlbedoTexture->resource->SetName(L"Diffuse Texture Texture")); + } + + svgfDenoiser->SetupTextureResources(renderSize); + + renderResData->RenderRes = renderSize; + renderResData->RenderResRcp = float2(1.0f /
static_cast(renderSize.x), 1.0f / static_cast(renderSize.y)); + + renderResCB->Update(renderResData.get(), sizeof(RenderResData)); +} + +void Raytracing::SetupResources() +{ +#if defined(DLSS_RR) + InitRR(); +#endif + + auto renderer = globals::game::renderer; + auto device = globals::d3d::device; + + normalMapConverter = eastl::make_unique(); + + auto device12 = d3d12Device.get(); + + giHeap = eastl::make_unique>( + device12, + D3D12_DESCRIPTOR_HEAP_DESC(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, GIHeap::NumDescriptors(), D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + + shadowHeap = eastl::make_unique>( + device12, + D3D12_DESCRIPTOR_HEAP_DESC(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, ShadowsHeap::NumDescriptors(), D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + + for (auto& pipeline : GetPipelines()) { + pipeline->Initialize(); + pipeline->CreateRootSignature(device12); + pipeline->CompileShaders(device12); + pipeline->SetupResources(device12); + } + + sharcPipeline->CreateUAVs( + giHeap->CPUHandle(GIHeap::Slot::SHaRCHashEntries), + giHeap->CPUHandle(GIHeap::Slot::SHaRCLock), + giHeap->CPUHandle(GIHeap::Slot::SHaRCAccumulation), + giHeap->CPUHandle(GIHeap::Slot::SHaRCResolved)); + + // Not a standard DX12 pipeline + svgfDenoiser = eastl::make_unique(); + svgfDenoiser->SetupResources(); + + renderResData = eastl::make_unique(); + + // Constant buffers + auto cbDesc = ConstantBufferDesc(); + renderResCB = eastl::make_unique(cbDesc); + + accumulationCBData = eastl::make_unique(); + auto accCbDesc = ConstantBufferDesc(); + accumulationCB = eastl::make_unique(accCbDesc); + + // Setup default textures (this is a bit wordy...) 
+ { + uint8_t white[] = { 255u, 255u, 255u, 255u }; + uint8_t gray[] = { 128u, 128u, 128u, 255u }; + uint8_t normal[] = { 128u, 128u, 255u, 255u }; + uint8_t black[] = { 0u, 0u, 0u, 0u }; + uint8_t rmaos[] = { 128u, 0u, 255u, 255u }; + uint8_t detail[] = { 63u, 64u, 63u, 255u }; + + defaultWhiteTexture = eastl::make_shared(d3d12Device.get(), textureRegisters.Allocate()); + defaultGrayTexture = eastl::make_shared(d3d12Device.get(), textureRegisters.Allocate()); + defaultNormalTexture = eastl::make_shared(d3d12Device.get(), textureRegisters.Allocate()); + defaultBlackTexture = eastl::make_shared(d3d12Device.get(), textureRegisters.Allocate()); + defaultRMAOSTexture = eastl::make_shared(d3d12Device.get(), textureRegisters.Allocate()); + defaultDetailTexture = eastl::make_shared(d3d12Device.get(), textureRegisters.Allocate()); + + defaultWhiteTexture->CreateSRV(giHeap.get(), GIHeapDef::Slot::Textures); + defaultGrayTexture->CreateSRV(giHeap.get(), GIHeapDef::Slot::Textures); + defaultNormalTexture->CreateSRV(giHeap.get(), GIHeapDef::Slot::Textures); + defaultBlackTexture->CreateSRV(giHeap.get(), GIHeapDef::Slot::Textures); + defaultRMAOSTexture->CreateSRV(giHeap.get(), GIHeapDef::Slot::Textures); + defaultDetailTexture->CreateSRV(giHeap.get(), GIHeapDef::Slot::Textures); + + defaultWhiteTexture->UpdateAndUpload(commandList.get(), white); + defaultGrayTexture->UpdateAndUpload(commandList.get(), gray); + defaultNormalTexture->UpdateAndUpload(commandList.get(), normal); + defaultBlackTexture->UpdateAndUpload(commandList.get(), black); + defaultRMAOSTexture->UpdateAndUpload(commandList.get(), rmaos); + defaultDetailTexture->UpdateAndUpload(commandList.get(), detail); + } + + auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + + D3D11_TEXTURE2D_DESC mainDesc; + mainTex.texture->GetDesc(&mainDesc); + + // Depth + { + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = mainDesc.Width; + texDesc.Height = mainDesc.Height; + texDesc.MipLevels = 1; 
+ texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R32_FLOAT; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + depthTexture = eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + DX::ThrowIfFailed(depthTexture->resource->SetName(L"Depth texture")); + + auto barrier = CD3DX12_RESOURCE_BARRIER::Transition(depthTexture->resource.get(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &barrier); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = texDesc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = texDesc.MipLevels; + srvDesc.Texture2D.PlaneSlice = 0; + srvDesc.Texture2D.ResourceMinLODClamp = 0.0f; + + d3d12Device->CreateShaderResourceView(depthTexture->resource.get(), &srvDesc, giHeap->CPUHandle(GIHeap::Slot::Depth)); + d3d12Device->CreateShaderResourceView(depthTexture->resource.get(), &srvDesc, shadowHeap->CPUHandle(ShadowsHeap::Slot::Depth)); + } + + // Shadow mask + { + auto shadowMask = globals::game::renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kSHADOW_MASK]; + D3D11_TEXTURE2D_DESC shadowMaskDesc; + shadowMask.texture->GetDesc(&shadowMaskDesc); + + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = mainDesc.Width; + texDesc.Height = mainDesc.Height; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = shadowMaskDesc.Format; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + shadowMaskTexture = eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + DX::ThrowIfFailed(shadowMaskTexture->resource->SetName(L"Shadow Mask")); + + 
D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uavDesc.Format = texDesc.Format; + + d3d12Device->CreateUnorderedAccessView(shadowMaskTexture->resource.get(), nullptr, &uavDesc, shadowHeap->CPUHandle(ShadowsHeap::Slot::ShadowMask)); + } + + if (UpdateRenderSize()) + SetupOutputRT(); + + // UAVs + { + // u0 - Final texture + { + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = mainDesc.Width; + texDesc.Height = mainDesc.Height; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + mainTexture = eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + DX::ThrowIfFailed(mainTexture->resource->SetName(L"Main Texture")); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = texDesc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = texDesc.MipLevels; + srvDesc.Texture2D.PlaneSlice = 0; + srvDesc.Texture2D.ResourceMinLODClamp = 0.0f; + + d3d12Device->CreateShaderResourceView(mainTexture->resource.get(), &srvDesc, giHeap->CPUHandle(GIHeap::Slot::Main)); + + auto barrier = CD3DX12_RESOURCE_BARRIER::Transition(mainTexture->resource.get(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &barrier); + } + + // Accumulation buffer for path tracing denoiser + { + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = mainDesc.Width; + texDesc.Height = mainDesc.Height; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | 
D3D11_BIND_UNORDERED_ACCESS; + + accumulationTexture = eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + DX::ThrowIfFailed(accumulationTexture->resource->SetName(L"Accumulation Texture")); + + accumulationTextureCopy = eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + DX::ThrowIfFailed(accumulationTextureCopy->resource->SetName(L"Accumulation Texture Copy")); + } + } + + // Light buffer + { + lightBuffer = eastl::make_unique>(d3d12Device.get(), RTConstants::MAX_LIGHTS); + lightBuffer->SetName(L"Light Buffer"); + + lightBuffer->CreateSRV(giHeap->CPUHandle(GIHeap::Slot::Lights)); + } + + // Shape buffer + { + shapeBuffer = eastl::make_unique>(d3d12Device.get(), RTConstants::MAX_SHAPES); + shapeBuffer->SetName(L"Shape Buffer"); + + shapeBuffer->CreateSRV(giHeap->CPUHandle(GIHeap::Slot::Shapes)); + + DX::ThrowIfFailed(shapeBuffer->UploadResource()->Map(0, nullptr, reinterpret_cast(&shapeData))); + } + + // Instance buffer + { + instanceBuffer = eastl::make_unique>(d3d12Device.get(), RTConstants::MAX_INSTANCES); + instanceBuffer->SetName(L"Instance Buffer"); + + instanceBuffer->CreateSRV(giHeap->CPUHandle(GIHeap::Slot::Instances)); + } + + // Geometry transform buffer + { + transformBuffer = eastl::make_unique>(d3d12Device.get(), RTConstants::MAX_TRANSFORMS); + transformBuffer->SetName(L"Transform Buffer"); + + transformBuffer->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + } + + // Create instance buffer for BLAS + { + blasInstanceBuffer = eastl::make_unique>(d3d12Device.get(), RTConstants::MAX_INSTANCES, false); + blasInstanceBuffer->SetName(L"BLAS Instance Buffer"); + + blasInstanceBuffer->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + } + + // Create shadow instance buffer for BLAS + { + blasShadowInstanceBuffer = eastl::make_unique>(d3d12Device.get(), RTConstants::MAX_INSTANCES, false); + blasShadowInstanceBuffer->SetName(L"BLAS Instance Buffer"); 
+ + blasShadowInstanceBuffer->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + } + + logger::debug("Creating constant buffer..."); + { + frameBuffer = eastl::make_unique>(d3d12Device.get(), 1, false, 2); + frameBuffer->SetName(L"Frame Buffer"); + + frameBuffer->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); + + frameData = eastl::make_unique(); + + shadowsCB = eastl::make_unique>(d3d12Device.get(), 1, false); + shadowsCB->SetName(L"Shadows Constant Buffer"); + + shadowsCB->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); + + shadowsCBData = eastl::make_unique(); + } + + logger::debug("Creating samplers..."); + { + D3D11_SAMPLER_DESC samplerDesc = { + .Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR, + .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .MaxAnisotropy = 1, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX + }; + DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, samplerState.put())); + } + + // Sky Hemisphere + { + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = RTConstants::SKY_HEMI_SIZE; + texDesc.Height = RTConstants::SKY_HEMI_SIZE; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + skyHemisphere = eastl::make_unique(texDesc, d3d11Device.get(), d3d12Device.get()); + DX::ThrowIfFailed(skyHemisphere->resource->SetName(L"Sky Hemisphere")); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = texDesc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = texDesc.MipLevels; + 
srvDesc.Texture2D.PlaneSlice = 0; + srvDesc.Texture2D.ResourceMinLODClamp = 0.0f; + + d3d12Device->CreateShaderResourceView(skyHemisphere->resource.get(), &srvDesc, giHeap->CPUHandle(GIHeap::Slot::SkyHemisphere)); + + // Setup TESWaterReflections + waterReflections = RE::NiPointer(new RE::TESWaterReflections()); + + waterReflections->flags.set(true, RE::TESWaterReflections::Flags::kDirty, RE::TESWaterReflections::Flags::kDynamicCubemap, RE::TESWaterReflections::Flags::kWorldOrigin); + + for (uint i = 0; i < 6; i++) { + waterReflections->cubeMapSides[i] = RE::TESWaterReflections::CubeMapSide(i, 0.0f); + } + } + + fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); + + if (fenceEvent == nullptr) { + DX::ThrowIfFailed(HRESULT_FROM_WIN32(GetLastError())); + } + + CompileShaders(); +} + +#ifdef DLSS_RR +void Raytracing::InitRR() +{ + std::wstring interposerPath = L"Data\\Shaders\\Upscaling\\Streamline\\sl.interposer.dll"; + interposer = LoadLibraryW(interposerPath.c_str()); + if (interposer == nullptr) { + DWORD errorCode = GetLastError(); + logger::info("[Streamline] Failed to load interposer: Error Code {0:x}", errorCode); + return; + } else { + logger::info("[Streamline] Interposer loaded at address: {0:p}", static_cast(interposer)); + } + + sl::Preferences pref; + + sl::Feature featuresToLoad[] = { sl::kFeatureDLSS_RR }; + + pref.featuresToLoad = featuresToLoad; + pref.numFeaturesToLoad = _countof(featuresToLoad); + + pref.showConsole = false; + + pref.engine = sl::EngineType::eCustom; + pref.engineVersion = "1.0.0"; + pref.projectId = "f8776929-c969-43bd-ac2b-294b4de58aac"; + + pref.renderAPI = sl::RenderAPI::eD3D12; + pref.flags = sl::PreferenceFlags::eUseManualHooking; + //sl::PreferenceFlags::eUseFrameBasedResourceTagging; + + pref.logLevel = sl::LogLevel::eOff; + + slInit = (PFun_slInit*)GetProcAddress(interposer, "slInit"); + slGetNewFrameToken = (PFun_slGetNewFrameToken*)GetProcAddress(interposer, "slGetNewFrameToken"); + slSetD3DDevice = 
(PFun_slSetD3DDevice*)GetProcAddress(interposer, "slSetD3DDevice"); + slEvaluateFeature = (PFun_slEvaluateFeature*)GetProcAddress(interposer, "slEvaluateFeature"); + slSetConstants = (PFun_slSetConstants*)GetProcAddress(interposer, "slSetConstants"); + slGetFeatureFunction = (PFun_slGetFeatureFunction*)GetProcAddress(interposer, "slGetFeatureFunction"); + slSetTag = (PFun_slSetTag*)GetProcAddress(interposer, "slSetTag"); + + if (SL_FAILED(res, slInit(pref, sl::kSDKVersion))) { + logger::critical("[Streamline] Failed to initialize Streamline"); + } else { + logger::info("[Streamline] Successfully initialized Streamline"); + } + + slSetD3DDevice((void*)d3d12Device.get()); + + slGetFeatureFunction(sl::kFeatureDLSS_RR, "slDLSSDGetOptimalSettings", (void*&)slDLSSDGetOptimalSettings); + slGetFeatureFunction(sl::kFeatureDLSS_RR, "slDLSSDGetState", (void*&)slDLSSDGetState); + slGetFeatureFunction(sl::kFeatureDLSS_RR, "slDLSSDSetOptions", (void*&)slDLSSDSetOptions); +} + +int32_t Raytracing::GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth) +{ + const float basePhaseCount = 8.0f; + const int32_t jitterPhaseCount = int32_t(basePhaseCount * pow((float(displayWidth) / renderWidth), 2.0f)); + return jitterPhaseCount; +} + +// Calculate halton number for index and base. 
+float Raytracing::Halton(int32_t index, int32_t base) +{ + float f = 1.0f, result = 0.0f; + + for (int32_t currentIndex = index; currentIndex > 0;) { + f /= (float)base; + result = result + f * (float)(currentIndex % base); + currentIndex = (uint32_t)(floorf((float)(currentIndex) / (float)(base))); + } + + return result; +} + +void Raytracing::GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount) +{ + const float x = Halton((index % phaseCount) + 1, 2) - 0.5f; + const float y = Halton((index % phaseCount) + 1, 3) - 0.5f; + + *outX = x; + *outY = y; +} + +sl::DLSSMode Raytracing::GetDLSSMode() const +{ + switch (settings.DLSSRR.QualityMode) { + case DLSSRRQuality::MaxPerformance: + return sl::DLSSMode::eMaxPerformance; + break; + case DLSSRRQuality::MaxQuality: + case DLSSRRQuality::NativeRes: + return sl::DLSSMode::eMaxQuality; + break; + case DLSSRRQuality::DLAA: + return sl::DLSSMode::eDLAA; + break; + default: + return sl::DLSSMode::eBalanced; + break; + } +} + +void Raytracing::GetDLSSRROptimal() +{ + auto dlssdOptionsNew = GetDLSSRROptions(); + + if (dlssdOptions.mode != dlssdOptionsNew.mode || dlssdOptions.qualityPreset != dlssdOptionsNew.qualityPreset || dlssdOptions.outputWidth != dlssdOptionsNew.outputWidth || dlssdOptions.outputHeight != dlssdOptionsNew.outputHeight) { + dlssdOptions = dlssdOptionsNew; + + sl::Result result = slDLSSDGetOptimalSettings(dlssdOptions, optimalSettings); + if (result != sl::Result::eOk) { + logger::critical("[RT] Failed to get DLSS RR optimal settings, error code: {}", (int)result); + return; + } + } +} + +sl::DLSSDOptions Raytracing::GetDLSSRROptions() const +{ + sl::DLSSDOptions dlssdOptionsOut{}; + + dlssdOptionsOut.mode = GetDLSSMode(); + + auto screenSize = GetScreenSize(); + + dlssdOptionsOut.outputWidth = screenSize.x; + dlssdOptionsOut.outputHeight = screenSize.y; + + dlssdOptionsOut.colorBuffersHDR = sl::Boolean::eTrue; + dlssdOptionsOut.normalRoughnessMode = 
sl::DLSSDNormalRoughnessMode::ePacked; + dlssdOptionsOut.alphaUpscalingEnabled = sl::Boolean::eFalse; + + auto preset = (settings.DLSSRR.Preset == DLSSRRPreset::D) ? sl::DLSSDPreset::ePresetD : sl::DLSSDPreset::ePresetE; + + dlssdOptionsOut.dlaaPreset = preset; + dlssdOptionsOut.qualityPreset = preset; + dlssdOptionsOut.balancedPreset = preset; + dlssdOptionsOut.performancePreset = preset; + dlssdOptionsOut.ultraPerformancePreset = preset; + + return dlssdOptionsOut; +} + +void Raytracing::SetDLSSRROptions() +{ + auto worldToCameraView = globals::game::frameBufferCached.GetCameraView().Transpose(); + auto cameraViewToWorld = globals::game::frameBufferCached.GetCameraViewInverse().Transpose(); + + dlssdOptions.worldToCameraView = sl::float4x4{ + sl::float4{ worldToCameraView._11, worldToCameraView._12, worldToCameraView._13, worldToCameraView._14 }, + sl::float4{ worldToCameraView._21, worldToCameraView._22, worldToCameraView._23, worldToCameraView._24 }, + sl::float4{ worldToCameraView._31, worldToCameraView._32, worldToCameraView._33, worldToCameraView._34 }, + sl::float4{ worldToCameraView._41, worldToCameraView._42, worldToCameraView._43, worldToCameraView._44 } + }; + + dlssdOptions.cameraViewToWorld = sl::float4x4{ + sl::float4{ cameraViewToWorld._11, cameraViewToWorld._12, cameraViewToWorld._13, cameraViewToWorld._14 }, + sl::float4{ cameraViewToWorld._21, cameraViewToWorld._22, cameraViewToWorld._23, cameraViewToWorld._24 }, + sl::float4{ cameraViewToWorld._31, cameraViewToWorld._32, cameraViewToWorld._33, cameraViewToWorld._34 }, + sl::float4{ cameraViewToWorld._41, cameraViewToWorld._42, cameraViewToWorld._43, cameraViewToWorld._44 } + }; + + if (SL_FAILED(result, slDLSSDSetOptions(slViewportHandle, dlssdOptions))) { + logger::critical("[DLSS RR] Could not set DLSS RR options"); + return; + } +} + +void Raytracing::CheckFrameConstants() +{ + if (dlssFrameChecker.IsNewFrame()) { + slGetNewFrameToken(frameToken, &globals::state->frameCount); + + auto state = 
globals::state; + + sl::Constants slConstants = {}; + + if (globals::game::isVR) { + slConstants.cameraAspectRatio = (state->screenSize.x * 0.5f) / state->screenSize.y; + } else { + slConstants.cameraAspectRatio = state->screenSize.x / state->screenSize.y; + } + + slConstants.cameraFOV = Util::GetVerticalFOVRad(); + slConstants.cameraNear = *globals::game::cameraNear; + slConstants.cameraFar = *globals::game::cameraFar; + + auto viewMatrix = globals::game::frameBufferCached.GetCameraViewInverse().Transpose(); + auto cameraViewToClip = globals::game::frameBufferCached.GetCameraProjUnjittered().Transpose(); + + slConstants.cameraMotionIncluded = sl::Boolean::eTrue; + slConstants.cameraPinholeOffset = { 0.f, 0.f }; + slConstants.cameraRight = { viewMatrix._11, viewMatrix._12, viewMatrix._13 }; + slConstants.cameraUp = { viewMatrix._21, viewMatrix._22, viewMatrix._23 }; + slConstants.cameraFwd = { viewMatrix._31, viewMatrix._32, viewMatrix._33 }; + slConstants.cameraPos = *(sl::float3*)&globals::game::frameBufferCached.GetCameraPosAdjust(); + slConstants.cameraViewToClip = *(sl::float4x4*)&cameraViewToClip; + slConstants.depthInverted = sl::Boolean::eFalse; + + recalculateCameraMatrices(slConstants); + + auto screenSize = GetScreenSize(); + auto phaseCount = GetJitterPhaseCount(renderSize.x, screenSize.x); + + GetJitterOffset(&jitter.x, &jitter.y, state->frameCount, phaseCount); + + slConstants.jitterOffset = { -jitter.x, -jitter.y }; + slConstants.reset = sl::Boolean::eFalse; + + slConstants.mvecScale = { (globals::game::isVR ? 
0.5f : 1.0f), 1 }; + slConstants.motionVectors3D = sl::Boolean::eFalse; + slConstants.motionVectorsInvalidValue = FLT_MIN; + slConstants.orthographicProjection = sl::Boolean::eFalse; + slConstants.motionVectorsDilated = sl::Boolean::eFalse; + slConstants.motionVectorsJittered = sl::Boolean::eFalse; + + if (SL_FAILED(res, slSetConstants(slConstants, *frameToken, slViewportHandle))) { + logger::error("[Streamline] Could not set constants"); + } + } +} +#endif + +static std::wstring StringViewToWString(std::string_view sv) +{ + std::string str(sv); + + int size_needed = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, nullptr, 0); + + std::wstring wstr(size_needed, 0); + + MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, &wstr[0], size_needed); + + return wstr; +} + +void Raytracing::ShareRT(ID3D11Texture2D* pTexture2D, const GIHeap::Slot& target, const ShadowsHeap::Slot& cTarget, ID3D12Resource** ppResource) const +{ + D3D11_TEXTURE2D_DESC desc; + pTexture2D->GetDesc(&desc); + + winrt::com_ptr dxgiResource; + DX::ThrowIfFailed(pTexture2D->QueryInterface(IID_PPV_ARGS(dxgiResource.put()))); + + HANDLE sharedHandle = nullptr; + DX::ThrowIfFailed(dxgiResource->CreateSharedHandle(nullptr, DXGI_SHARED_RESOURCE_READ, nullptr, &sharedHandle)); // DXGI_SHARED_RESOURCE_WRITE + + DX::ThrowIfFailed(d3d12Device->OpenSharedHandle(sharedHandle, IID_PPV_ARGS(ppResource))); + CloseHandle(sharedHandle); + + /*const auto& barrier = CD3DX12_RESOURCE_BARRIER::Transition(*ppResource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COMMON); + commandList->ResourceBarrier(1, &barrier);*/ + + // Create SRV + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = desc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = desc.MipLevels; + srvDesc.Texture2D.PlaneSlice = 0; + 
srvDesc.Texture2D.ResourceMinLODClamp = 0.0f; + + if (target != GIHeap::Slot::None) + d3d12Device->CreateShaderResourceView(*ppResource, &srvDesc, giHeap->CPUHandle(target)); + + if (cTarget != ShadowsHeap::Slot::None) + d3d12Device->CreateShaderResourceView(*ppResource, &srvDesc, shadowHeap->CPUHandle(cTarget)); +} + +void Raytracing::SetupSharedRT() +{ + const auto& rendererRD = globals::game::renderer->GetRuntimeData(); + + ShareRT(rendererRD.renderTargets[ALBEDO].texture, GIHeap::Slot::Albedo, ShadowsHeap::Slot::None, albedoTexture.put()); + //ShareRT(rendererRD.renderTargets[REFLECTANCE].texture, GIHeap::Slot::None, ShadowsHeap::Slot::None, gbufferReflectanceTexture.put()); + //ShareRT(rendererRD.renderTargets[NORMALROUGHNESS].texture, HeapSlot::NormalRoughness, ComputeHeapSlot::None, normalRoughnessTexture.put()); + ShareRT(rendererRD.renderTargets[MASKS2].texture, GIHeap::Slot::GNMD, ShadowsHeap::Slot::None, GNMDTexture.put()); // GNMD + + DX::ThrowIfFailed(albedoTexture->SetName(L"Shared Albedo Texture")); + //DX::ThrowIfFailed(gbufferReflectanceTexture->SetName(L"Shared Reflectance Texture")); + //DX::ThrowIfFailed(normalRoughnessTexture->SetName(L"Shared NormalRoughness Texture")); + DX::ThrowIfFailed(GNMDTexture->SetName(L"Shared GNMD Texture")); +} + +bool IsValidLight(RE::BSLight* a_light) +{ + return a_light && !a_light->light->GetFlags().any(RE::NiAVObject::Flag::kHidden); +} + +bool IsGlobalLight(RE::BSLight* a_light) +{ + return !(a_light->portalStrict || !a_light->portalGraph); +} + +eastl::vector Raytracing::GetPointLights() +{ + eastl::vector lightsData{}; + + auto accumulator = *globals::game::currentAccumulator.get(); + const auto activeShadowSceneNode = accumulator->GetRuntimeData().activeShadowSceneNode; + + auto& isl = globals::features::inverseSquareLighting; + + auto addLight = [&](const RE::NiPointer& e) { + if (auto bsLight = e.get()) { + if (auto niLight = bsLight->light.get()) { + if (IsValidLight(bsLight)) { + auto& runtimeData = 
niLight->GetLightRuntimeData(); + + LightLimitFix::LightData light{}; + light.color = float3(runtimeData.diffuse.red, runtimeData.diffuse.green, runtimeData.diffuse.blue); + light.lightFlags = std::bit_cast(runtimeData.ambient.red); + + if (isl.loaded) { + isl.ProcessLight(light, bsLight, niLight); + } else { + light.radius = runtimeData.radius.x; + light.invRadius = 1.0f / light.radius; + light.fade = runtimeData.fade; + + if (settings.LodDimmer) + light.fade *= bsLight->lodDimmer; + } + + if (!IsGlobalLight(bsLight)) { + light.lightFlags.set(LightLimitFix::LightFlags::PortalStrict); + } + + if (bsLight->IsShadowLight()) { + auto* shadowLight = static_cast(bsLight); + GET_INSTANCE_MEMBER(maskIndex, shadowLight); + light.shadowMaskIndex = maskIndex; + light.lightFlags.set(LightLimitFix::LightFlags::Shadow); + } + + // Check for inactive shadow light + if (light.shadowMaskIndex != 255) { + auto worldPos = niLight->world.translate; + + light.positionWS[0].data = float3(worldPos.x, worldPos.y, worldPos.z); + + if ((light.color.x + light.color.y + light.color.z) > 1e-4 && light.radius > 1e-4) { + lightsData.push_back(light); + } + } + } + } + } + }; + + const auto& activeLights = activeShadowSceneNode->GetRuntimeData().activeLights; + for (auto& light : activeLights) { + addLight(light); + } + + const auto& activeShadowLights = activeShadowSceneNode->GetRuntimeData().activeShadowLights; + for (auto& light : activeShadowLights) { + addLight(light); + } + + return lightsData; +} + +void Raytracing::UpdateLights() +{ + if (!renderingWorld || lightsUpdated) + return; + + // Directional light + { + auto accumulator = *globals::game::currentAccumulator.get(); + auto dirLight = skyrim_cast(accumulator->GetRuntimeData().activeShadowSceneNode->GetRuntimeData().sunLight->light.get()); + + auto direction = Float3(dirLight->GetWorldDirection()); + direction.Normalize(); + + auto& diffuse = dirLight->GetLightRuntimeData().diffuse; + + frameData->Directional.Vector = -direction; + 
frameData->Directional.Color = float3(diffuse.red, diffuse.green, diffuse.blue) * settings.Directional; + } + + // Point lights + { + lights.clear(); + lights.reserve(RTConstants::MAX_LIGHTS); + + for (auto data : GetPointLights()) { + if (lights.size() >= RTConstants::MAX_LIGHTS) + break; + + if (data.lightFlags.any(LightLimitFix::LightFlags::Disabled)) + continue; + + Light light; + light.Vector = data.positionWS[0].data; + light.Radius = data.radius; + light.Color = data.color * settings.Point; + light.InvRadius = data.invRadius; + light.FadeZone = data.fadeZone; + light.SizeBias = data.sizeBias; + light.Fade = data.fade; + light.Type = 0; + light.Flags = 0; + + if (data.lightFlags.any(LightLimitFix::LightFlags::InverseSquare)) + light.Flags |= (1 << 0); + + if (data.lightFlags.any(LightLimitFix::LightFlags::Linear)) + light.Flags |= (1 << 1); + + lights.push_back(light); + } + + if (!lights.empty()) + lightBuffer->UpdateList(lights.data(), lights.size()); + } + + lightsUpdated = true; +} + +static DirectX::XMFLOAT3X4 GetXMF3X4FromNiTransform(const RE::NiTransform& Transform) +{ + const RE::NiMatrix3& m = Transform.rotate; + const float scale = Transform.scale; + + return { + m.entry[0][0] * scale, m.entry[1][0] * scale, m.entry[2][0] * scale, + m.entry[0][1] * scale, m.entry[1][1] * scale, m.entry[2][1] * scale, + m.entry[0][2] * scale, m.entry[1][2] * scale, m.entry[2][2] * scale, + Transform.translate.x, Transform.translate.y, Transform.translate.z + }; +} + +void Raytracing::CopyDepth() const +{ + auto context = globals::d3d::context; + auto renderer = globals::game::renderer; + + auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN]; // kMAIN kPOST_ZPREPASS_COPY + + context->CSSetShader(copyDepthCS.get(), nullptr, 0); + + //auto* renderSizeCB = renderResCB->CB(); + //context->CSSetConstantBuffers(0, 1, &renderSizeCB); + + auto* cb = globals::state->sharedDataCB->CB(); + context->CSSetConstantBuffers(5, 1, &cb); + 
+ context->CSSetShaderResources(0, 1, &depth.depthSRV); + + //auto sampler = samplerState.get(); + //context->CSSetSamplers(0, 1, &sampler); + + eastl::array uavs = { + depthTexture->uav, + svgfDenoiser->depthLinearTexture->uav.get() + }; + context->CSSetUnorderedAccessViews(0, (UINT)uavs.size(), uavs.data(), nullptr); + + uint2 screenSize = GetScreenSize(); + uint2 dispatchCount = { DivideRoundUp(screenSize.x, 8u), DivideRoundUp(screenSize.y, 8u) }; + context->Dispatch(dispatchCount.x, dispatchCount.y, 1); + + uavs.fill(nullptr); + context->CSSetUnorderedAccessViews(0, (UINT)uavs.size(), uavs.data(), nullptr); +} + +void Raytracing::UnpackMetallicAO() const +{ + +} + +void Raytracing::CopyConvertTextures() const +{ + auto context = globals::d3d::context; + auto renderer = globals::game::renderer; + + context->CSSetShader(settings.PathTracing ? convertTexturesPTCS.get() : convertTexturesCS.get(), nullptr, 0); + + auto* renderSizeCB = renderResCB->CB(); + context->CSSetConstantBuffers(0, 1, &renderSizeCB); + + auto* frameBufferCB = *globals::game::perFrame.get(); + context->CSSetConstantBuffers(12, 1, &frameBufferCB); + + ID3D11ShaderResourceView* srvs[4] = { + renderer->GetRuntimeData().renderTargets[NORMALROUGHNESS].SRV, + renderer->GetRuntimeData().renderTargets[ALBEDO].SRV, + renderer->GetRuntimeData().renderTargets[MASKS2].SRV, + renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMOTION_VECTOR].SRV + }; + + context->CSSetShaderResources(0, _countof(srvs), srvs); + + auto sampler = samplerState.get(); + context->CSSetSamplers(0, 1, &sampler); + + ID3D11UnorderedAccessView* uavs[3] = { + normalRoughnessTexture->uav, + diffuseAlbedoTexture->uav, + motionVectorsTexture->uav + }; + + context->CSSetUnorderedAccessViews(0, _countof(uavs), uavs, nullptr); + + uint2 dispatchCount = { DivideRoundUp(renderSize.x, 8u), DivideRoundUp(renderSize.y, 8u) }; + context->Dispatch(dispatchCount.x, dispatchCount.y, 1); + + uavs[0] = nullptr; + uavs[1] = nullptr; + 
uavs[2] = nullptr; + context->CSSetUnorderedAccessViews(0, _countof(uavs), uavs, nullptr); +} + +void Raytracing::SkyCubeToHemi() const +{ + auto context = globals::d3d::context; + + context->CSSetShader(cubeToHemiCS.get(), nullptr, 0); + + auto reflections = globals::game::renderer->GetRendererData().cubemapRenderTargets[RE::RENDER_TARGET_CUBEMAP::kREFLECTIONS]; + auto reflectionOcc = globals::features::cloudShadows.loaded ? globals::features::cloudShadows.texCubemapCloudOccCopy->srv.get() : nullptr; + + eastl::array srvs = { + reflections.SRV, + reflectionOcc + }; + context->CSSetShaderResources(0, (UINT)srvs.size(), srvs.data()); + + auto sampler = samplerState.get(); + context->CSSetSamplers(0, 1, &sampler); + + ID3D11UnorderedAccessView* uav = skyHemisphere->uav; + context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + + uint dispatch = (uint)std::ceil(RTConstants::SKY_HEMI_SIZE / 8.0f); + context->Dispatch(dispatch, dispatch, 1); + + uav = nullptr; + context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); +} + +void Raytracing::ConvertMSN() +{ + while (!msnConvertionQueue.empty()) { + auto& key = msnConvertionQueue.front(); + + if (auto model = models.find(key); model != models.end()) { + model->second->ConvertMSN(); + } + + msnConvertionQueue.pop_front(); + } +} + +void Raytracing::Main_RenderWorld(bool a1) +{ + if (Active()) { + renderingWorld = true; + lightsUpdated = false; + + SkyCubeToHemi(); + ConvertMSN(); + } + + Hooks::Main_RenderWorld::func(a1); + + if (Active()) { + renderingWorld = false; + } +} + +// A custom visit controller built to ignore billboard/particle geometry +static RE::BSVisit::BSVisitControl TraverseScenegraphRTGeometries(RE::NiAVObject* a_object, RE::BSFadeNode* validFadeNode, std::function a_func) +{ + auto result = RE::BSVisit::BSVisitControl::kContinue; + + if (!a_object) { + return result; + } + + auto geom = a_object->AsGeometry(); + if (geom) { + return a_func(geom); + } + + // Doodlum sez this is faster + auto rtti = 
a_object->GetRTTI(); + + static REL::Relocation billboardRTTI{ RE::NiBillboardNode::Ni_RTTI }; + if (rtti == billboardRTTI.get()) + return result; + + // Might break vegetation + static REL::Relocation orderedRTTI{ RE::BSOrderedNode::Ni_RTTI }; + if (rtti == orderedRTTI.get()) + return result; + + auto node = a_object->AsNode(); + if (node) { + for (auto& child : node->GetChildren()) { + if (!child) + continue; + + if (validFadeNode) { + if (auto fadeNode = child->AsFadeNode(); fadeNode && fadeNode != validFadeNode) { + continue; + } + } + + result = TraverseScenegraphRTGeometries(child.get(), validFadeNode, a_func); + if (result == RE::BSVisit::BSVisitControl::kStop) { + break; + } + } + } + + return result; +} + +static RE::BSVisit::BSVisitControl TraverseScenegraphFadeNodes(RE::NiAVObject* a_object, std::function a_func) +{ + auto result = RE::BSVisit::BSVisitControl::kContinue; + + if (!a_object) { + return result; + } + + auto fadeNode = a_object->AsFadeNode(); + if (fadeNode) { + result = a_func(fadeNode); + + if (result == RE::BSVisit::BSVisitControl::kStop) { + return result; + } + } + + auto node = a_object->AsNode(); + if (node) { + for (auto& child : node->GetChildren()) { + result = TraverseScenegraphFadeNodes(child.get(), a_func); + if (result == RE::BSVisit::BSVisitControl::kStop) { + break; + } + } + } + + return result; +} + +void Raytracing::CreateModel(RE::TESForm* form, const char* model, RE::NiAVObject* root) +{ + if (!root) { + logger::warn("[RT] CreateModel - NULL root object for model: {}", model ? 
model : "unknown"); + return; + } + + const REL::Relocation rtti{ RE::NiMultiTargetTransformController::Ni_RTTI }; + auto* controller = reinterpret_cast(root->GetController(rtti.get())); + + if (controller) { + eastl::hash_set parents; + eastl::hash_set targets; + + for (uint16_t i = 0; i < controller->numInterps; i++) { + auto* target = controller->targets[i]; + + if (!target) + continue; + + auto [it, emplaced] = targets.emplace(target); + parents.emplace(target->parent); + + if (!emplaced) + continue; + + CreateModelInternal(form, std::format("{}_{}", model, target->name.c_str()).c_str(), target); + } + + for (auto* parent : parents) { + for (auto& child : parent->GetChildren()) { + if (targets.find(child.get()) != targets.end()) + continue; + + CreateModelInternal(form, std::format("{}_{}_{}", model, child->name.c_str(), child->parentIndex).c_str(), child.get()); + } + } + + return; + } + + CreateModelInternal(form, model, root); +} + +void Raytracing::CreateActorModel([[maybe_unused]] RE::Actor* actor, [[maybe_unused]] const char* name, RE::NiAVObject* root) +{ + TraverseScenegraphFadeNodes(root, [&]([[maybe_unused]] RE::BSFadeNode* fadeNode) -> RE::BSVisit::BSVisitControl { + const bool isRoot = (fadeNode == root); + + auto fadeNodeName = std::format("{}.{}", name, fadeNode->name.c_str()); + CreateModelInternal(actor, isRoot ? 
name : fadeNodeName.c_str(), fadeNode); + + return RE::BSVisit::BSVisitControl::kContinue; + }); +} + +void Raytracing::CreateModelInternal(RE::TESForm* form, const char* path, RE::NiAVObject* pRoot) +{ + if (!pRoot) { + logger::error("[RT] CreateModel \"{}\" - nullptr root", path); + return; + } + + logger::trace("[RT] CreateModel \"{}\"", typeid(*pRoot).name()); + + if (!path) { + logger::debug("[RT] CreateModel \"{}\" - Invalid Path", pRoot->name); + return; + } + + if (strlen(path) == 0) { + logger::debug("[RT] CreateModel \"{}\" - Empty Path", pRoot->name); + return; + } + + if (instances.find(pRoot) != instances.end()) { + logger::warn("[RT] CreateModel \"{}\" - Instance/Model for 0x{:08X} already present.", path, reinterpret_cast(pRoot)); + return; + } + + const auto* bsxFlags = pRoot->GetExtraData("BSX"); + + if (bsxFlags) { + if (static_cast(bsxFlags->value) & static_cast(RE::BSXFlags::Flag::kEditorMarker)) + return; + + logger::debug("[RT] CreateModel - BSX Flags [0x{:x}]: {}", bsxFlags->value, GetFlagsString(bsxFlags->value)); + } + + auto formID = form->GetFormID(); + + // We only need one buffer per model + if (models.find(path) != models.end()) { + AddInstance(formID, pRoot, path); + return; + } + + logger::debug("[RT] CreateModel - Path: {}, FormID [0x{:08X}], NiNode [0x{:08X}]: {}", path, formID, reinterpret_cast(pRoot), pRoot->name); + + auto formType = form->GetFormType(); + + auto rootWorldInverse = pRoot->world.Invert(); + + eastl::vector> shapes; + + // Will traverse and skip non-root fade nodes (and their children) + auto* validFadeNode = (formType == RE::FormType::ActorCharacter ? 
reinterpret_cast(pRoot) : nullptr); + + TraverseScenegraphRTGeometries(pRoot, validFadeNode, [&](RE::BSGeometry * pGeometry)->RE::BSVisit::BSVisitControl { + const char* name = pGeometry->name.c_str(); + + logger::trace("\t\t[RT] CreateModel::TraverseScenegraphGeometries - {}", name); + + const auto& geometryType = pGeometry->GetType(); + + if (geometryType.none(RE::BSGeometry::Type::kTriShape, RE::BSGeometry::Type::kDynamicTriShape)) { + logger::warn("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Unsupported Geometry: {} for {}", magic_enum::enum_name(geometryType.get()), name); + return RE::BSVisit::BSVisitControl::kContinue; + } + + const auto& geometryRuntimeData = pGeometry->GetGeometryRuntimeData(); + + auto* effect = geometryRuntimeData.properties[RE::BSGeometry::States::kEffect].get(); + + if (!effect) { + logger::debug("\t\t[RT] CreateModel::TraverseScenegraphGeometries - No Effect"); + return RE::BSVisit::BSVisitControl::kContinue; + } + + bool isLightingShader = netimmerse_cast(effect) != nullptr; + bool isEffectShader = netimmerse_cast(effect) != nullptr; + + // Only lighting and effect shader for now + if (!isLightingShader && !isEffectShader) { + logger::warn("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Unsupported shader type: {}", effect->GetRTTI()->name); + return RE::BSVisit::BSVisitControl::kContinue; + } + + auto shaderProperty = netimmerse_cast(effect); + bool skinned = shaderProperty && shaderProperty->flags.any(RE::BSShaderProperty::EShaderPropertyFlag::kSkinned); + + auto& geomFlags = pGeometry->GetFlags(); + + if (geomFlags.any(RE::NiAVObject::Flag::kHidden) && !skinned) { + logger::debug("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Is Hidden"); + return RE::BSVisit::BSVisitControl::kContinue; + } + + auto flags = Shape::Flags::None; + + // Landscape needs special handling of triangles + if (formType == RE::FormType::Land) + flags |= Shape::Flags::Landscape; + + if 
(geometryType.all(RE::BSGeometry::Type::kDynamicTriShape)) + flags |= Shape::Flags::Dynamic; + + float3x4 localToRoot; + XMStoreFloat3x4(&localToRoot, GetXMFromNiTransform(rootWorldInverse * pGeometry->world)); + + if (auto* triShapeRD = geometryRuntimeData.rendererData) { // Non-Skinned + auto* pTriShape = netimmerse_cast(pGeometry); + + const auto& triShapeRuntime = pTriShape->GetTrishapeRuntimeData(); + + if (triShapeRuntime.vertexCount == 0) { + logger::error("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Vertex count of 0 for {}: {}", path ? path : "N/A", name ? name : "N/A"); + return RE::BSVisit::BSVisitControl::kContinue; + } + + if (triShapeRuntime.triangleCount == 0) { + logger::error("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Triangle count of 0 for {}: {}", path ? path : "N/A", name ? name : "N/A"); + return RE::BSVisit::BSVisitControl::kContinue; + } + + auto shape = eastl::make_unique(flags, shapeRegisters.Allocate(), pGeometry, localToRoot); + + shape->BuildMesh(triShapeRD, triShapeRuntime.vertexCount, triShapeRuntime.triangleCount, 0); + shape->BuildMaterial(geometryRuntimeData, name, formID); + shape->CreateBuffers(ToWide(name)); + + shapes.push_back(eastl::move(shape)); + } else if (auto* skinInstance = geometryRuntimeData.skinInstance.get()) { // Skinned + auto& skinPartition = skinInstance->skinPartition; + + if (!skinPartition) { + logger::warn("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Invalid SkinPartition"); + return RE::BSVisit::BSVisitControl::kContinue; + } + + if (skinPartition->vertexCount == 0) { + logger::error("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Vertex count of 0 for {}: {}", path ? path : "N/A", name ? 
name : "N/A"); + return RE::BSVisit::BSVisitControl::kContinue; + } + + const auto skinNumPartitions = skinPartition->numPartitions; + + logger::debug("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Partitions: {}, VertexCount: {}, Unk24: [0x{:X}]", skinNumPartitions, skinPartition->vertexCount, skinPartition->unk24); + + // This looks diabolical + static REL::Relocation dismemberRTTI{ RE::BSDismemberSkinInstance::Ni_RTTI }; + + eastl::vector dismemberData(skinNumPartitions, { true, false, 0 }); + + decltype(dismemberReferences.begin()) it; + bool emplacedDismemberRef = false; + + if (skinInstance->GetRTTI() == dismemberRTTI.get()) { + auto* dismemberSkinInstance = reinterpret_cast(skinInstance); + + auto& dismemberRuntime = dismemberSkinInstance->GetRuntimeData(); + + const auto dismemberNumPartitions = static_cast(dismemberRuntime.numPartitions); + + if (skinNumPartitions != dismemberNumPartitions) + logger::error("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Skin and Dismember partition count mismatch"); + + std::memcpy(dismemberData.data(), dismemberRuntime.partitions, dismemberNumPartitions * sizeof(RE::BSDismemberSkinInstance::Data)); + + eastl::tie(it, emplacedDismemberRef) = dismemberReferences.try_emplace(dismemberSkinInstance, eastl::vector(skinNumPartitions)); + } + + for (size_t i = 0; i < skinPartition->partitions.size(); i++) { + auto& partition = skinPartition->partitions[i]; + auto& dismemberPartition = dismemberData[i]; + + // Fix for modded geometry + if (partition.triangles == 0) { + logger::error("\t\t[RT] CreateModel::TraverseScenegraphGeometries - Triangle count of 0 for {}: {}", path ? path : "N/A", name ? 
name : "N/A"); + continue; + } + + // Fix for modded geometry + if (partition.bonesPerVertex > 0) + flags |= Shape::Flags::Skinned; + + auto shape = eastl::make_unique(flags, shapeRegisters.Allocate(), pGeometry, localToRoot, dismemberPartition.editorVisible, dismemberPartition.slot); + + // Diabolical Part II + if (emplacedDismemberRef) + it->second[i] = shape.get(); + + shape->BuildMesh(partition.buffData, skinPartition->vertexCount, partition.triangles, partition.bonesPerVertex); + shape->BuildMaterial(geometryRuntimeData, name, formID); + shape->CreateBuffers(ToWide(name)); + + shapes.push_back(eastl::move(shape)); + } + } + + return RE::BSVisit::BSVisitControl::kContinue; + }); + + if (auto shapeCount = shapes.size(); shapeCount > 0) { + eastl::string modelKey = path; + + auto model = eastl::make_unique(shapes); + + // Models with these flags cannot be instanced directly + if (model->GetShapeFlags().any(Shape::Flags::Dynamic, Shape::Flags::Skinned)) + modelKey.append(Model::KeySuffix(pRoot).c_str()); + + auto [it, emplaced] = models.try_emplace(modelKey, eastl::move(model)); + + if (emplaced) { + if (it->second->ShouldQueueMSNConversion()) + msnConvertionQueue.emplace_back(modelKey); + + it->second->BuildBLAS(commandList.get()); + + AddInstance(formID, pRoot, modelKey); + + logger::debug("[RT] CreateModel - Commited {} TriShapes to [0x{:08X}]", shapeCount, reinterpret_cast(it->second.get())); + } else { + logger::warn("[RT] CreateModel - Emplace failed for {} TriShapes", shapeCount); + } + } else { + logger::debug("[RT] CreateModel - No TriShapes to commit"); + } +} + +bool Raytracing::RemoveInstance(RE::NiAVObject* pRoot, bool releaseModel) +{ + std::lock_guard lock{ renderMutex }; + + if (auto instanceIt = instances.find(pRoot); instanceIt != instances.end()) { + auto& instance = instanceIt->second; + + logger::debug("[RT] RemoveInstance - \"{}\", \"{}\"", pRoot->name, instance.filename); + + if (auto modelIt = models.find(instance.filename); modelIt != 
models.end()) { + auto& model = modelIt->second; + + auto refCount = model->Release(); + + logger::debug("[RT] RemoveInstance - RefCount: {}", refCount); + + // If this is the last Instance of the model, remove it + if (refCount <= 0 && releaseModel) { + logger::debug("[RT] RemoveInstance - No refs, erasing from collection"); + + auto retiredModel = eastl::move(modelIt->second); + models.erase(modelIt); + + TempGPUData retiredData{}; + retiredData.fenceValue = fenceValue + 1; + retiredData.retiredModels.push_back(eastl::move(retiredModel)); + tempGPUData.push_back(eastl::move(retiredData)); + } + } + + instances.erase(instanceIt); + + return true; + } + + return false; +} +

/// Removes every instance registered for `formID` and forgets the form's node list.
///
/// @param formID       Form whose root nodes should be removed.
/// @param releaseModel Forwarded to the per-node overload.
/// @return true when at least one node's instance was removed.
bool Raytracing::RemoveInstance(RE::FormID formID, bool releaseModel)
{
	auto nodesIt = formIDNodes.find(formID);

	if (nodesIt == formIDNodes.end())
		return false;

	bool removed = false;

	for (auto& rootNode : nodesIt->second) {
		// Accumulate: a later node without an instance must not hide an earlier successful removal.
		// The call is on the left so it always runs.
		removed = RemoveInstance(rootNode, releaseModel) || removed;
	}

	formIDNodes.erase(nodesIt);

	return removed;
}

/// Sets the detached state of the instance registered for `root`; no-op when there is none.
void Raytracing::SetInstanceDetached(RE::NiAVObject* root, bool detached)
{
	auto instanceIt = instances.find(root);

	if (instanceIt == instances.end())
		return;

	instanceIt->second.SetDetached(detached);
}

/// Sets the detached state of every instance registered for `formID`; no-op for unknown forms.
void Raytracing::SetInstanceDetached(RE::FormID formID, bool detached)
{
	auto nodesIt = formIDNodes.find(formID);

	if (nodesIt == formIDNodes.end())
		return;

	for (auto& rootNode : nodesIt->second)
		SetInstanceDetached(rootNode, detached);
}

+eastl::shared_ptr Raytracing::GetTextureRegister(ID3D11Texture2D* dx11Texture, eastl::shared_ptr defaultTexture) +{ + std::lock_guard lock{ textureRegisterMutex }; + + if (!dx11Texture) + return defaultTexture; + + // Texture already placed in heap, return allocation + if (auto refIt = textures.find(dx11Texture); refIt != textures.end()) { + return refIt->second->allocation; + } + + winrt::com_ptr dxgiResource; + HRESULT hr = dx11Texture->QueryInterface(IID_PPV_ARGS(dxgiResource.put())); + + if (FAILED(hr)) 
{ + logger::error("[RT] GetTextureRegister - Failed to query interface."); + return defaultTexture; + } + + HANDLE sharedHandle = nullptr; + hr = dxgiResource->GetSharedHandle(&sharedHandle); + + if (FAILED(hr) || !sharedHandle) { + D3D11_TEXTURE2D_DESC desc; + dx11Texture->GetDesc(&desc); + + logger::debug("[RT] GetTextureRegister - Failed to get shared handle - [{}, {}] Format: {}, Flags: {}", desc.Width, desc.Height, magic_enum::enum_name(desc.Format), GetFlagsString(desc.MiscFlags)); + return defaultTexture; + } + + winrt::com_ptr dx12Texture; + hr = d3d12Device->OpenSharedHandle(sharedHandle, IID_PPV_ARGS(dx12Texture.put())); + + CloseHandle(sharedHandle); + + if (FAILED(hr)) { + logger::error("[RT] GetTextureRegister - Failed to open shared handle."); + return defaultTexture; + } + + if (!dx12Texture) { + logger::error("[RT] GetTextureRegister - Failed to adquire DX12 texture."); + return defaultTexture; + } + + D3D12_RESOURCE_DESC texResDesc = dx12Texture->GetDesc(); + + D3D12_SHADER_RESOURCE_VIEW_DESC texSrvDesc = {}; + texSrvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + texSrvDesc.Format = texResDesc.Format; + texSrvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + texSrvDesc.Texture2D.MostDetailedMip = 0; + texSrvDesc.Texture2D.MipLevels = texResDesc.MipLevels; + texSrvDesc.Texture2D.PlaneSlice = 0; + texSrvDesc.Texture2D.ResourceMinLODClamp = 0.0f; + + auto [it, emplaced] = textures.try_emplace(dx11Texture, nullptr); + + if (emplaced) { + it->second = eastl::make_unique(std::move(dx12Texture), eastl::shared_ptr(textureRegisters.Allocate(), AllocationDeleter())); + + auto allocationIndex = it->second->allocation->GetIndex(); + + it->second->resource->SetName(std::format(L"Shared Texture [{}]", allocationIndex).c_str()); + + d3d12Device->CreateShaderResourceView(it->second->resource.get(), &texSrvDesc, giHeap->CPUHandle(GIHeap::Slot::Textures, allocationIndex)); + + return it->second->allocation; + } else { + 
dx12Texture->SetName(L"Shared Texture [?]"); + logger::error("[RT] GetTextureRegister - TextureReference emplace failed."); + } + + logger::debug("[RT] GetTextureRegister - Source texture not found"); + + return defaultTexture; +} +

/// Returns the heap allocation of the converted normal map that shadows `texture`,
/// creating the shared D3D11 render target and its D3D12 view on first use.
///
/// @param shape          Unused.
/// @param texture        Source normal map; its D3D11 texture pointer is the lookup key.
/// @param defaultTexture Returned when `texture` is missing or any creation step fails.
/// @return The allocation of the shared normal map, or `defaultTexture`.
///
/// A failed creation removes the half-built entry again, so `normalMaps` only ever holds
/// entries whose `Reference` is set.
eastl::shared_ptr Raytracing::GetMSNormalMapRegister([[maybe_unused]] Shape* shape, RE::BSGraphics::Texture* texture, eastl::shared_ptr defaultTexture)
{
	std::lock_guard lock{ textureRegisterMutex };

	if (!texture || !texture->texture)
		return defaultTexture;

	auto* texture2D = reinterpret_cast(texture->texture);

	if (auto refIt = normalMaps.find(texture2D); refIt != normalMaps.end())
		return refIt->second->Reference->allocation;

	auto entry = normalMaps.emplace(texture2D, eastl::make_unique());

	if (!entry.second) {
		logger::warn("[RT] GetMSNormalMapRegister - NormalMap emplace failed.");
		return defaultTexture;
	}

	// Drops the partially initialised entry; otherwise the lookup above would
	// dereference a null `Reference` on the next call for this texture.
	auto abandon = [&]() {
		normalMaps.erase(entry.first);
		return defaultTexture;
	};

	auto* normalMap = entry.first->second.get();

	normalMap->OriginalSRV = texture->resourceView;

	// Same dimensions as the source; single mip, shareable, bindable as SRV and RTV.
	D3D11_TEXTURE2D_DESC desc;
	texture2D->GetDesc(&desc);
	desc.MipLevels = 1;
	desc.ArraySize = 1;
	desc.Format = DXGI_FORMAT_R10G10B10A2_UNORM;
	desc.SampleDesc.Count = 1;
	desc.SampleDesc.Quality = 0;
	desc.Usage = D3D11_USAGE_DEFAULT;
	desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET;
	desc.CPUAccessFlags = 0;
	desc.MiscFlags = D3D11_RESOURCE_MISC_SHARED;

	normalMap->Texture = eastl::make_unique(desc);

	D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc{};
	srvDesc.Format = desc.Format;
	srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
	srvDesc.Texture2D.MostDetailedMip = 0;
	srvDesc.Texture2D.MipLevels = desc.MipLevels;

	normalMap->Texture->CreateSRV(srvDesc);

	D3D11_RENDER_TARGET_VIEW_DESC rtvDesc{};
	rtvDesc.Format = desc.Format;
	rtvDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
	rtvDesc.Texture2D.MipSlice = 0;

	normalMap->Texture->CreateRTV(rtvDesc);

	static float clearColor[4] = { 0.5f, 0.5f, 1.0f, 1.0f };
	globals::d3d::context->ClearRenderTargetView(normalMap->Texture->rtv.get(), clearColor);

	// Share the new texture
	winrt::com_ptr dxgiResource;
	HRESULT hr = normalMap->Texture->resource->QueryInterface(IID_PPV_ARGS(dxgiResource.put()));

	if (FAILED(hr)) {
		logger::error("[RT] GetMSNormalMapRegister - Failed to query interface.");
		return abandon();
	}

	HANDLE sharedHandle = nullptr;
	hr = dxgiResource->GetSharedHandle(&sharedHandle);

	if (FAILED(hr) || !sharedHandle) {
		logger::error("[RT] GetMSNormalMapRegister - Failed to get shared handle.");
		return abandon();
	}

	winrt::com_ptr dx12Texture;
	hr = d3d12Device->OpenSharedHandle(sharedHandle, IID_PPV_ARGS(dx12Texture.put()));

	// The texture was created with D3D11_RESOURCE_MISC_SHARED, so GetSharedHandle returns a
	// legacy DXGI handle rather than an NT handle; it must not be passed to CloseHandle.

	if (FAILED(hr)) {
		logger::error("[RT] GetMSNormalMapRegister - Failed to open shared handle.");
		return abandon();
	}

	if (!dx12Texture) {
		logger::error("[RT] GetMSNormalMapRegister - Failed to adquire DX12 texture.");
		return abandon();
	}

	D3D12_RESOURCE_DESC texResDesc = dx12Texture->GetDesc();

	D3D12_SHADER_RESOURCE_VIEW_DESC texSrvDesc = {};
	texSrvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
	texSrvDesc.Format = texResDesc.Format;
	texSrvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
	texSrvDesc.Texture2D.MostDetailedMip = 0;
	texSrvDesc.Texture2D.MipLevels = texResDesc.MipLevels;
	texSrvDesc.Texture2D.PlaneSlice = 0;
	texSrvDesc.Texture2D.ResourceMinLODClamp = 0.0f;

	normalMap->Reference = eastl::make_unique(std::move(dx12Texture), eastl::shared_ptr(textureRegisters.Allocate(), AllocationDeleter()));

	auto allocationIndex = normalMap->Reference->allocation->GetIndex();

	normalMap->Reference->resource->SetName(std::format(L"Shared MS Normalmap [{}]", allocationIndex).c_str());

	d3d12Device->CreateShaderResourceView(normalMap->Reference->resource.get(), &texSrvDesc, giHeap->CPUHandle(GIHeap::Slot::Textures, allocationIndex));

	// Reverse lookup: heap allocation index -> source D3D11 texture.
	allocationMSNormalMaps.emplace(allocationIndex, texture2D);

	return normalMap->Reference->allocation;
}

/// Registers `pNiNode` as an instance of the model stored under `path`.
/// Does nothing when the node already has an instance or no model exists for `path`.
/// On success the node is appended to the form's node list and the model gains a reference.
void Raytracing::AddInstance(RE::FormID formID, RE::NiAVObject* pNiNode, eastl::string path)
{
	logger::debug("[RT] AddInstance [0x{:08X}] - {}, Path: {}", formID, pNiNode->name, path);

	if (instances.find(pNiNode) != instances.end())
		return;

	auto modelIt = models.find(path);

	if (modelIt == models.end())
		return;

	if (!instances.try_emplace(pNiNode, Instance(formID, path)).second)
		return;

	if (auto nodesIt = formIDNodes.find(formID); nodesIt != formIDNodes.end()) {
		nodesIt->second.push_back(pNiNode);
	} else {
		formIDNodes.try_emplace(formID, eastl::vector{ pNiNode });
	}

	modelIt->second->AddRef();
}

/// Collects the indices (into `lights`) of every light whose sphere
/// (`Vector`, `Radius`) touches or overlaps the world bound of `pNiNode`.
eastl::vector Raytracing::GatherInstanceLights(RE::NiAVObject* pNiNode)
{
	eastl::vector instanceLights;

	const float3 center = Float3(pNiNode->worldBound.center);
	const float radius = pNiNode->worldBound.radius;

	for (size_t i = 0; i < lights.size(); i++) {
		const Light& light = lights[i];

		// Sphere/sphere overlap: centre distance within the sum of both radii.
		const float centerDistance = (center - light.Vector).Length();

		if (centerDistance <= radius + light.Radius)
			instanceLights.push_back(i);
	}

	return instanceLights;
}

/// Depth-first search for the first RE::NiCamera at or below `object`.
/// @return The camera, or nullptr when `object` is null or the subtree holds no camera.
static RE::NiCamera* FindNiCamera(RE::NiAVObject* object)
{
	// Callers pass `cameraRoot.get()`, which may be empty; also covers null child slots.
	if (!object)
		return nullptr;

	if (auto* camera = skyrim_cast(object))
		return camera;

	auto* node = object->AsNode();
	if (!node)
		return nullptr;

	for (auto& child : node->GetChildren()) {
		if (auto* res = FindNiCamera(child.get()))
			return res;
	}

	return nullptr;
}

+void Raytracing::UpdateInstances() +{ + //std::lock_guard lock{ geometryMutex }; + + blasInstances.clear(); + blasInstances.reserve(instances.size()); + + const auto& cullingSettings = settings.AdvancedSettings.Culling; + + auto* player = RE::PlayerCharacter::GetSingleton(); + + auto* tesCamera = RE::PlayerCamera::GetSingleton()->currentState->camera; + RE::NiCamera* camera = FindNiCamera(tesCamera->cameraRoot.get()); + RE::NiPoint3 position = camera->world.translate; + + //auto eye 
= Util::GetAverageEyePosition(); + //float4 cameraPos = globals::game::frameBufferCached.GetCameraPosAdjust(); + + uint32_t shapeIndex = 0; + uint32_t instanceIndex = 0; + + for (auto& [node, instance] : instances) { + if (instance.IsDetached()) + continue; + + if (blasInstances.size() > RTConstants::MAX_INSTANCES) + break; + + if (instance.formID == player->formID && !player->Is3rdPersonVisible()) + continue; + + auto it = models.find(instance.filename); + + // Model was erased but not its (this) instance + if (it == models.end()) + continue; + + auto& model = it->second; + + auto shapeFlags = model->GetShapeFlags(); + + const bool dynamic = shapeFlags.any(Shape::Flags::Dynamic); + const bool skinned = shapeFlags.any(Shape::Flags::Skinned); + const bool landscape = shapeFlags.any(Shape::Flags::Landscape); + + if (settings.DisableSkinned && (dynamic || skinned)) + continue; + + if (skinned && node->GetFlags().any(RE::NiAVObject::Flag::kHidden)) + continue; + + if (cullingSettings.Mode == CullingMode::Smart) { + if (landscape && node->GetFlags().any(RE::NiAVObject::Flag::kHidden)) + continue; + + auto worldBound = node->worldBound; + + float worldBoundRadius = Util::Units::GameUnitsToMeters(worldBound.radius); + float distanceToBounds = Util::Units::GameUnitsToMeters(position.GetDistance(worldBound.center)) - worldBoundRadius; + + auto shaderTypes = model->GetShaderTypes(); + auto features = model->GetFeatures(); + + bool frustumCull = false; + + // Culls small models outside of the player's view + if (cullingSettings.MinRadius > 0) { + // We'll exclude emissive models from radius frustum culling + bool frustumCullable = !(shaderTypes & RE::BSShader::Type::Effect) && !(features & static_cast(RE::BSShaderMaterial::Feature::kGlowMap)); + frustumCull |= frustumCullable && (worldBoundRadius < cullingSettings.MinRadius); + } + + // Culls all models outside of the player's view, must satisfy condition + if (cullingSettings.DistanceMode == CullingDistanceMode::Minimal) { + 
frustumCull |= distanceToBounds > cullingSettings.MinDistance; + } else if (cullingSettings.DistanceMode == CullingDistanceMode::Ratio) { + float distanceToStart = std::max(0.0f, distanceToBounds - cullingSettings.StartDistance); + float adaptativeRadius = distanceToStart * cullingSettings.DistanceRatio; + frustumCull |= worldBoundRadius < adaptativeRadius; + } + + // We'll cull small models or very distant ones (that are outside the player view) + if (frustumCull && !camera->NodeInFrustum(node)) + continue; + + } else if (cullingSettings.Mode == CullingMode::Skyrim) { + if (node->GetFlags().any(RE::NiAVObject::Flag::kHidden)) + continue; + } + + instance.Update(node, position, { it->first, model.get() }, skinningPipeline.get()); + + // This is temporary while I think of a better place to fit this (probably on instance.Update?) + auto firstShapeIndex = shapeIndex; + + for (auto& shape : model->shapes) { + if (shapeIndex >= RTConstants::MAX_SHAPES) { + logger::critical("[RT] UpdateInstances - Total shape count {} would exceed RTConstants::MAX_SHAPES {}", shapeIndex, RTConstants::MAX_SHAPES); + break; + } + + if (model->HideShape(shape.get())) + continue; + + shapeData[shapeIndex] = shape->GetData(); + shapeIndex++; + } + + // No visible shape in instance + if (shapeIndex == firstShapeIndex) + continue; + + // TODO: split double sided models so only them get the flag + bool isDoubleSided = model->GetShaderFlags().any(RE::BSShaderProperty::EShaderPropertyFlag::kTwoSided); + + D3D12_RAYTRACING_INSTANCE_DESC blasInstance{}; + blasInstance.InstanceID = 0; + blasInstance.InstanceMask = 1; + blasInstance.Flags = isDoubleSided ? 
D3D12_RAYTRACING_INSTANCE_FLAG_TRIANGLE_CULL_DISABLE : D3D12_RAYTRACING_INSTANCE_FLAG_NONE; + blasInstance.AccelerationStructure = model->blasBuffer->GetResource()->GetGPUVirtualAddress(); + + // Copy transform matrix from Instance to DX12 BLAS instance + memcpy(blasInstance.Transform, instance.transform.m, sizeof(blasInstance.Transform)); + + blasInstances.push_back(blasInstance); + + instanceData[instanceIndex] = { + instance.transform, + LightData(GatherInstanceLights(node)), + firstShapeIndex + }; + instanceIndex++; + } + + shapeBuffer->Upload(commandList.get(), 0, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + blasInstanceBuffer->UpdateList(blasInstances.data(), std::min(blasInstances.size(), (size_t)RTConstants::MAX_INSTANCES)); + blasInstanceBuffer->Upload(commandList.get()); + + instanceBuffer->UpdateList(instanceData.data(), std::min(instanceIndex, RTConstants::MAX_INSTANCES)); + instanceBuffer->Upload(commandList.get(), 0, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); +} +

/// Asks the model behind every instance to update its BLAS on `commandList` and, when any
/// update was recorded, issues one batched UAV barrier covering the touched BLAS buffers.
void Raytracing::UpdateBLASes()
{
	// Reused across calls so the barrier list keeps its capacity.
	static eastl::vector barriers;
	barriers.clear();
	barriers.reserve(instances.size());

	for (auto& [node, instance] : instances) {
		auto it = models.find(instance.filename);

		// A model can be erased while one of its instances is still registered;
		// dereferencing end() here would be undefined behaviour.
		if (it == models.end())
			continue;

		auto& model = it->second;

		if (model->UpdateBLAS(commandList.get()))
			barriers.push_back(CD3DX12_RESOURCE_BARRIER::UAV(model->blasBuffer->GetResource()));
	}

	const uint blasUpdateCount = (uint)barriers.size();

	if (blasUpdateCount > 0)
		commandList->ResourceBarrier(blasUpdateCount, barriers.data());
}

+auto GetFrustumCorners2(const RE::NiFrustum& frustum) +{ + eastl::array corners; + + // Near plane + corners[0] = { frustum.fLeft, frustum.fTop, frustum.fNear }; // near top-left + corners[1] = { frustum.fRight, frustum.fTop, frustum.fNear }; // near top-right + corners[2] = { frustum.fRight, frustum.fBottom, frustum.fNear }; // near bottom-right + corners[3] = { frustum.fLeft, frustum.fBottom, 
frustum.fNear }; // near bottom-left + + // Far plane + float scale = frustum.fFar / frustum.fNear; + corners[4] = { frustum.fLeft * scale, frustum.fTop * scale, frustum.fFar }; + corners[5] = { frustum.fRight * scale, frustum.fTop * scale, frustum.fFar }; + corners[6] = { frustum.fRight * scale, frustum.fBottom * scale, frustum.fFar }; + corners[7] = { frustum.fLeft * scale, frustum.fBottom * scale, frustum.fFar }; + + return corners; +} + +auto GetFrustumCorners(const RE::NiFrustum& frustum) +{ + eastl::array corners; + + float scale = frustum.fFar / frustum.fNear; + + // Near plane (Y = forward) + corners[0] = { frustum.fLeft, frustum.fNear, frustum.fTop }; // near top-left + corners[1] = { frustum.fRight, frustum.fNear, frustum.fTop }; // near top-right + corners[2] = { frustum.fRight, frustum.fNear, frustum.fBottom }; // near bottom-right + corners[3] = { frustum.fLeft, frustum.fNear, frustum.fBottom }; // near bottom-left + + // Far plane + corners[4] = { frustum.fLeft * scale, frustum.fFar, frustum.fTop * scale }; + corners[5] = { frustum.fRight * scale, frustum.fFar, frustum.fTop * scale }; + corners[6] = { frustum.fRight * scale, frustum.fFar, frustum.fBottom * scale }; + corners[7] = { frustum.fLeft * scale, frustum.fFar, frustum.fBottom * scale }; + + return corners; +} + +void ComputeFrustumAABB(eastl::array corners, float3& bbMin, float3& bbMax, DirectX::XMMATRIX* transform = nullptr) +{ + if (transform) + for (int i = 0; i < 8; i++) { + corners[i] = float3::Transform(corners[i], *transform); + } + + // Initialize AABB + bbMin = corners[0]; + bbMax = corners[0]; + + // Compute min/max for X, Y, Z + for (int i = 1; i < 8; i++) { + bbMin.x = std::min(bbMin.x, corners[i].x); + bbMin.y = std::min(bbMin.y, corners[i].y); + bbMin.z = std::min(bbMin.z, corners[i].z); + + bbMax.x = std::max(bbMax.x, corners[i].x); + bbMax.y = std::max(bbMax.y, corners[i].y); + bbMax.z = std::max(bbMax.z, corners[i].z); + } +} + +bool SphereCastAABB(const float3& sphereCenter, 
float sphereRadius, const float3& dir, float maxDistance, const float3& bbMin, const float3& bbMax, float* hitDistance = nullptr) +{ + auto SphereCastAxis = [](float origin, float dir, float min, float max, float& tmin, float& tmax) -> bool { + if (std::abs(dir) < 1e-6f) { + if (origin < min || origin > max) + return false; + + return true; + } + + float ood = 1.0f / dir; + float t1 = (min - origin) * ood; + float t2 = (max - origin) * ood; + + if (t1 > t2) + std::swap(t1, t2); + + tmin = std::max(tmin, t1); + tmax = std::min(tmax, t2); + + return tmin <= tmax; + }; + + // Expand AABB by sphere radius + float3 min = bbMin - float3(sphereRadius, sphereRadius, sphereRadius); + float3 max = bbMax + float3(sphereRadius, sphereRadius, sphereRadius); + + float tmin = 0.0f; + float tmax = maxDistance; + + if (!SphereCastAxis(sphereCenter.x, dir.x, min.x, max.x, tmin, tmax)) + return false; + + if (!SphereCastAxis(sphereCenter.y, dir.y, min.y, max.y, tmin, tmax)) + return false; + + if (!SphereCastAxis(sphereCenter.z, dir.z, min.z, max.z, tmin, tmax)) + return false; + + if (hitDistance) + *hitDistance = tmin; + + return true; +} + +auto GetPlanes(float3 corners[8]) +{ + eastl::array planes; + + // Near plane + planes[0] = Plane(corners[0], corners[1], corners[2]); + + // Far plane + planes[1] = Plane(corners[5], corners[4], corners[7]); + + // Left plane + planes[2] = Plane(corners[4], corners[0], corners[3]); + + // Right plane + planes[3] = Plane(corners[1], corners[5], corners[6]); + + // Bottom plane + planes[4] = Plane(corners[0], corners[4], corners[5]); + + // Top plane + planes[5] = Plane(corners[3], corners[7], corners[6]); + + return planes; +} + +bool SphereCastFrustum(const float3& sphereCenter, float radius, const float3& dir, const std::array& planes, float maxDistance = FLT_MAX, float* hitDistance = nullptr) +{ + float tmin = 0.0f; + float tmax = maxDistance; + + for (const auto& plane : planes) { + float nDotDir = plane.DotNormal(dir); + float dist = 
plane.DotCoordinate(sphereCenter); + + if (std::abs(nDotDir) < 1e-6f) { + // Ray is parallel to plane + if (dist < -radius) // sphere completely outside + return false; + else + continue; // sphere may be intersecting + } + + float t1 = (-radius - dist) / nDotDir; + float t2 = (radius - dist) / nDotDir; + if (t1 > t2) + std::swap(t1, t2); + + tmin = std::max(tmin, t1); + tmax = std::min(tmax, t2); + + if (tmin > tmax) // no intersection along this ray + return false; + } + + if (hitDistance) + *hitDistance = tmin; + + return true; +} + +void Raytracing::UpdateShadowInstances() +{ + std::lock_guard lock{ renderMutex }; + + //std::lock_guard lock{ geometryMutex }; + + blasShadowInstances.clear(); + blasShadowInstances.reserve(instances.size()); + + DirectX::XMMATRIX transformInverse; + float3 bbMin, bbMax; + float3 localLightDirection; + + auto* tesCamera = RE::PlayerCamera::GetSingleton()->currentState->camera; + RE::NiCamera* camera = FindNiCamera(tesCamera->cameraRoot.get()); + RE::NiPoint3 position = camera->world.translate; + + if (settings.CullShadows) { + auto transform = GetXMFromNiTransform(camera->world); + transformInverse = DirectX::XMMatrixInverse(nullptr, transform); + + RE::NiFrustum frustrum = camera->GetRuntimeData2().viewFrustum; + + auto frustumCorners = GetFrustumCorners(frustrum); + + ComputeFrustumAABB(frustumCorners, bbMin, bbMax); // In local (camera) space + + logger::trace("[RT] UpdateShadowInstances - Min: {}, Max: {}", bbMin, bbMax); + + localLightDirection = float3::TransformNormal(float3(shadowsCBData->Direction), transformInverse); + } + + for (auto& [pNiNode, instance] : instances) { + if (instance.IsDetached()) + continue; + + if (blasShadowInstances.size() > RTConstants::MAX_INSTANCES) + break; + + if (settings.CullShadows) { + auto worldBound = pNiNode->worldBound; + float3 localCenter = float3::Transform(Float3(worldBound.center), transformInverse); + + logger::trace("[RT] UpdateShadowInstances - Local Center: {}, Radius: {}", 
localCenter, worldBound.radius); + + if (!SphereCastAABB(localCenter, worldBound.radius, localLightDirection, FLT_MAX, bbMin, bbMax)) + continue; + } + + auto it = models.find(instance.filename); + + // Model was erased but not the instance + if (it == models.end()) + continue; + + auto& model = it->second; + + instance.Update(pNiNode, position, { it->first, model.get() }, skinningPipeline.get()); + + D3D12_RAYTRACING_INSTANCE_DESC blasShadowInstance = { + .InstanceID = static_cast(blasShadowInstances.size()), + .InstanceMask = 1, + .AccelerationStructure = model->blasBuffer->GetResource()->GetGPUVirtualAddress() + }; + + memcpy(blasShadowInstance.Transform, instance.transform.m, sizeof(blasShadowInstance.Transform)); + + blasShadowInstances.push_back(blasShadowInstance); + } + + blasShadowInstanceBuffer->UpdateList(blasShadowInstances.data(), std::min(blasShadowInstances.size(), (size_t)RTConstants::MAX_INSTANCES)); + blasShadowInstanceBuffer->Upload(commandList.get()); +} + +void Raytracing::PostRaytraceCleanup() +{ + uint64_t completedFenceValue = d3d12Fence ? 
d3d12Fence->GetCompletedValue() : 0; + + while (!tempGPUData.empty() && tempGPUData.front().fenceValue <= completedFenceValue) { + tempGPUData.pop_front(); + } +} + +void Raytracing::BSShader_SetupGeometry([[maybe_unused]] RE::BSShader* oThis, [[maybe_unused]] RE::BSRenderPass* pPass, [[maybe_unused]] uint32_t renderFlags) +{ + if (!Active() || !renderingWorld) + return; + + UpdateLights(); +} + +void Raytracing::BuildTLAS() +{ + if (tlas) + return; + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = { + .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL, + .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE, + .NumDescs = RTConstants::MAX_INSTANCES, + .DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY, + .InstanceDescs = blasInstanceBuffer->resource->GetGPUVirtualAddress() + }; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo; + d3d12Device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo); + + auto desc = BASIC_BUFFER_DESC; + desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + // TLAS + { + desc.Width = prebuildInfo.ResultDataMaxSizeInBytes; + DX::ThrowIfFailed(d3d12Device->CreateCommittedResource(&DEFAULT_HEAP, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, nullptr, IID_PPV_ARGS(&tlas))); + DX::ThrowIfFailed(tlas->SetName(L"TLAS")); + + // SRV + D3D12_SHADER_RESOURCE_VIEW_DESC tlasDesc = {}; + tlasDesc.ViewDimension = D3D12_SRV_DIMENSION_RAYTRACING_ACCELERATION_STRUCTURE; + tlasDesc.RaytracingAccelerationStructure.Location = tlas->GetGPUVirtualAddress(); + tlasDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + + d3d12Device->CreateShaderResourceView(nullptr, &tlasDesc, shadowHeap->CPUHandle(ShadowsHeap::Slot::TLAS)); + d3d12Device->CreateShaderResourceView(nullptr, &tlasDesc, giHeap->CPUHandle(GIHeap::Slot::TLAS)); + } + + // TLAS scratch (used for rebuilding) + desc.Width = 
std::max(prebuildInfo.ScratchDataSizeInBytes, 8ULL); + DX::ThrowIfFailed(d3d12Device->CreateCommittedResource(&DEFAULT_HEAP, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, IID_PPV_ARGS(&tlasScratch))); + DX::ThrowIfFailed(tlasScratch->SetName(L"TLAS scratch")); + + // TLAS update scratch + /*desc.Width = std::max(prebuildInfo.UpdateScratchDataSizeInBytes * TLAS_BUFFER_SIZE_MULT, 8ULL); // WARP bug workaround: use 8 if the required size was reported as less + DX::ThrowIfFailed(d3d12Device->CreateCommittedResource(&DEFAULT_HEAP, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(&tlasUpdateScratch))); + DX::ThrowIfFailed(tlasUpdateScratch->SetName(L"TLAS update scratch"));*/ +} + +void Raytracing::RebuildTLAS(ID3D12GraphicsCommandList4* pCommandList, size_t numDescs, D3D12_GPU_VIRTUAL_ADDRESS instanceDescs) +{ + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = { + .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL, + .Flags = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE, + .NumDescs = static_cast(numDescs), + .DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY, + .InstanceDescs = instanceDescs + }; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo; + d3d12Device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo); + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC buildDesc = { + .DestAccelerationStructureData = tlas->GetGPUVirtualAddress(), + .Inputs = inputs, + .ScratchAccelerationStructureData = tlasScratch->GetGPUVirtualAddress() + }; + + pCommandList->BuildRaytracingAccelerationStructure(&buildDesc, 0, nullptr); + + const auto& asBarrier = CD3DX12_RESOURCE_BARRIER::UAV(tlas.get()); + pCommandList->ResourceBarrier(1, &asBarrier); +} + +uint2 Raytracing::GetScreenSize() const +{ + auto screenSize = Util::ConvertToDynamic(globals::state->screenSize); + + return { + static_cast(screenSize.x), + static_cast(screenSize.y) + }; 
+} + +uint2 Raytracing::GetRenderSize() +{ + auto renderSizeOut = GetScreenSize(); + +#if defined(DLSS_RR) + if (settings.Denoiser == Denoiser::DLSSRR) { + GetDLSSRROptimal(); + + if (settings.DLSSRR.QualityMode != DLSSRRQuality::NativeRes) { + renderSizeOut = { optimalSettings.optimalRenderWidth, optimalSettings.optimalRenderHeight }; + } + } else +#endif + { + uint resolutionFactor = 1; + + switch (settings.Resolution) { + case (Resolution::Half): + resolutionFactor = 2; + break; + case (Resolution::Quarter): + resolutionFactor = 4; + break; + case (Resolution::Eighth): + resolutionFactor = 8; + break; + default: + break; + } + + renderSizeOut = { renderSizeOut.x / resolutionFactor, renderSizeOut.y / resolutionFactor }; + } + + return renderSizeOut; +} + +bool Raytracing::UpdateRenderSize() +{ + uint2 renderSizeNew = GetRenderSize(); + + if (renderSize != renderSizeNew) { + renderSize = renderSizeNew; + + return true; + } + + return false; +} + +void Raytracing::DrawRTGI() +{ + // We mutex here to prevent changes to resources while the command list is in flight, we could just queue everything maybe? 
+ std::lock_guard lock{ renderMutex }; + + if (!d3d11Context) { + logger::error("d3d11Context is nullptr"); + } + + if (!d3d11Fence) { + logger::error("d3d11Fence is nullptr"); + } + + auto& rendererRuntimeData = globals::game::renderer->GetRuntimeData(); + auto main = rendererRuntimeData.renderTargets[RE::RENDER_TARGETS::kMAIN]; + + d3d11Context->CopyResource(mainTexture->resource11, main.texture); + + if (!RaytracedShadows()) + CopyDepth(); + + if (settings.WhiteFurnace) { + float clearColor[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; + d3d11Context->ClearRenderTargetView(rendererRuntimeData.renderTargets[ALBEDO].RTV, clearColor); + } + + CopyConvertTextures(); + + // Wait for D3D11 to finish + { + //d3d11Context->Flush1(D3D11_CONTEXT_TYPE_ALL, nullptr); + d3d11Context->Flush(); + DX::ThrowIfFailed(d3d11Context->Signal(d3d11Fence.get(), fenceValue)); + DX::ThrowIfFailed(commandQueue->Wait(d3d12Fence.get(), fenceValue)); + fenceValue++; + } + + auto startTime = Util::GetNowSecs(); + UpdateMeasureTime(startTime); + + if (pixCapture && (!pixCaptureStarted || pixTDR) && settings.PIXCaptureLocation == PIXCaptureLocation::GlobalIllumination) { + pixCaptureStarted = true; + + /*if (pixMultiFrame) { + PIXGpuCaptureNextFrames(L"I:/Temp/Pix/TDRCap.pix", 60); + } else {*/ + //PIXBeginCapture(PIX_CAPTURE_GPU, PIXCaptureParameters + ga->BeginCapture(); + //} + } + + UpdateInstances(); + + skinningPipeline->Dispatch(commandList.get(), d3d12Device.get()); + + UpdateBLASes(); + + // Upload buffers + lightBuffer->UploadRegion(commandList.get(), sizeof(Light) * lights.size(), 0, 0, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + if (UpdateRenderSize()) + SetupOutputRT(); + +#ifdef DLSS_RR + if (settings.Denoiser == Denoiser::DLSSRR) { + //GetDLSSRROptimal(); // TODO: Remove this once we can handle dynamic resolution changes properly + SetDLSSRROptions(); + CheckFrameConstants(); + } +#endif + + // Update framebuffer + { + frameData->ViewInverse = 
globals::game::frameBufferCached.GetCameraViewInverse().Transpose(); + frameData->ProjInverse = globals::game::frameBufferCached.GetCameraProjInverse().Transpose(); + + float4 position = globals::game::frameBufferCached.GetCameraPosAdjust(); + frameData->Position = float3(position.x, position.y, position.z); + + float4 positionPrev = globals::game::frameBufferCached.GetCameraPreviousPosAdjust(); + frameData->PositionPrev = float3(positionPrev.x, positionPrev.y, positionPrev.z); + + frameData->FrameCount = globals::state->frameCount; + + frameData->CameraData = Util::GetCameraData(); + + auto eye = Util::GetCameraData(0); + float2 ndcToViewMult = float2(2.0f / eye.projMat(0, 0), -2.0f / eye.projMat(1, 1)); + float2 ndcToViewAdd = float2(-1.0f / eye.projMat(0, 0), 1.0f / eye.projMat(1, 1)); + + frameData->NDCToView = float4(ndcToViewMult.x, ndcToViewMult.y, ndcToViewAdd.x, ndcToViewAdd.y); + + frameData->Roughness = settings.Roughness; + frameData->Metalness = settings.Metalness; + + frameData->Emissive = settings.Emissive; + frameData->Effect = settings.Effect; + frameData->Sky = settings.Sky; + + frameData->Lights = static_cast(lights.size()); + + frameData->PixelConeSpreadAngle = std::atan((2.0f / eye.projMat.m[1][1]) / renderSize.y); + frameData->TexLODBias = settings.TexLODBias; + + auto& sssSettings = settings.AdvancedSettings.SSSSettings; + frameData->SSSSampleCount = sssSettings.SampleCount; + frameData->SSSMaxSampleRadius = sssSettings.MaxSampleRadius; + frameData->EnableSssTransmission = sssSettings.EnableTransmission; + frameData->SSSMaterialOverride = sssSettings.MaterialOverride; + frameData->OverrideSSSTransmissionColor = sssSettings.OverrideTransmissionColor; + frameData->OverrideSSSScatteringColor = sssSettings.OverrideScatteringColor; + frameData->OverrideSSSScale = sssSettings.OverrideScale; + frameData->OverrideSSSAnisotropy = sssSettings.OverrideAnisotropy; + + frameData->RussianRoulette = settings.RussianRoulette; + + if (Util::IsInterior()) { + 
frameData->EmittanceColor = float3::One; + } else { + const auto* sky = RE::Sky::GetSingleton(); + + if (sky && sky->region) + frameData->EmittanceColor = Float3(sky->region->emittanceColor); + else + frameData->EmittanceColor = float3::One; + } + + auto& cloudShadows = globals::features::cloudShadows; + frameData->CloudOpacity = cloudShadows.loaded ? cloudShadows.settings.Opacity : 0.0f; + + frameData->SHaRC = settings.SHaRC.GetFrameData(settings.TraceMode == TraceMode::SHaRC); // Sets UpdatePass to true if in SHaRC mode + + frameData->DispatchSize = renderSize; + + // Update Features + { + auto wetnessEffect = globals::features::wetnessEffects.GetCommonBufferData(); + auto linearLighting = globals::features::linearLighting.GetCommonBufferData(); + + std::memcpy(&frameData->Features.ExtendedMaterial, &globals::features::extendedMaterials.settings, sizeof(CPMSettings)); + std::memcpy(&frameData->Features.WetnessEffects, &wetnessEffect, sizeof(WetnessEffectsSettings)); + std::memcpy(&frameData->Features.CloudShadows, &globals::features::cloudShadows.settings, sizeof(CloudShadowsSettings)); + std::memcpy(&frameData->Features.HairSpecular, &globals::features::hairSpecular.settings, sizeof(HairSpecularSettings)); + std::memcpy(&frameData->Features.ExtendedTranslucency, &globals::features::extendedTranslucency.GetCommonBufferData(), sizeof(ExtendedTranslucencySettings)); + std::memcpy(&frameData->Features.LinearLighting, &linearLighting, sizeof(LinearLightingSettings)); + + static_assert(sizeof(CPMSettings) == sizeof(ExtendedMaterials::Settings)); + static_assert(sizeof(WetnessEffectsSettings) == sizeof(WetnessEffects::PerFrame)); + static_assert(sizeof(CloudShadowsSettings) == sizeof(CloudShadows::Settings)); + static_assert(sizeof(HairSpecularSettings) == sizeof(HairSpecular::Settings)); + static_assert(sizeof(ExtendedTranslucencySettings) == sizeof(ExtendedTranslucency::PerFrame)); + static_assert(sizeof(LinearLightingSettings) == 
sizeof(LinearLighting::PerFrameData)); + } + + // Upload buffer 0, for SHaRC resolve pass + frameBuffer->Update(frameData.get(), sizeof(FrameData), 0, 0); + + if (settings.TraceMode == TraceMode::SHaRC) { + // Upload buffer 1, for main RT pass + frameData->SHaRC.UpdatePass = false; + frameBuffer->Update(frameData.get(), sizeof(FrameData), 0, 1); + } + + // Upload buffer 0 to GPU + frameBuffer->Upload(commandList.get()); + } + + BuildTLAS(); + RebuildTLAS(commandList.get(), blasInstances.size(), blasInstanceBuffer->resource->GetGPUVirtualAddress()); + + { + auto setupRTPipeline = [&]() { + commandList->SetPipelineState1(pipelineRT.get()); + commandList->SetComputeRootSignature(rootSignature.get()); + + auto* pHeap = giHeap->Heap(); + commandList->SetDescriptorHeaps(1, &pHeap); + + // Parameter 0: UAV table + commandList->SetComputeRootDescriptorTable(0, giHeap->TableGPUHandle(GIHeap::Table::UAV)); + + // Parameter 1: Fixed SRVs + commandList->SetComputeRootDescriptorTable(1, giHeap->TableGPUHandle(GIHeap::Table::SRV)); + + // Parameter 2: Vertex buffers + commandList->SetComputeRootDescriptorTable(2, giHeap->TableGPUHandle(GIHeap::Table::VertexBuffer)); + + // Parameter 3: Triangle buffers + commandList->SetComputeRootDescriptorTable(3, giHeap->TableGPUHandle(GIHeap::Table::TriangleBuffer)); + + // Parameter 4: Textures + commandList->SetComputeRootDescriptorTable(4, giHeap->TableGPUHandle(GIHeap::Table::Textures)); + + // Parameter 5: Constant buffer + commandList->SetComputeRootConstantBufferView(5, frameBuffer->resource->GetGPUVirtualAddress()); + }; + + // Raytracing + { + setupRTPipeline(); + + D3D12_DISPATCH_RAYS_DESC dispatchDesc{}; + dispatchDesc.Depth = 1; + + shaderBindingTable->FillDispatchShaderBindingTable(dispatchDesc, shaderBindingTableBuffer->resource->GetGPUVirtualAddress()); + + // SHaRC Update pass + if (settings.TraceMode == TraceMode::SHaRC) { + dispatchDesc.Width = DivideRoundUp(renderSize.x, 5.0f); + dispatchDesc.Height = 
DivideRoundUp(renderSize.y, 5.0f); + + commandList->DispatchRays(&dispatchDesc); + + sharcPipeline->Resolve(commandList.get(), frameBuffer->resource.get()); + + // Restore RT pipeline + commandList->SetPipelineState1(pipelineRT.get()); + commandList->SetComputeRootSignature(rootSignature.get()); + + // Restore RT pipeline + setupRTPipeline(); + + // Update Frame Buffer for main RT pass, maybe we should use two buffers? + // Using one GPU heap buffer with multiple upload buffers felt like a hack (but it works) + frameBuffer->Upload(commandList.get(), 1); + + // This function uses CopyBufferRegion to upload only the UpdatePass variable, but it failed to work... + //frameBuffer->UploadRegion(commandList.get(), sizeof(SHaRCFrameData::UpdatePass), offsetof(FrameData, SHaRC) + offsetof(SHaRCFrameData, UpdatePass), 1); + } + + // Main pass + { + dispatchDesc.Width = renderSize.x; + dispatchDesc.Height = renderSize.y; + + commandList->DispatchRays(&dispatchDesc); + + CD3DX12_RESOURCE_BARRIER rtUAVBarrier[3] = { + CD3DX12_RESOURCE_BARRIER::UAV(outputTexture->resource.get()), + CD3DX12_RESOURCE_BARRIER::UAV(specularAlbedoTexture->resource.get()), + CD3DX12_RESOURCE_BARRIER::UAV(specularHitDistanceTexture->resource.get()) + }; + + commandList->ResourceBarrier(_countof(rtUAVBarrier), rtUAVBarrier); + + if (settings.PathTracing) { + CD3DX12_RESOURCE_BARRIER ptUAVBarrier[2] = { + CD3DX12_RESOURCE_BARRIER::UAV(diffuseAlbedoPathTracingTexture->resource.get()), + CD3DX12_RESOURCE_BARRIER::UAV(normalRoughnessPathTracingTexture->resource.get()) + }; + + commandList->ResourceBarrier(_countof(ptUAVBarrier), ptUAVBarrier); + } + } + } + + if (settings.DebugOutput == DebugOutput::None) { +#ifdef DLSS_RR + if (settings.Denoiser == Denoiser::DLSSRR) { + { + auto screenSize = GetScreenSize(); + + sl::Extent inputExtent{ 0, 0, renderSize.x, renderSize.y }; + sl::Extent inputNativeExtent{ 0, 0, screenSize.x, screenSize.y }; + sl::Extent outputExtent{ 0, 0, screenSize.x, screenSize.y }; + + 
uint32_t state = settings.PathTracing ? D3D12_RESOURCE_STATE_UNORDERED_ACCESS : D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE; + + sl::Resource colorIn = { sl::ResourceType::eTex2d, outputTexture->resource.get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS }; + sl::Resource colorOut = { sl::ResourceType::eTex2d, mainTexture->resource.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE }; + sl::Resource depth = { sl::ResourceType::eTex2d, depthTexture->resource.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE }; + sl::Resource mvec = { sl::ResourceType::eTex2d, motionVectorsTexture->resource.get(), 0 }; + sl::Resource diffuseAlbedo = { sl::ResourceType::eTex2d, settings.PathTracing ? diffuseAlbedoPathTracingTexture->resource.get() : diffuseAlbedoTexture->resource.get(), state }; + sl::Resource specularAlbedo = { sl::ResourceType::eTex2d, specularAlbedoTexture->resource.get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS }; + sl::Resource normalRoughness = { sl::ResourceType::eTex2d, settings.PathTracing ? 
normalRoughnessPathTracingTexture->resource.get() : normalRoughnessTexture->resource.get(), state }; + sl::Resource specHitDistance = { sl::ResourceType::eTex2d, specularHitDistanceTexture->resource.get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS }; + + sl::ResourceTag colorInTag = sl::ResourceTag{ &colorIn, sl::kBufferTypeScalingInputColor, sl::ResourceLifecycle::eOnlyValidNow, &inputExtent }; + sl::ResourceTag colorOutTag = sl::ResourceTag{ &colorOut, sl::kBufferTypeScalingOutputColor, sl::ResourceLifecycle::eOnlyValidNow, &outputExtent }; + sl::ResourceTag depthTag = sl::ResourceTag{ &depth, sl::kBufferTypeDepth, sl::ResourceLifecycle::eValidUntilPresent, &inputNativeExtent }; + sl::ResourceTag mvecTag = sl::ResourceTag{ &mvec, sl::kBufferTypeMotionVectors, sl::ResourceLifecycle::eValidUntilPresent, &inputExtent }; + sl::ResourceTag diffuseAlbedoTag = sl::ResourceTag{ &diffuseAlbedo, sl::kBufferTypeAlbedo, sl::ResourceLifecycle::eValidUntilPresent, &inputExtent }; + sl::ResourceTag specularAlbedoTag = sl::ResourceTag{ &specularAlbedo, sl::kBufferTypeSpecularAlbedo, sl::ResourceLifecycle::eValidUntilPresent, &inputExtent }; + sl::ResourceTag normalRoughnessTag = sl::ResourceTag{ &normalRoughness, sl::kBufferTypeNormalRoughness, sl::ResourceLifecycle::eValidUntilPresent, &inputExtent }; + sl::ResourceTag specHitDistanceTag = sl::ResourceTag{ &specHitDistance, sl::kBufferTypeSpecularHitDistance, sl::ResourceLifecycle::eValidUntilPresent, &inputExtent }; + + sl::ResourceTag resourceTags[] = { colorInTag, colorOutTag, depthTag, mvecTag, diffuseAlbedoTag, specularAlbedoTag, normalRoughnessTag, specHitDistanceTag }; + if (SL_FAILED(result, slSetTag(slViewportHandle, resourceTags, _countof(resourceTags), commandList.get()))) { + logger::error("[DLSS RR] Failed to set DLSS RR tags, error: {}", magic_enum::enum_name(result)); + return; + } + } + + const sl::BaseStructure* inputs[] = { &slViewportHandle }; + + if (SL_FAILED(result, slEvaluateFeature(sl::kFeatureDLSS_RR, 
*frameToken, inputs, _countof(inputs), commandList.get()))) { + logger::error("[DLSS RR] Failed to evaluate DLSS RR feature, error: {}", magic_enum::enum_name(result)); + } + } else +#endif + if (settings.Denoiser == Denoiser::NRD) { + nrdPipeline->Denoise(commandList.get()); + } else { + auto barrier = CD3DX12_RESOURCE_BARRIER::Transition(mainTexture->resource.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_DEST); + commandList->ResourceBarrier(1, &barrier); + + outputTexture->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_COPY_SOURCE); + commandList->CopyResource(mainTexture->resource.get(), outputTexture->resource.get()); + outputTexture->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + + barrier = CD3DX12_RESOURCE_BARRIER::Transition(mainTexture->resource.get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &barrier); + } + } else { + auto barrier = CD3DX12_RESOURCE_BARRIER::Transition(mainTexture->resource.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_DEST); + commandList->ResourceBarrier(1, &barrier); + + if (settings.DebugOutput == DebugOutput::Output) { + outputTexture->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_COPY_SOURCE); + commandList->CopyResource(mainTexture->resource.get(), outputTexture->resource.get()); + outputTexture->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + + } else if (settings.DebugOutput == DebugOutput::Reflectance) { + const auto& barrierCopy = CD3DX12_RESOURCE_BARRIER::Transition(specularAlbedoTexture->resource.get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); + commandList->ResourceBarrier(1, &barrierCopy); + + commandList->CopyResource(mainTexture->resource.get(), specularAlbedoTexture->resource.get()); + + const auto& barrierUAV = 
CD3DX12_RESOURCE_BARRIER::Transition(specularAlbedoTexture->resource.get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + commandList->ResourceBarrier(1, &barrierUAV); + } else if (settings.DebugOutput == DebugOutput::SpecularHitDistance) { + specularHitDistanceTexture->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_COPY_SOURCE); + commandList->CopyResource(mainTexture->resource.get(), specularHitDistanceTexture->resource.get()); + specularHitDistanceTexture->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } else if (settings.DebugOutput == DebugOutput::NormalRoughnessGbuffer) { + auto normalRoughnessProxy = settings.PathTracing ? normalRoughnessPathTracingTexture->resource.get() : normalRoughnessTexture->resource.get(); + auto transitionCopy = CD3DX12_RESOURCE_BARRIER::Transition(normalRoughnessProxy, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_SOURCE); + commandList->ResourceBarrier(1, &transitionCopy); + + commandList->CopyResource(mainTexture->resource.get(), normalRoughnessProxy); + + auto transitionNonPixelRes = CD3DX12_RESOURCE_BARRIER::Transition(normalRoughnessProxy, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &transitionNonPixelRes); + } else if (settings.DebugOutput == DebugOutput::GeometryNormalMetalness) { + auto transitionCopy = CD3DX12_RESOURCE_BARRIER::Transition(GNMDTexture.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_SOURCE); + commandList->ResourceBarrier(1, &transitionCopy); + + commandList->CopyResource(mainTexture->resource.get(), GNMDTexture.get()); + + auto transitionNonPixelRes = CD3DX12_RESOURCE_BARRIER::Transition(GNMDTexture.get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &transitionNonPixelRes); + } else if (settings.DebugOutput == DebugOutput::Albedo) { 
+ auto transitionCopy = CD3DX12_RESOURCE_BARRIER::Transition(albedoTexture.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_SOURCE); + commandList->ResourceBarrier(1, &transitionCopy); + + commandList->CopyResource(mainTexture->resource.get(), albedoTexture.get()); + + auto transitionNonPixelRes = CD3DX12_RESOURCE_BARRIER::Transition(albedoTexture.get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &transitionNonPixelRes); + } else if (settings.DebugOutput == DebugOutput::Diffuse) { + auto diffuseAlbedoProxy = settings.PathTracing ? diffuseAlbedoPathTracingTexture->resource.get() : diffuseAlbedoTexture->resource.get(); + auto transitionCopy = CD3DX12_RESOURCE_BARRIER::Transition(diffuseAlbedoProxy, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_COPY_SOURCE); + commandList->ResourceBarrier(1, &transitionCopy); + + commandList->CopyResource(mainTexture->resource.get(), diffuseAlbedoProxy); + + auto transitionNonPixelRes = CD3DX12_RESOURCE_BARRIER::Transition(diffuseAlbedoProxy, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &transitionNonPixelRes); + } + + barrier = CD3DX12_RESOURCE_BARRIER::Transition(mainTexture->resource.get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(1, &barrier); + } + + DX::ThrowIfFailed(commandList->Close()); + + if (canMeasure) { + mainCPUTime = static_cast((Util::GetNowSecs() - startTime) * 1000.0); + startTime = Util::GetNowSecs(); + } + + ID3D12CommandList* commandListPtr = commandList.get(); + commandQueue->ExecuteCommandLists(1, &commandListPtr); + } + + // Wait for D3D12 to finish + DX::ThrowIfFailed(commandQueue->Signal(d3d12Fence.get(), fenceValue)); + + // Wait until GPU is done with previous frame + if (d3d12Fence->GetCompletedValue() < fenceValue) { + 
DX::ThrowIfFailed(d3d12Fence->SetEventOnCompletion(fenceValue, fenceEvent)); + WaitForSingleObject(fenceEvent, INFINITE); + } + + if (frameChecker.IsNewFrame()) { + //logger::info("[RT] Executed Frame: {}", frameIndex); + frameIndex++; + } + + if (canMeasure) + mainGPUTime = static_cast((Util::GetNowSecs() - startTime) * 1000.0); + + if (pixCapture && pixCaptureStarted && !pixTDR && settings.PIXCaptureLocation == PIXCaptureLocation::GlobalIllumination) { + ga->EndCapture(); + pixCapture = pixMultiFrame; + pixCaptureStarted = false; + } + + DX::ThrowIfFailed(d3d11Context->Wait(d3d11Fence.get(), fenceValue)); + fenceValue++; + + //New frame, reset + DX::ThrowIfFailed(commandAllocator->Reset()); + DX::ThrowIfFailed(commandList->Reset(commandAllocator.get(), nullptr)); + + /*if (pixCapture) { + pixCaptureStarted = true; + ga->BeginCapture(); + }*/ + + PostRaytraceCleanup(); + + // Check for camera movement for accumulation denoiser + if (settings.Denoiser == Denoiser::Accumulation && settings.PathTracing) { + const auto& currentViewProj = globals::game::frameBufferCached.GetCameraViewProjUnjittered(); + const auto& prevViewProj = globals::game::frameBufferCached.GetCameraPreviousViewProjUnjittered(); + + bool matrixChanged = std::memcmp(¤tViewProj, &prevViewProj, sizeof(currentViewProj)) != 0; + + float3 posDelta = frameData->Position - frameData->PositionPrev; + float movementSq = posDelta.x * posDelta.x + posDelta.y * posDelta.y + posDelta.z * posDelta.z; + const float posThreshold = 0.01f; + + cameraHasMoved = matrixChanged || (movementSq > posThreshold); + + if (cameraHasMoved) { + accumulatedFrames = 0; + } else { + accumulatedFrames++; + } + } + + if (settings.DebugOutput == DebugOutput::None) { + if (settings.Denoiser == Denoiser::SVGF) { + auto sampler = samplerState.get(); + d3d11Context->CSSetSamplers(0, 1, &sampler); + + auto* renderSizeCB = renderResCB->CB(); + d3d11Context->CSSetConstantBuffers(0, 1, &renderSizeCB); + + auto* frameBufferCB = 
*globals::game::perFrame.get(); + d3d11Context->CSSetConstantBuffers(12, 1, &frameBufferCB); + + // Diffuse + svgfDenoiser->Denoise(d3d11Context.get(), renderSize, settings.SVGFDiffuse, normalRoughnessTexture.get(), mainTexture.get(), true); + + // Specular + svgfDenoiser->Denoise(d3d11Context.get(), renderSize, settings.SVGFSpecular, normalRoughnessTexture.get(), specularAlbedoTexture.get(), false); + } + } + + // True Linear to Gamma + if (settings.ConvertToGamma || !settings.PathTracing && settings.Denoiser == Denoiser::SVGF) { + d3d11Context->CSSetShader(compositeCS.get(), nullptr, 0); + + d3d11Context->CopyResource(main.textureCopy, main.texture); + + eastl::array srvs = { + main.SRVCopy, + diffuseAlbedoTexture->srv, + mainTexture->srv, + specularAlbedoTexture->srv + }; + d3d11Context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + + d3d11Context->CSSetUnorderedAccessViews(0, 1, &main.UAV, nullptr); + + auto dispatchCount = Util::GetScreenDispatchCount(); + d3d11Context->Dispatch(dispatchCount.x, dispatchCount.y, 1); + } else if (settings.PathTracing && settings.Denoiser == Denoiser::Accumulation) { + if (accumulatedFrames == 0 || cameraHasMoved) { + d3d11Context->CopyResource(accumulationTexture->resource11, mainTexture->resource11); + d3d11Context->CopyResource(main.texture, mainTexture->resource11); + } else { + accumulationCBData->AccumulatedFrames = (uint)accumulatedFrames; + accumulationCB->Update(accumulationCBData.get(), sizeof(AccumulationCBData)); + + auto* accumulationCBPtr = accumulationCB->CB(); + d3d11Context->CSSetConstantBuffers(2, 1, &accumulationCBPtr); + + d3d11Context->CSSetShader(accumulationCS.get(), nullptr, 0); + + d3d11Context->CopyResource(accumulationTextureCopy->resource11, accumulationTexture->resource11); + + eastl::array srvs = { + accumulationTextureCopy->srv, + mainTexture->srv + }; + d3d11Context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + + ID3D11UnorderedAccessView* accumulationUAV = 
accumulationTexture->uav; + d3d11Context->CSSetUnorderedAccessViews(0, 1, &accumulationUAV, nullptr); + + auto dispatchCount = Util::GetScreenDispatchCount(); + d3d11Context->Dispatch(dispatchCount.x, dispatchCount.y, 1); + + d3d11Context->CopyResource(main.texture, accumulationTexture->resource11); + } + } else { + d3d11Context->CopyResource(main.texture, mainTexture->resource11); + } + + // Clear specular if Path Tracing is enabled + if (settings.PathTracing) { + auto renderer = globals::game::renderer; + + float clearColor[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + d3d11Context->ClearRenderTargetView(renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kINDIRECT_DOWNSCALED].RTV, clearColor); + } +} + +void Raytracing::UpdateShadowsFrameBuffer() +{ + shadowsCBData->CameraData = Util::GetCameraData(); + + auto eye = Util::GetCameraData(0); + float2 ndcToViewMult = float2(2.0f / eye.projMat(0, 0), -2.0f / eye.projMat(1, 1)); + float2 ndcToViewAdd = float2(-1.0f / eye.projMat(0, 0), 1.0f / eye.projMat(1, 1)); + shadowsCBData->NDCToView = float4(ndcToViewMult.x, ndcToViewMult.y, ndcToViewAdd.x, ndcToViewAdd.y); + + shadowsCBData->ViewInverse = globals::game::frameBufferCached.GetCameraViewInverse().Transpose(); + + float4 cameraPosition = globals::game::frameBufferCached.GetCameraPosAdjust(); + shadowsCBData->Position = float4(cameraPosition.x, cameraPosition.y, cameraPosition.z, 0.0f); + + if (shadowLight) { + auto direction = Normalize(Float3(-shadowLight->GetShadowDirectionalLightRuntimeData().sunVector)); + shadowsCBData->Direction = float4(direction.x, direction.y, direction.z, 0.0f); + } + + shadowsCB->Update(shadowsCBData.get(), sizeof(ShadowsFrameData)); +} + +void Raytracing::RenderShadows() +{ + //logger::info("[RT] RenderShadows - ShadowLight [0x{:x}], TLAS [0x{:x}]", reinterpret_cast(shadowLight), reinterpret_cast(tlas.get())); + + if (!shadowLight) + return; + + if (!shadowFrameChecker.IsNewFrame()) + return; + + //std::lock_guard lock{ renderMutex }; + 
+ auto rendererRuntimeData = globals::game::renderer->GetRuntimeData(); + auto shadowMask = rendererRuntimeData.renderTargets[RE::RENDER_TARGETS::kSHADOW_MASK]; + + CopyDepth(); + + // Tell DX11 to finish and wait + //d3d11Context->Flush1(D3D11_CONTEXT_TYPE_ALL, nullptr); + d3d11Context->Flush(); + DX::ThrowIfFailed(d3d11Context->Signal(d3d11Fence.get(), fenceValue)); + DX::ThrowIfFailed(commandQueue->Wait(d3d12Fence.get(), fenceValue)); + fenceValue++; + + auto startTime = Util::GetNowSecs(); + UpdateMeasureTime(startTime); + + if (pixCapture && (!pixCaptureStarted || pixTDR) && settings.PIXCaptureLocation == PIXCaptureLocation::Shadows) { + pixCaptureStarted = true; + + /*if (pixMultiFrame) { + PIXGpuCaptureNextFrames(L"I:/Temp/Pix/TDRCap.pix", 60); + } else {*/ + //PIXBeginCapture(PIX_CAPTURE_GPU, PIXCaptureParameters + ga->BeginCapture(); + //} + } + + // Do DX12 work... + UpdateShadowInstances(); + + skinningPipeline->Dispatch(commandList.get(), d3d12Device.get()); + + //UpdateDynamicSkinning(commandList.get()); + + shadowsCB->Upload(commandList.get()); + + BuildTLAS(); + RebuildTLAS(commandList.get(), blasShadowInstances.size(), blasShadowInstanceBuffer->resource->GetGPUVirtualAddress()); + + commandList->SetPipelineState1(shadowPipeline.get()); + commandList->SetComputeRootSignature(shadowRS.get()); + + auto computeHeapPtr = shadowHeap->Heap(); + commandList->SetDescriptorHeaps(1, &computeHeapPtr); + + // UAV table + commandList->SetComputeRootDescriptorTable(0, shadowHeap->TableGPUHandle(ShadowsHeap::Table::UAV)); + + // SRV table + commandList->SetComputeRootDescriptorTable(1, shadowHeap->TableGPUHandle(ShadowsHeap::Table::SRV)); + + // Constant buffer + commandList->SetComputeRootConstantBufferView(2, shadowsCB->resource->GetGPUVirtualAddress()); + + CD3DX12_RESOURCE_BARRIER ctuBarrier[1] = { + CD3DX12_RESOURCE_BARRIER::Transition(shadowMaskTexture->resource.get(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + }; + 
commandList->ResourceBarrier(_countof(ctuBarrier), ctuBarrier); + + // Dispatch + auto shadowMaskDesc = shadowMaskTexture->resource->GetDesc(); + + D3D12_GPU_VIRTUAL_ADDRESS sbtAddr = shadowSBTBuffer->resource->GetGPUVirtualAddress(); + + D3D12_DISPATCH_RAYS_DESC dispatchDesc = { + .RayGenerationShaderRecord = { + .StartAddress = sbtAddr, + .SizeInBytes = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES }, + .MissShaderTable = { .StartAddress = sbtAddr + D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT, .SizeInBytes = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES }, + .HitGroupTable = { .StartAddress = sbtAddr + 2 * D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT, .SizeInBytes = D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES }, + .Width = static_cast(shadowMaskDesc.Width), + .Height = shadowMaskDesc.Height, + .Depth = 1 + }; + + commandList->DispatchRays(&dispatchDesc); + + CD3DX12_RESOURCE_BARRIER utcBarrier[1] = { + CD3DX12_RESOURCE_BARRIER::Transition(shadowMaskTexture->resource.get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COMMON), + }; + commandList->ResourceBarrier(_countof(utcBarrier), utcBarrier); + + DX::ThrowIfFailed(commandList->Close()); + + if (canMeasure) { + shadowsCPUTime = static_cast((Util::GetNowSecs() - startTime) * 1000.0); + startTime = Util::GetNowSecs(); + } + + ID3D12CommandList* commandListPtr = commandList.get(); + commandQueue->ExecuteCommandLists(1, &commandListPtr); + + // Wait for D3D12 to finish and signal DX11 + DX::ThrowIfFailed(commandQueue->Signal(d3d12Fence.get(), fenceValue)); + + // Wait for GPU + if (d3d12Fence->GetCompletedValue() < fenceValue) { + DX::ThrowIfFailed(d3d12Fence->SetEventOnCompletion(fenceValue, fenceEvent)); + WaitForSingleObject(fenceEvent, INFINITE); + } + + if (frameChecker.IsNewFrame()) + frameIndex++; + + if (canMeasure) + shadowsGPUTime = static_cast((Util::GetNowSecs() - startTime) * 1000.0); + + if (pixCapture && pixCaptureStarted && !pixTDR && settings.PIXCaptureLocation == PIXCaptureLocation::Shadows) { + 
ga->EndCapture(); + pixCapture = pixMultiFrame; // Do not stop capture when doing multiframe + pixCaptureStarted = false; + } + + DX::ThrowIfFailed(d3d11Context->Wait(d3d11Fence.get(), fenceValue)); + fenceValue++; + + // Reset for next command list usage + DX::ThrowIfFailed(commandAllocator->Reset()); + DX::ThrowIfFailed(commandList->Reset(commandAllocator.get(), nullptr)); + + PostRaytraceCleanup(); + + d3d11Context->CopyResource(shadowMask.texture, shadowMaskTexture->resource11); +} + +void Raytracing::DataLoaded() +{ + CellAttachDetachEventHandler::Register(); + BGSActorCellEventHandler::Register(); +} + +void Raytracing::PostPostLoad() +{ + Hooks::Install(); + + RE::GetINISetting("bReflectLODLand:Water")->data.b = false; + RE::GetINISetting("bReflectLODObjects:Water")->data.b = false; + RE::GetINISetting("bReflectLODTrees:Water")->data.b = false; + RE::GetINISetting("bReflectSky:Water")->data.b = true; + + //MenuOpenCloseEventHandler::Register(); + //TESLoadGameEventHandler::Register(); + + TESObjectLoadedEventHandler::Register(); + + //TESCellFullyLoadedEventHandler::Register(); +} +static std::wstring GetLatestWinPixGpuCapturerPath() +{ + LPWSTR programFilesPath = nullptr; + SHGetKnownFolderPath(FOLDERID_ProgramFiles, KF_FLAG_DEFAULT, NULL, &programFilesPath); + + std::filesystem::path pixInstallationPath = programFilesPath; + pixInstallationPath /= "Microsoft PIX"; + + std::wstring newestVersionFound; + + for (auto const& directory_entry : std::filesystem::directory_iterator(pixInstallationPath)) { + if (directory_entry.is_directory()) { + if (newestVersionFound.empty() || newestVersionFound < directory_entry.path().filename().c_str()) { + newestVersionFound = directory_entry.path().filename().c_str(); + } + } + } + + if (newestVersionFound.empty()) { + // TODO: Error, no PIX installation found + } + + return pixInstallationPath / newestVersionFound / L"WinPixGpuCapturer.dll"; +} + +void DumpDredBreadcrumbs(const D3D12_DRED_AUTO_BREADCRUMBS_OUTPUT1& 
breadcrumbsOutput) +{ + const D3D12_AUTO_BREADCRUMB_NODE1* pNode = breadcrumbsOutput.pHeadAutoBreadcrumbNode; + + while (pNode) { + const UINT32 completedOps = *pNode->pLastBreadcrumbValue; + const UINT32 totalOps = pNode->BreadcrumbCount; + + logger::error("[RT] Command List: {}", pNode->pCommandListDebugNameA ? pNode->pCommandListDebugNameA : ""); + logger::error("[RT] Queue: {}", pNode->pCommandQueueDebugNameA ? pNode->pCommandQueueDebugNameA : ""); + logger::error("[RT] Completed Ops: {} / {}", completedOps, totalOps); + + if (pNode->pCommandHistory && totalOps > 0) { + // Last executed command + UINT32 lastIndex = (completedOps > 0) ? completedOps - 1 : 0; + auto lastOp = pNode->pCommandHistory[lastIndex]; + logger::error("[RT] Last Executed Command: {}", magic_enum::enum_name(lastOp)); + + // Next (likely faulting) command + if (completedOps < totalOps) { + auto nextOp = pNode->pCommandHistory[completedOps]; + logger::error("[RT] Next (Likely Faulting) Command: {}", magic_enum::enum_name(nextOp)); + } + } + + logger::error(""); // empty line for readability + pNode = pNode->pNext; + } +} + +void Raytracing::DeviceRemovedHandler() +{ + if (settings.EnablePIXCapture) { + ga->EndCapture(); + pixCapture = false; + pixCaptureStarted = false; + pixMultiFrame = false; + pixTDR = false; + } + + if (settings.EnableDebugDevice) { + // 1. Device removed reason + HRESULT reason = d3d12Device->GetDeviceRemovedReason(); + logger::error("[RT] ============================================================"); + logger::error("[RT] DEVICE REMOVED! HRESULT = 0x{:08X}", reason); + + winrt::com_ptr dred; + if (FAILED(d3d12Device->QueryInterface(IID_PPV_ARGS(&dred)))) { + logger::error("[RT] DRED not available on this device."); + return; + } + + // --------------------------------------------------------------------- + // 2. 
Auto Breadcrumbs + // --------------------------------------------------------------------- + D3D12_DRED_AUTO_BREADCRUMBS_OUTPUT1 bcOutput = {}; + if (SUCCEEDED(dred->GetAutoBreadcrumbsOutput1(&bcOutput)) && bcOutput.pHeadAutoBreadcrumbNode) { + DumpDredBreadcrumbs(bcOutput); + } else { + logger::error("[RT] No breadcrumbs available."); + } + } +} + +void Raytracing::InitD3D12(ID3D11Device* ppDevice, ID3D11DeviceContext* pImmediateContext, IDXGIAdapter* a_adapter) +{ + Hooks::InstallD3D11Hooks(ppDevice); + + if (settings.EnablePIXCapture) { + // Check to see if a copy of WinPixGpuCapturer.dll has already been injected into the application. + // This may happen if the application is launched through the PIX UI. + if (GetModuleHandle(L"WinPixGpuCapturer.dll") == 0) { + auto pixGPUCapturerPath = GetLatestWinPixGpuCapturerPath(); + + if (pixGPUCapturerPath.empty()) { + logger::warn("[RT] PIX capture is enabled but binaries where not found."); + } else { + LoadLibrary(pixGPUCapturerPath.c_str()); + } + } + } + + logger::info("[RT] Creating D3D12 device"); + + // Set Device + DX::ThrowIfFailed(ppDevice->QueryInterface(IID_PPV_ARGS(&d3d11Device))); + + // Set Context Device + DX::ThrowIfFailed(pImmediateContext->QueryInterface(IID_PPV_ARGS(&d3d11Context))); + + bool debugDevice = !settings.EnablePIXCapture && settings.EnableDebugDevice; + + // Create debug device + if (debugDevice) { + winrt::com_ptr debugController; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) { + debugController->EnableDebugLayer(); + debugController->SetEnableGPUBasedValidation(TRUE); + } else { + logger::critical("[RT] Debug layer creation failed."); + } + + winrt::com_ptr pDredSettings; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&pDredSettings)))) { + pDredSettings->SetAutoBreadcrumbsEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + pDredSettings->SetPageFaultEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + } + } + + if (settings.EnablePIXCapture) { + 
DX::ThrowIfFailed(DXGIGetDebugInterface1(0, IID_PPV_ARGS(&ga))); + } + + // Create Device + { + DX::ThrowIfFailed(D3D12CreateDevice(a_adapter, D3D_FEATURE_LEVEL_12_1, IID_PPV_ARGS(&d3d12Device))); + + // Check hardware raytracing tier + { + D3D12_FEATURE_DATA_D3D12_OPTIONS5 options5 = {}; + if (SUCCEEDED(d3d12Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &options5, sizeof(options5)))) { + if (options5.RaytracingTier != D3D12_RAYTRACING_TIER_NOT_SUPPORTED) + logger::info("[RT] Hardware ray tracing supported! Tier: {}", magic_enum::enum_name(options5.RaytracingTier)); + else + logger::warn("[RT] Hardware ray tracing not supported."); + } + } + + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + queueDesc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL; + queueDesc.NodeMask = 0; + + DX::ThrowIfFailed(d3d12Device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&commandQueue))); + + DX::ThrowIfFailed(d3d12Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&commandAllocator))); + DX::ThrowIfFailed(d3d12Device->CreateCommandList1(0, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_LIST_FLAG_NONE, IID_PPV_ARGS(&commandList))); + + DX::ThrowIfFailed(commandQueue->SetName(L"Command Queue")); + DX::ThrowIfFailed(commandAllocator->SetName(L"Command Allocator")); + DX::ThrowIfFailed(commandList->SetName(L"Command List")); + + DX::ThrowIfFailed(commandAllocator->Reset()); + DX::ThrowIfFailed(commandList->Reset(commandAllocator.get(), nullptr)); + //DX::ThrowIfFailed(commandList->Close()); + } + + if (debugDevice) { + winrt::com_ptr infoQueue; + if (SUCCEEDED(d3d12Device->QueryInterface(IID_PPV_ARGS(&infoQueue)))) { + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, TRUE); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, FALSE); + } else { + 
logger::critical("[RT] Debug break creation failed."); + } + } + + // Create Interop + { + HANDLE sharedFenceHandle; + DX::ThrowIfFailed(d3d12Device->CreateFence(fenceValue, D3D12_FENCE_FLAG_SHARED, IID_PPV_ARGS(&d3d12Fence))); + DX::ThrowIfFailed(d3d12Device->CreateSharedHandle(d3d12Fence.get(), nullptr, GENERIC_ALL, nullptr, &sharedFenceHandle)); + DX::ThrowIfFailed(d3d11Device->OpenSharedFence(sharedFenceHandle, IID_PPV_ARGS(&d3d11Fence))); + CloseHandle(sharedFenceHandle); + } + + // D3D12 Memory Allocator + { + D3D12MA::ALLOCATOR_DESC allocatorDesc = {}; + allocatorDesc.pDevice = d3d12Device.get(); + allocatorDesc.pAdapter = a_adapter; + allocatorDesc.Flags = D3D12MA_RECOMMENDED_ALLOCATOR_FLAGS; + + DX::ThrowIfFailed(D3D12MA::CreateAllocator(&allocatorDesc, allocator.put())); + } + + // D3D12MA Pools + { + // Upload pool + { + D3D12MA::POOL_DESC poolDesc = {}; + poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_UPLOAD; + poolDesc.Flags = D3D12MA_RECOMMENDED_POOL_FLAGS; + poolDesc.HeapFlags = D3D12MA_RECOMMENDED_HEAP_FLAGS | D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + + DX::ThrowIfFailed(allocator->CreatePool(&poolDesc, uploadPool.put())); + } + + // Default pools + { + D3D12MA::POOL_DESC poolDesc = {}; + poolDesc.HeapProperties.Type = D3D12_HEAP_TYPE_DEFAULT; + poolDesc.Flags = D3D12MA_RECOMMENDED_POOL_FLAGS; + poolDesc.HeapFlags = D3D12MA_RECOMMENDED_HEAP_FLAGS | D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS; + + DX::ThrowIfFailed(allocator->CreatePool(&poolDesc, dynamicVertexPool.put())); + DX::ThrowIfFailed(allocator->CreatePool(&poolDesc, vertexPool.put())); + DX::ThrowIfFailed(allocator->CreatePool(&poolDesc, vertexCopyPool.put())); + DX::ThrowIfFailed(allocator->CreatePool(&poolDesc, skinningPool.put())); + DX::ThrowIfFailed(allocator->CreatePool(&poolDesc, trianglePool.put())); + + DX::ThrowIfFailed(allocator->CreatePool(&poolDesc, blasScratchPool.put())); + DX::ThrowIfFailed(allocator->CreatePool(&poolDesc, blasPool.put())); + } + } + + if (settings.EnableDebugDevice || 
settings.EnablePIXCapture) { + HANDLE disconnectEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); + DX::ThrowIfFailed(d3d12Fence->SetEventOnCompletion(UINT64_MAX, disconnectEvent)); + + std::thread([this, disconnectEvent]() { + WaitForSingleObject(disconnectEvent, INFINITE); + DeviceRemovedHandler(); + }).detach(); + } +} + +void Raytracing::CreateRootSignature() +{ + // UAV range + giHeap->CreateTable( + GIHeap::Table::UAV, + D3D12_DESCRIPTOR_RANGE_TYPE_UAV, + { { GIHeap::Slot::Output, 1 }, + { GIHeap::Slot::DiffuseAlbedoPathTracing, 1 }, + { GIHeap::Slot::NormalRoughnessPathTracing, 1 }, + { GIHeap::Slot::Reflectance, 1 }, + { GIHeap::Slot::SpecularHitDist, 1 }, + { GIHeap::Slot::SHaRCHashEntries, 1 }, + { GIHeap::Slot::SHaRCLock, 1 }, + { GIHeap::Slot::SHaRCAccumulation, 1 }, + { GIHeap::Slot::SHaRCResolved, 1 } }); + + // Fixed SRV ranges + giHeap->CreateTable( + GIHeap::Table::SRV, + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + { { GIHeap::Slot::Main, 1, 0 }, + { GIHeap::Slot::Depth, 1, 0 }, + { GIHeap::Slot::Albedo, 1, 0 }, + { GIHeap::Slot::NormalRoughness, 1, 0 }, + { GIHeap::Slot::GNMD, 1, 0 }, + { GIHeap::Slot::TLAS, 1, 0 }, + { GIHeap::Slot::SkyHemisphere, 1, 0 }, + { GIHeap::Slot::Lights, 1, 0 }, + { GIHeap::Slot::Shapes, 1, 0 }, + { GIHeap::Slot::Instances, 1, 0 } }); + + // Vertex buffers (unbounded) + giHeap->CreateTable( + GIHeap::Table::VertexBuffer, + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + { { GIHeap::Slot::Vertices, UINT_MAX, 1, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE } }); + + // Triangle buffers (unbounded) + giHeap->CreateTable( + GIHeap::Table::TriangleBuffer, + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + { { GIHeap::Slot::Triangles, UINT_MAX, 2, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE } }); + + // Textures (unbounded) + giHeap->CreateTable( + GIHeap::Table::Textures, + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + { { GIHeap::Slot::Textures, UINT_MAX, 3, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE } }); + + auto rootParameters = 
giHeap->GetRootParameters(); + + CD3DX12_ROOT_PARAMETER1 constantRootParam; + constantRootParam.InitAsConstantBufferView(0, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE); + rootParameters.push_back(constantRootParam); + + CD3DX12_STATIC_SAMPLER_DESC staticSampler(0); // register s0 + + auto flags = D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; + + // Create root signature + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSigDesc; + rootSigDesc.Init_1_1( + static_cast(rootParameters.size()), + rootParameters.data(), + 1, + &staticSampler, + flags); + + winrt::com_ptr signature; + winrt::com_ptr error; + + HRESULT hr = D3DX12SerializeVersionedRootSignature(&rootSigDesc, D3D_ROOT_SIGNATURE_VERSION_1_1, signature.put(), error.put()); + + if (FAILED(hr)) { + if (error) { + logger::error("[RT] D3DX12SerializeVersionedRootSignature {}", (char*)error->GetBufferPointer()); + } + DX::ThrowIfFailed(hr); + } + + DX::ThrowIfFailed(d3d12Device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&rootSignature))); + DX::ThrowIfFailed(rootSignature->SetName(L"RT Root Signature")); +} + +void Raytracing::CreateShadowsRootSignature() +{ + // UAV range + shadowHeap->CreateTable( + ShadowsHeap::Table::UAV, + D3D12_DESCRIPTOR_RANGE_TYPE_UAV, + { { ShadowsHeap::Slot::ShadowMask, 1 } }); + + // SRV + shadowHeap->CreateTable( + ShadowsHeap::Table::SRV, + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + { { ShadowsHeap::Slot::Depth, 1 }, + { ShadowsHeap::Slot::TLAS, 1 } }); + + auto rootParameters = shadowHeap->GetRootParameters(); + + CD3DX12_ROOT_PARAMETER1 constantRootParam; + constantRootParam.InitAsConstantBufferView(0, 0); + rootParameters.push_back(constantRootParam); + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC 
rootSigDesc; + rootSigDesc.Init_1_1( + static_cast(rootParameters.size()), + rootParameters.data(), + 0, + nullptr, + D3D12_ROOT_SIGNATURE_FLAG_NONE); + + winrt::com_ptr signature; + winrt::com_ptr error; + + HRESULT hr = D3DX12SerializeVersionedRootSignature(&rootSigDesc, D3D_ROOT_SIGNATURE_VERSION_1_1, signature.put(), error.put()); + + if (FAILED(hr)) { + if (error) { + logger::error("[RT] D3DX12SerializeVersionedRootSignature {}", (char*)error->GetBufferPointer()); + } + DX::ThrowIfFailed(hr); + } + + DX::ThrowIfFailed(d3d12Device->CreateRootSignature(0, signature->GetBufferPointer(), signature->GetBufferSize(), IID_PPV_ARGS(&shadowRS))); + DX::ThrowIfFailed(shadowRS->SetName(L"Shadow Root Signature")); +} + +void Raytracing::ClearShaderCache() +{ + copyDepthCS = nullptr; // This is actually optional + CompileShaders(); +} + +void Raytracing::CompileShaders() +{ + if (!rootSignature) { + CreateRootSignature(); + CompileRTGIShaders(); + } + + if (!shadowRS) { + CreateShadowsRootSignature(); + CompileRTShadowsShaders(); + } + + CompileComputeShaders(); +} + +void Raytracing::CompileRTGIShaders() +{ + const auto bouncesWStr = std::to_wstring(settings.Bounces); + const auto samplesWStr = std::to_wstring(settings.SamplesPerPixel); + + eastl::vector defines = { + { L"MAX_BOUNCES", bouncesWStr.c_str() }, + { L"MAX_SAMPLES", samplesWStr.c_str() }, + }; + + auto& advSettings = settings.AdvancedSettings; + + if (advSettings.RIS.Enabled) + defines.emplace_back(L"RIS"); + + const auto risMaxCandidates = std::to_wstring(static_cast(advSettings.RIS.MaxCandidates)); + defines.emplace_back(L"RIS_MAX_CANDIDATES", risMaxCandidates.c_str()); + + if (advSettings.GGXEnergyConservation) + defines.emplace_back(L"GGX_ENERGY_CONSERVATION"); + + const auto hairMode = std::to_wstring(static_cast(advSettings.HairBSDF)); + defines.emplace_back(L"HAIR_MODE", hairMode.c_str()); + + if (advSettings.SSSSettings.Enabled) + defines.emplace_back(L"SUBSURFACE_SCATTERING"); + + const auto 
diffuseMode = std::to_wstring(static_cast(advSettings.DiffuseBRDF)); + defines.emplace_back(L"DIFFUSE_MODE", diffuseMode.c_str()); + + const auto lightEvalMode = std::to_wstring(static_cast(advSettings.LightEvalMode)); + defines.emplace_back(L"LIGHTEVAL_MODE", lightEvalMode.c_str()); + + const auto lightingMode = std::to_wstring(static_cast(advSettings.LightingMode)); + defines.emplace_back(L"LIGHTING_MODE", lightingMode.c_str()); + + if (settings.WhiteFurnace) + defines.emplace_back(L"DEBUG_WHITE_FURNACE"); + + if (settings.TraceMode == TraceMode::SHaRC) + defines.emplace_back(L"SHARC"); + + if (settings.PathTracing) + defines.emplace_back(L"PATH_TRACING"); + + if (settings.Denoiser == Denoiser::SVGF) + defines.emplace_back(L"RAW_RADIANCE"); + + const auto definesWStr = StringViewToWString(std::string_view{ debugDefines }); + + if (!debugDefines.empty()) { + defines.emplace_back(definesWStr.c_str()); + } + + winrt::com_ptr rayGenBlob; + ShaderUtils::CompileShader(rayGenBlob, L"Data/Shaders/Raytracing/GI/RayGeneration.hlsl", defines); + + winrt::com_ptr missBlob, closestHitBlob, anyHitBlob; + ShaderUtils::CompileShader(missBlob, L"Data/Shaders/Raytracing/GI/Miss.hlsl", defines); + ShaderUtils::CompileShader(closestHitBlob, L"Data/Shaders/Raytracing/GI/ClosestHit.hlsl", defines); + ShaderUtils::CompileShader(anyHitBlob, L"Data/Shaders/Raytracing/GI/AnyHit.hlsl", defines); + + winrt::com_ptr shadowMissBlob, shadowAnyHitBlob; + ShaderUtils::CompileShader(shadowMissBlob, L"Data/Shaders/Raytracing/GI/ShadowMiss.hlsl"); + ShaderUtils::CompileShader(shadowAnyHitBlob, L"Data/Shaders/Raytracing/GI/ShadowAnyHit.hlsl"); + + DX12::RTPipelineBuilder pipelineBuilder; + + // Init pipeline + { + // Libraries + pipelineBuilder.AddRayGenLib(rayGenBlob.get(), L"RayGeneration"); + + pipelineBuilder.AddMissLib(missBlob.get(), L"IndirectMiss"); + pipelineBuilder.AddMissLib(shadowMissBlob.get(), L"ShadowMiss"); + + pipelineBuilder.AddHitLib(closestHitBlob.get(), L"IndirectClosestHit"); + 
+ pipelineBuilder.AddAnyHitLib(anyHitBlob.get(), L"IndirectAnyHit"); + pipelineBuilder.AddAnyHitLib(shadowAnyHitBlob.get(), L"ShadowAnyHit"); + + // Hit groups + pipelineBuilder.AddHitGroup(L"IndirectHitGroup", L"IndirectClosestHit", L"IndirectAnyHit"); + pipelineBuilder.AddHitGroup(L"ShadowHitGroup", L"", L"ShadowAnyHit"); + + // Shader + pipeline config + pipelineBuilder.AddShaderConfig(20, 8); + pipelineBuilder.AddGlobalRootSignature(rootSignature.get()); + pipelineBuilder.AddPipelineConfig(1); // Max recursion depth + + auto desc = pipelineBuilder.MakeStateObjectDesc(); + + auto createPipeline = [&](winrt::com_ptr& pipeline, LPCWSTR name) { + if (pipeline) + pipeline = nullptr; + + HRESULT hr = d3d12Device->CreateStateObject(desc, IID_PPV_ARGS(&pipeline)); + + if (FAILED(hr)) { + logger::critical("CreateStateObject failed: {}", hr); + } + + DX::ThrowIfFailed(hr); + + DX::ThrowIfFailed(pipeline->SetName(std::format(L"{} Pipeline", name).c_str())); + }; + + createPipeline(pipelineRT, L"RT"); + } + + // Init shader tables + { + winrt::com_ptr props; + pipelineRT->QueryInterface(props.put()); + + uint64_t shaderBindingTableSizePrev = shaderBindingTable ? 
shaderBindingTable->GetTotalSize() : 0; + + if (shaderBindingTable) + shaderBindingTable.reset(); + + shaderBindingTable = eastl::make_unique(pipelineBuilder.CreateShaderBindingTable(props.get())); + + auto shaderBindingTableSize = shaderBindingTable->GetTotalSize(); + logger::debug("[RT] GI SBT size: {}", shaderBindingTableSize); + + // Recreate buffer if necessary + if (!shaderBindingTableBuffer || shaderBindingTableSize > shaderBindingTableSizePrev) { + if (shaderBindingTableBuffer) + shaderBindingTableBuffer.reset(); + + shaderBindingTableBuffer = eastl::make_unique(d3d12Device.get(), shaderBindingTableSize); + shaderBindingTableBuffer->SetName(L"RT Shader Binding Table Buffer"); + } + + std::vector shaderBindingTableCPU(shaderBindingTableSize); + shaderBindingTable->Build(shaderBindingTableCPU.data()); + + shaderBindingTable->LogShaderBindingTable(shaderBindingTableBuffer->resource->GetGPUVirtualAddress()); + + shaderBindingTableBuffer->Update(shaderBindingTableCPU.data(), shaderBindingTableSize); + shaderBindingTableBuffer->Upload(commandList.get()); + shaderBindingTableBuffer->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + } +} + +void Raytracing::CompileRTShadowsShaders() +{ + winrt::com_ptr shadowsRTBlob; + ShaderUtils::CompileShader(shadowsRTBlob, L"Data/Shaders/Raytracing/ShadowsRT.hlsl"); + + // Init pipeline + { + D3D12_DXIL_LIBRARY_DESC lib = { + .DXILLibrary = { + .pShaderBytecode = shadowsRTBlob->GetBufferPointer(), + .BytecodeLength = shadowsRTBlob->GetBufferSize() } + }; + + D3D12_HIT_GROUP_DESC hitGroup = { + .HitGroupExport = L"HitGroup", + .Type = D3D12_HIT_GROUP_TYPE_TRIANGLES, + .ClosestHitShaderImport = L"ClosestHit" + }; + + D3D12_RAYTRACING_SHADER_CONFIG shaderCfg = { + .MaxPayloadSizeInBytes = 4, + .MaxAttributeSizeInBytes = 8, + }; + + D3D12_GLOBAL_ROOT_SIGNATURE globalSig = { shadowRS.get() }; + + D3D12_RAYTRACING_PIPELINE_CONFIG pipelineCfg = { .MaxTraceRecursionDepth = 2 }; + + 
D3D12_STATE_SUBOBJECT subobjects[] = { + { .Type = D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY, .pDesc = &lib }, + { .Type = D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP, .pDesc = &hitGroup }, + { .Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG, .pDesc = &shaderCfg }, + { .Type = D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE, .pDesc = &globalSig }, + { .Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG, .pDesc = &pipelineCfg } + }; + D3D12_STATE_OBJECT_DESC desc = { .Type = D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE, + .NumSubobjects = std::size(subobjects), + .pSubobjects = subobjects }; + + HRESULT hr = d3d12Device->CreateStateObject(&desc, IID_PPV_ARGS(&shadowPipeline)); + + if (FAILED(hr)) { + logger::error("CreateStateObject failed: {}", hr); + } + + DX::ThrowIfFailed(hr); + + DX::ThrowIfFailed(shadowPipeline->SetName(L"Shadow Pipeline")); + } + + // Init shader tables + { + winrt::com_ptr props; + shadowPipeline->QueryInterface(props.put()); + + size_t shaderBindingTableSize = D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT * 3; + + shadowSBTBuffer = eastl::make_unique(d3d12Device.get(), shaderBindingTableSize); + shadowSBTBuffer->SetName(L"Shadows SBT"); + + std::vector shaderBindingTableCPU(shaderBindingTableSize); + + void* data = shaderBindingTableCPU.data(); + auto writeId = [&](const wchar_t* name) { + void* id = props->GetShaderIdentifier(name); + memcpy(data, id, D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + data = static_cast(data) + + D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT; + }; + + writeId(L"RayGeneration"); + writeId(L"Miss"); + writeId(L"HitGroup"); + + shadowSBTBuffer->Update(shaderBindingTableCPU.data(), shaderBindingTableSize); + shadowSBTBuffer->Upload(commandList.get()); + shadowSBTBuffer->TransitionBarrier(commandList.get(), D3D12_RESOURCE_STATE_GENERIC_READ); + } +} + +void Raytracing::CompileComputeShaders() +{ + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\CopyDepthCS.hlsl", {}, 
"cs_5_0")); rawPtr) + copyDepthCS.attach(rawPtr); + + const auto skyHemiSize = std::to_string(RTConstants::SKY_HEMI_SIZE); + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\CubeToHemiCS.hlsl", { { "RESOLUTION", skyHemiSize.c_str() } }, "cs_5_0")); rawPtr) + cubeToHemiCS.attach(rawPtr); + + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\ConvertTexturesCS.hlsl", {}, "cs_5_0")); rawPtr) + convertTexturesCS.attach(rawPtr); + + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\ConvertTexturesCS.hlsl", { { "PT", "" } }, "cs_5_0")); rawPtr) + convertTexturesPTCS.attach(rawPtr); + + CompileCompositeShader(); +} + +void Raytracing::CompileCompositeShader() +{ + std::vector> defines; + + if (settings.ConvertToGamma) { + defines.emplace_back("GAMMA_OUTPUT", ""); + } + + if (!settings.PathTracing && settings.Denoiser == Denoiser::SVGF) { + defines.emplace_back("COMPOSITE", ""); + defines.emplace_back("DIFFUSE", ""); + defines.emplace_back("SPECULAR", ""); + } + + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\CompositeCS.hlsl", defines, "cs_5_0")); rawPtr) + compositeCS.attach(rawPtr); + + std::vector> accDefines; + accDefines.emplace_back("ACCUMULATION", ""); + if (settings.ConvertToGamma) { + accDefines.emplace_back("GAMMA_OUTPUT", ""); + } + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\CompositeCS.hlsl", accDefines, "cs_5_0")); rawPtr) + accumulationCS.attach(rawPtr); +} + +Raytracing::SharedData Raytracing::GetCommonBufferData() const +{ + return { + .InteriorDirectional = settings.GlobalIllumination ? 0.0f : 1.0f, + .Ambient = settings.GlobalIllumination ? 0.0f : 1.0f, + .EnvMap = settings.GlobalIllumination ? 
0.0f : 1.0f, + .Albedo = settings.GlobalIllumination + }; +} + +RE::BSEventNotifyControl Raytracing::MenuOpenCloseEventHandler::ProcessEvent(const RE::MenuOpenCloseEvent* a_event, RE::BSTEventSource*) +{ + // When entering a loadscreen + if (a_event->menuName == RE::LoadingMenu::MENU_NAME) { + logger::debug("MenuOpenCloseEventHandler::ProcessEvent - Opening: {}", a_event->opening); + + if (a_event->opening) { + //auto& rt = globals::features::raytracing; + } + } + + return RE::BSEventNotifyControl::kContinue; +} + +RE::BSEventNotifyControl Raytracing::TESLoadGameEventHandler::ProcessEvent(const RE::TESLoadGameEvent* a_event, RE::BSTEventSource*) +{ + logger::debug("TESLoadGameEventHandler::ProcessEvent {}", reinterpret_cast(a_event)); + + return RE::BSEventNotifyControl::kContinue; +} + +RE::BSEventNotifyControl Raytracing::TESObjectLoadedEventHandler::ProcessEvent(const RE::TESObjectLoadedEvent* a_event, RE::BSTEventSource*) +{ + if (!a_event) + return RE::BSEventNotifyControl::kContinue; + + auto* eventRef = RE::TESForm::LookupByID(a_event->formID); + + if (a_event->loaded) + return RE::BSEventNotifyControl::kContinue; + + auto formID = eventRef->GetFormID(); + + globals::features::raytracing.RemoveInstance(formID, true); + + return RE::BSEventNotifyControl::kContinue; +} + +RE::BSEventNotifyControl Raytracing::CellAttachDetachEventHandler::ProcessEvent(const RE::CellAttachDetachEvent* a_event, RE::BSTEventSource*) +{ + bool attaching = a_event->status == RE::CellAttachDetachEvent::Status::StartAttach; + bool detaching = a_event->status == RE::CellAttachDetachEvent::Status::StartDetach; + + if (!attaching && !detaching) + return RE::BSEventNotifyControl::kContinue; + + auto& runtimeData = a_event->cell->GetRuntimeData(); + + for (auto& reference : runtimeData.references) { + globals::features::raytracing.SetInstanceDetached(reference->GetFormID(), detaching); + } + + auto* land = runtimeData.cellLand; + + if (!land) + return RE::BSEventNotifyControl::kContinue; + 
+ globals::features::raytracing.SetInstanceDetached(land->GetFormID(), detaching); + + return RE::BSEventNotifyControl::kContinue; +} + +RE::BSEventNotifyControl Raytracing::BGSActorCellEventHandler::ProcessEvent(const RE::BGSActorCellEvent* a_event, RE::BSTEventSource*) +{ + if (a_event->flags.underlying() != static_cast(RE::BGSActorCellEvent::CellFlag::kEnter)) + return RE::BSEventNotifyControl::kContinue; + + auto* tesWaterSystem = RE::TESWaterSystem::GetSingleton(); + + /*if (tesWaterSystem->waterObjects.empty()) { + tesWaterSystem->waterObjects.push_back(RE::NiPointer(globals::features::raytracing.waterObject.get())); + }*/ + + if (tesWaterSystem->waterReflections.empty()) { + tesWaterSystem->waterReflections.push_back(globals::features::raytracing.waterReflections); + } + + tesWaterSystem->Enable(); + + return RE::BSEventNotifyControl::kContinue; +} \ No newline at end of file diff --git a/src/Features/Raytracing.h b/src/Features/Raytracing.h new file mode 100644 index 0000000000..f6bf3afb04 --- /dev/null +++ b/src/Features/Raytracing.h @@ -0,0 +1,1567 @@ +#pragma once + +#include "PCH.h" + +#define DLSS_RR + +#include "Features/Upscaling/DX12SwapChain.h" +#include "LightLimitFix.h" +#include "OverlayFeature.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "State.h" + +#include + +#include "Features/Raytracing/Core/Instance.h" +#include "Features/Raytracing/Core/Model.h" +#include "Features/Raytracing/Core/Shape.h" + +#include "Features/Raytracing/Helpers/ModelSpaceToTangent.h" + +#include "Features/Raytracing/Allocator.h" +#include "Features/Raytracing/Buffer.h" +#include "Features/Raytracing/BufferMA.h" +#include "Features/Raytracing/Heap.h" +#include "Features/Raytracing/HeapManager.h" +#include "Features/Raytracing/magic_enum_spec.h" +#include "Features/Raytracing/Pipelines/SkinningPipeline.h" +#include "Features/Raytracing/Pipelines/SHaRCPipeline.h" +#include "Features/Raytracing/Pipelines/SVGFPipeline.h" 
+#include "Features/Raytracing/Pipelines/NRDPipeline.h" +#include "Features/Raytracing/RTConstants.h" +#include "Features/Raytracing/RTPipelineBuilder.h" +#include "Features/Raytracing/ShaderBindingTable.h" +#include "Features/Raytracing/TextureSharing.h" +#include "Features/Raytracing/Types.h" +#include "Features/Raytracing/Utils.h" + +#include "Features/Raytracing/RE/CellAttachDetachEvent.h" + +#include "Raytracing/Includes/Types/FrameData.hlsli" +#include "Raytracing/Includes/Types/Instance.hlsli" +#include "Raytracing/Includes/Types/Light.hlsli" +#include "Raytracing/Includes/Types/Shape.hlsli" +#include "Raytracing/Includes/Types/Material.hlsli" +#include "Raytracing/Includes/Types/ShadowsFrameData.hlsli" +#include "Raytracing/Includes/Types/Skinning.hlsli" +#include "Raytracing/Includes/Types/Triangle.hlsli" +#include "Raytracing/Includes/Types/Vertex.hlsli" + +#include "Raytracing/Denoiser/SVGF/SVGF.hlsli" + +#define NTDDI_VERSION NTDDI_WINBLUE + +#include + +#ifdef DLSS_RR +# define NV_WINDOWS +# pragma warning(push) +# pragma warning(disable: 4471) +# include +# include +# include +# include +# include +# include +# include +# pragma warning(pop) +#endif + +using namespace magic_enum::bitwise_operators; + +#define STATIC_ASSERT_ENUM_COUNT(EnumType, Array) \ + static_assert(_countof(Array) == magic_enum::enum_count(), "Array size must match enum count"); + +struct Raytracing : public OverlayFeature +{ + enum MarkerFlags : uint32_t + { + Compressed = 1 << 18, + MapMarker = 1 << 22, // TESObjectACTI + IsMarker = 1 << 23 // TESObjectSTAT + }; + + struct GIHeapDef + { + enum class Table + { + UAV, + SRV, + VertexBuffer, + TriangleBuffer, + Textures + }; + + enum class Slot + { + Output, + DiffuseAlbedoPathTracing, + NormalRoughnessPathTracing, + Reflectance, + SpecularHitDist, + SHaRCHashEntries, + SHaRCLock, + SHaRCAccumulation, + SHaRCResolved, + Main, + Depth, + Albedo, + NormalRoughness, + GNMD, + TLAS, + SkyHemisphere, + Lights, + Shapes, + Instances, + 
Vertices, + Triangles = Vertices + RTConstants::MAX_SHAPES, + Textures = Triangles + RTConstants::MAX_SHAPES, + NumDescriptors = Textures + RTConstants::MAX_TEXTURES, + None + }; + }; + using GIHeap = Heap; + + struct ShadowsHeapDef + { + enum class Table + { + UAV, + SRV + }; + + enum class Slot + { + ShadowMask, + Depth, + TLAS, + NumDescriptors, + None + }; + }; + using ShadowsHeap = Heap; + + ////////////////////////////////////////////////// Boilerplate + // Metadata + virtual inline std::string GetName() override { return "Raytracing"; } + virtual inline std::string GetShortName() override { return "Raytracing"; } + virtual inline std::string_view GetCategory() const override { return "Lighting"; } + virtual inline std::string GetFeatureModLink() override { return MakeNexusModURL("999999"); } + virtual inline std::pair> GetFeatureSummary() override + { + return { + "This is a terse description.", + { + "This is a subfeature.", + "This is another subfeature.", + "Cheese.", + } + }; + } + + // Functionality + virtual bool inline SupportsVR() override { return false; } + virtual inline std::string_view GetShaderDefineName() override { return "RT"; } + virtual inline bool HasShaderDefine(RE::BSShader::Type t) override { return t == RE::BSShader::Type::Lighting; }; + virtual std::vector GetActiveConstraints() const override; + + // Settings & UI + virtual void RestoreDefaultSettings() override; + virtual void LoadSettings(json& o_json) override; + virtual void SaveSettings(json& o_json) override; + virtual void DrawSettings() override; + + void DrawSHaRCSettings(); + void DrawSVGFSettings(); + void DrawSVGFInternalSettings(const char* name, SVGFPipeline::Settings& svgfSettings); +#ifdef DLSS_RR + void DrawDLSSRRSettings(); +#endif + void DrawDenoiserSettings(); + void DrawResolutionSettings(); + void DrawLightingSettings(); + void DrawLightSettings(); + void DrawSSSSettings(); + + void DrawGeneralSettings(); + void DrawAdvancedSettings(); + void 
DrawDebugSettings(); + + virtual void DrawOverlay() override; + + // SKSE kDataLoaded message + virtual void DataLoaded() override; + + virtual void PostPostLoad() override; + + virtual bool IsOverlayVisible() const override { return settings.PerformanceOverlay; }; + + // Resources + virtual void SetupResources() override; + virtual void ClearShaderCache() override; + + void SetupOutputRT(); + + void ShareRT(ID3D11Texture2D* pTexture2D, const GIHeap::Slot& target, const ShadowsHeap::Slot& cTarget, ID3D12Resource** ppResource) const; + void SetupSharedRT(); + void CompileShaders(); + void CompileComputeShaders(); + void CompileCompositeShader(); + + void CompileRTGIShaders(); + void CompileRTShadowsShaders(); + + void InitD3D12(ID3D11Device* ppDevice, ID3D11DeviceContext* pImmediateContext, IDXGIAdapter* a_adapter); + void CreateRootSignature(); + void CreateShadowsRootSignature(); + void DrawRTGI(); + void UpdateShadowsFrameBuffer(); + void RenderShadows(); + + eastl::vector GetPointLights(); + void UpdateLights(); + + void ConvertMSN(); + + void Main_RenderWorld(bool a1); + void BSShader_SetupGeometry(RE::BSShader* This, RE::BSRenderPass* Pass, uint32_t RenderFlags); + + void SkyCubeToHemi() const; + void CheckResourcesSide(int side); + + void AddInstance(RE::FormID formID, RE::NiAVObject* pNiNode, eastl::string path); + + eastl::vector GatherInstanceLights(RE::NiAVObject* pNiNode); + + void UpdateInstances(); + void UpdateBLASes(); + + void UpdateShadowInstances(); + + void DeviceRemovedHandler(); + + void CopyDepth() const; + void UnpackMetallicAO() const; + void CopyConvertTextures() const; + + void PostRaytraceCleanup(); + + void BuildTLAS(); + void RebuildTLAS(ID3D12GraphicsCommandList4* pCommandList, size_t numDescs, D3D12_GPU_VIRTUAL_ADDRESS instanceDescs); + + uint2 GetScreenSize() const; + uint2 GetRenderSize(); + bool UpdateRenderSize(); + +#ifdef DLSS_RR + void InitRR(); + void CheckFrameConstants(); + sl::DLSSMode GetDLSSMode() const; + 
sl::DLSSDOptions GetDLSSRROptions() const; + void GetDLSSRROptimal(); + void SetDLSSRROptions(); + int32_t GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth); + void GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount); + float Halton(int32_t index, int32_t base); +#endif + + const bool Active() + { + return loaded && settings.Enabled; + }; + + const bool RaytracedShadows() + { + return settings.RaytracedShadows && !settings.PathTracing; + } + + const auto& GetPipelines() + { + if (!skinningPipeline) + skinningPipeline = eastl::make_unique(); + + if (!sharcPipeline) + sharcPipeline = eastl::make_unique(); + + if (!nrdPipeline) + nrdPipeline = eastl::make_unique(); + + static eastl::array pipelines = { + skinningPipeline.get(), + sharcPipeline.get(), + nrdPipeline.get() + }; + + return pipelines; + }; + + static constexpr DXGI_SAMPLE_DESC NO_AA = { .Count = 1, .Quality = 0 }; + static constexpr D3D12_HEAP_PROPERTIES UPLOAD_HEAP = { .Type = D3D12_HEAP_TYPE_UPLOAD }; + static constexpr D3D12_HEAP_PROPERTIES DEFAULT_HEAP = { .Type = D3D12_HEAP_TYPE_DEFAULT }; + static constexpr D3D12_RESOURCE_DESC BASIC_BUFFER_DESC = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Width = 0, // Will be changed in copies + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .SampleDesc = NO_AA, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR + }; + + static constexpr D3D12MA::ALLOCATION_DESC UPLOAD_HEAP_MA = { .HeapType = D3D12_HEAP_TYPE_UPLOAD }; + static constexpr D3D12MA::ALLOCATION_DESC DEFAULT_HEAP_MA = { .HeapType = D3D12_HEAP_TYPE_DEFAULT }; + + enum struct Denoiser : int32_t + { + None, + SVGF, + Accumulation, + NRD, +#ifdef DLSS_RR + DLSSRR +#endif + }; + +#ifdef DLSS_RR + static constexpr Denoiser DefaultDenoiser = Denoiser::DLSSRR; +#else + static constexpr Denoiser DefaultDenoiser = Denoiser::SVGF; +#endif + + enum struct DebugOutput : int32_t + { + None, + Output, + Reflectance, + SpecularHitDistance, + NormalRoughnessGbuffer, + 
GeometryNormalMetalness, + Albedo, + Diffuse, + Passthrough + }; + +#ifdef DLSS_RR + enum struct DLSSRRQuality : int32_t + { + MaxPerformance, + Balanced, + MaxQuality, + NativeRes, + DLAA + }; + + enum struct DLSSRRPreset : int32_t + { + D, + E + }; +#endif + + enum struct PIXCaptureLocation : int32_t + { + GlobalIllumination, + Shadows + }; + + // TODO: Rename to ReflectanceModel? + enum struct DiffuseBRDF : int32_t + { + Lambert, + Burley, + OrenNayar, + Gotanda, + Chan + }; + + enum struct LightEvalMode : int32_t + { + Diffuse, + BRDF + }; + + enum struct HairBSDF : int32_t + { + None, + ChiangBSDF, + FarFieldBCSDF + }; + + static constexpr const char* LightEvalModeTooltips[] = { + "Diffuse only, no specular.", + "Diffuse and Specular with BRDF." + }; + STATIC_ASSERT_ENUM_COUNT(LightEvalMode, LightEvalModeTooltips); + + enum struct LightingMode : int32_t + { + Diffuse, + PBR + }; + + static constexpr const char* LightingModeTooltips[] = { + "Diffuse only, no reflections.", + "Physically Based Rendering mode with diffuse and reflections." + }; + STATIC_ASSERT_ENUM_COUNT(LightingMode, LightingModeTooltips); + + enum struct TraceMode : int32_t + { + Reference, + SHaRC + }; + + static constexpr const char* TraceModeTooltips[] = { + "Reference mode with no cache.", + "Enables Spatially Hashed Radiance Cache, a technique aimed at improving signal quality and performance." + }; + STATIC_ASSERT_ENUM_COUNT(TraceMode, TraceModeTooltips); + + enum struct Resolution : int32_t + { + Full, + Half, + Quarter, + Eighth + }; + + enum struct CullingMode : int32_t + { + None, + Smart, + Skyrim + }; + + static constexpr const char* CullingModeTooltips[] = { + "Disables culling altogether.", + "Configurable culling made for Ray Tracing.", + "Relies on Skyrim's culling, will create light leaks from culled nodes behind the player." 
+ }; + STATIC_ASSERT_ENUM_COUNT(CullingMode, CullingModeTooltips); + + enum struct CullingDistanceMode : int32_t + { + Minimal, + Ratio + }; + + static constexpr const char* CullingDistanceModeTooltips[] = { + "Culls all geometry outside the view if distance is greater than 'Minimal Distance', regardless of their radius.", + "When distance is greater than 'Start Distance' modulates 'Minimal Radius' by relative distance and ratio." + }; + STATIC_ASSERT_ENUM_COUNT(CullingDistanceMode, CullingDistanceModeTooltips); + +#ifdef DLSS_RR + struct DLSSRRSettings + { + DLSSRRQuality QualityMode = DLSSRRQuality::MaxQuality; + DLSSRRPreset Preset = DLSSRRPreset::E; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(DLSSRRSettings, QualityMode, Preset) + }; +#endif + + struct CullingSettings + { + CullingMode Mode = CullingMode::Smart; + int MinRadius = 1; + + CullingDistanceMode DistanceMode = CullingDistanceMode::Ratio; + + int MinDistance = 100; + + int StartDistance = 10; + float DistanceRatio = 1.0f; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(CullingSettings, Mode, MinRadius, DistanceMode, MinDistance, StartDistance, DistanceRatio) + }; + + // Resampled Importance Sampling + struct RISSettings + { + bool Enabled = true; + int MaxCandidates = 4; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(RISSettings, Enabled, MaxCandidates) + }; + + // Reservoir-based Spatiotemporal Importance Resampling + struct ReSTIRSettings + { + bool ReSTIRDI = true; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(ReSTIRSettings, ReSTIRDI) + }; + + struct SSSSettings + { + bool Enabled = true; + int SampleCount = 1; + float MaxSampleRadius = 1.0f; + bool EnableTransmission = true; + + bool MaterialOverride = false; + float3 OverrideTransmissionColor = float3(1.0f, 0.735f, 0.612f); + float3 OverrideScatteringColor = float3(1.0f, 1.0f, 1.0f); + float OverrideScale = 40.0f; + float OverrideAnisotropy = -0.5f; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT( + SSSSettings, + Enabled, + SampleCount, 
+ MaxSampleRadius, + EnableTransmission, + MaterialOverride, + OverrideTransmissionColor, + OverrideScatteringColor, + OverrideScale, + OverrideAnisotropy) + }; + + struct AdvancedSettings + { + CullingSettings Culling; + + bool VariableUpdateRate = true; + + RISSettings RIS; + ReSTIRSettings ReSTIR; + + bool GGXEnergyConservation = true; + HairBSDF HairBSDF = HairBSDF::FarFieldBCSDF; + + DiffuseBRDF DiffuseBRDF = DiffuseBRDF::Burley; + LightEvalMode LightEvalMode = LightEvalMode::BRDF; + LightingMode LightingMode = LightingMode::PBR; + + SSSSettings SSSSettings; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(AdvancedSettings, Culling, VariableUpdateRate, RIS, ReSTIR, GGXEnergyConservation, HairBSDF, DiffuseBRDF, LightEvalMode, LightingMode, SSSSettings) + }; + + ////////////////////////////////////////////////// Feature Specific Data + struct Settings + { + bool Enabled = true; + bool GlobalIllumination = true; + AdvancedSettings AdvancedSettings; + TraceMode TraceMode = TraceMode::SHaRC; + Denoiser Denoiser = DefaultDenoiser; + Resolution Resolution = Resolution::Full; + int Bounces = 2; + int SamplesPerPixel = 1; + float2 Roughness = { 0.0f, 1.0f }; + float2 Metalness = { 0.0f, 1.0f }; + float Emissive = 1.0f; + float Effect = 1.0f; + float Sky = 1.0f; + float Directional = 1.0f; + float Point = 1.0f; + float TexLODBias = -1.0f; + bool LodDimmer = true; + bool RaytracedShadows = true; + bool PathTracing = false; + bool CullShadows = true; + bool RussianRoulette = true; + bool ConvertToGamma = true; +#ifdef DLSS_RR + DLSSRRSettings DLSSRR; +#endif + SVGFPipeline::Settings SVGFDiffuse; + SVGFPipeline::Settings SVGFSpecular; + bool PerformanceOverlay = false; + DebugOutput DebugOutput = DebugOutput::None; + bool EnablePIXCapture = false; + PIXCaptureLocation PIXCaptureLocation = PIXCaptureLocation::GlobalIllumination; + bool EnableDebugDevice = false; + bool WhiteFurnace = false; + bool DisableSkinned = false; + bool InteriorSun = false; + SHaRCPipeline::Settings 
SHaRC; + } settings; + + // Debug variables + std::string debugDefines = ""; + bool debugDisableTriShapesUpdate = false; + bool debugDisableTextureSharing = false; + bool debugNormalMap = false; + uint debugNormalMapIndex = 0; + bool debugSkyHemi = false; + + enum class RecompileReason : uint32_t + { + None = 0, + General = 1 << 0, + Advanced = 1 << 1, + Debug = 1 << 2, + RestoreDefaultsSettings = 1 << 3, + LoadSettings = 1 << 4 + } recompileReason = RecompileReason::None; + + bool shareTexture = false; + bool renderingWorld = false; + bool lightsUpdated = false; + + winrt::com_ptr ga = nullptr; + + bool pixCapture = false; + bool pixCaptureStarted = false; + bool pixMultiFrame = false; + bool pixTDR = false; + + bool releaseBufferHooked = false; + bool releaseHooked = false; + HANDLE fenceEvent; + + struct TextureReference + { + winrt::com_ptr resource; + eastl::shared_ptr allocation; + + TextureReference(winrt::com_ptr&& res, eastl::shared_ptr&& alloc) : + resource(eastl::move(res)), allocation(eastl::move(alloc)) {} + }; + + // Creates mesh buffers for all graph TriShapes, handles materials and builds a single BLAS for the node + void CreateModel(RE::TESForm* form, const char* model, RE::NiAVObject* root); + void CreateActorModel(RE::Actor* actor, const char* name, RE::NiAVObject* root); + void CreateModelInternal(RE::TESForm* refr, const char* path, RE::NiAVObject* root); + + // Removes the instance and optionally also releases the model and all its buffers if refCount reaches 0 + bool RemoveInstance(RE::NiAVObject* root, bool releaseModel); + bool RemoveInstance(RE::FormID formID, bool releaseModel); + + void SetInstanceDetached(RE::NiAVObject* root, bool detached); + void SetInstanceDetached(RE::FormID formID, bool detached); + + eastl::shared_ptr GetTextureRegister(ID3D11Texture2D* texture, eastl::shared_ptr defaultTexture); + eastl::shared_ptr GetMSNormalMapRegister(Shape* shape, RE::BSGraphics::Texture* texture, eastl::shared_ptr defaultTexture); + + 
Allocator shapeRegisters = Allocator(RTConstants::MAX_SHAPES); + Allocator textureRegisters = Allocator(RTConstants::MAX_TEXTURES); + + struct DefaultTexture + { + eastl::shared_ptr allocation = nullptr; + eastl::unique_ptr> texture = nullptr; + + DefaultTexture(ID3D12Device5* device, Allocation* allocation) : + allocation(allocation) + { + texture = eastl::make_unique>(device, 1, 1, DXGI_FORMAT_R8G8B8A8_UNORM); + } + + void UpdateAndUpload(ID3D12GraphicsCommandList4* commandList, uint8_t* pixel) const + { + D3D12_SUBRESOURCE_DATA srcData = { + .pData = pixel, + .RowPitch = 4, + .SlicePitch = 4 + }; + + UpdateSubresources( + commandList, + texture->resource.get(), + texture->uploadResource.get(), + 0, 0, 1, + &srcData); + } + + template + void CreateSRV(DX12::DescriptorHeap* heap, HeapType::Slot item) const + { + auto handle = heap->CPUHandle(item, allocation.get()); + + texture->CreateSRV(handle); + } + + uint16_t GetIndex() const + { + return allocation->GetIndex(); + } + }; + + eastl::shared_ptr defaultWhiteTexture = nullptr; + eastl::shared_ptr defaultGrayTexture = nullptr; + eastl::shared_ptr defaultNormalTexture = nullptr; + eastl::shared_ptr defaultBlackTexture = nullptr; + eastl::shared_ptr defaultRMAOSTexture = nullptr; + eastl::shared_ptr defaultDetailTexture = nullptr; + + // TODO: Add cleanup for elements of this vector + eastl::unordered_map> dismemberReferences; + + // We'll group trishapes by their parent nodes, hopefully trishapes don't move on their own + eastl::unordered_map> models; + + winrt::com_ptr allocator = nullptr; + + winrt::com_ptr uploadPool = nullptr; + + winrt::com_ptr dynamicVertexPool = nullptr; + winrt::com_ptr vertexPool = nullptr; + winrt::com_ptr vertexCopyPool = nullptr; + winrt::com_ptr skinningPool = nullptr; + winrt::com_ptr trianglePool = nullptr; + + winrt::com_ptr blasScratchPool = nullptr; + winrt::com_ptr blasPool = nullptr; + + eastl::unordered_map instances; + eastl::unordered_map> formIDNodes; + + // Transform buffer 
for BLAS build/rebuild + eastl::unique_ptr> transformBuffer = nullptr; + + // Indirection, Transform and Material buffers + ShapeData* shapeData = nullptr; + eastl::unique_ptr> shapeBuffer = nullptr; + + eastl::array instanceData; + eastl::unique_ptr> instanceBuffer = nullptr; + + Util::FrameChecker frameChecker; + uint64_t frameIndex; + + Util::FrameChecker shadowFrameChecker; + + // Textures that have been shared with DX12 and placed in a heap as SRV + eastl::unordered_map> textures; + + struct ConvertedNormalMap + { + eastl::unique_ptr Reference; + eastl::unique_ptr Texture; + ID3D11ShaderResourceView* OriginalSRV = nullptr; + bool converted = false; + }; + + eastl::deque msnConvertionQueue; + + eastl::unordered_map> normalMaps; + + eastl::unordered_map allocationMSNormalMaps; + + winrt::com_ptr samplerState = nullptr; + winrt::com_ptr copyDepthCS = nullptr; + winrt::com_ptr convertTexturesCS = nullptr; + winrt::com_ptr convertTexturesPTCS = nullptr; + winrt::com_ptr compositeCS = nullptr; + winrt::com_ptr accumulationCS = nullptr; + + struct alignas(16) AccumulationCBData + { + uint AccumulatedFrames; + float3 _padding; + }; + eastl::unique_ptr accumulationCBData = nullptr; + eastl::unique_ptr accumulationCB = nullptr; + + eastl::unique_ptr> blasInstanceBuffer = nullptr; + eastl::vector blasInstances; + + winrt::com_ptr tlas = nullptr; + winrt::com_ptr tlasScratch = nullptr; + winrt::com_ptr tlasUpdateScratch = nullptr; + + eastl::vector lights; + eastl::unique_ptr> lightBuffer = nullptr; + + // GI + eastl::unique_ptr> frameBuffer = nullptr; + eastl::unique_ptr frameData = nullptr; + + // Shadows + eastl::unique_ptr> blasShadowInstanceBuffer = nullptr; + eastl::vector blasShadowInstances; + + RE::BSShadowDirectionalLight* shadowLight; + + eastl::unique_ptr> shadowsCB = nullptr; + eastl::unique_ptr shadowsCBData = nullptr; + + // SVGF + struct alignas(16) SharedData + { + float InteriorDirectional; + float Ambient; + float EnvMap; + uint Albedo; + }; + 
static_assert(sizeof(SharedData) % 16 == 0); + + SharedData GetCommonBufferData() const; + + // D3D12 + winrt::com_ptr d3d12Device = nullptr; + winrt::com_ptr commandQueue = nullptr; + winrt::com_ptr commandAllocator = nullptr; + winrt::com_ptr commandList = nullptr; + + winrt::com_ptr d3d11Fence = nullptr; + winrt::com_ptr d3d12Fence = nullptr; + + // Skinning (and dynamic TriShapes) + eastl::unique_ptr skinningPipeline = nullptr; + + // SHaRC (Radiance cache) + eastl::unique_ptr sharcPipeline = nullptr; + + // NRD (denoiser) + eastl::unique_ptr nrdPipeline = nullptr; + + // SVGF (denoiser) + eastl::unique_ptr svgfDenoiser = nullptr; + + // TODO: Move other effects to their own pipelines as well + // eastl::unique_ptr RTPipeline = nullptr; + // eastl::unique_ptr shadowPipeline = nullptr; + + // GI + winrt::com_ptr rootSignature = nullptr; + winrt::com_ptr pipelineRT = nullptr; + eastl::unique_ptr shaderBindingTable = nullptr; + eastl::unique_ptr shaderBindingTableBuffer = nullptr; + eastl::unique_ptr> giHeap = nullptr; + + // Shadows + winrt::com_ptr shadowRS = nullptr; + winrt::com_ptr shadowPipeline = nullptr; + eastl::unique_ptr shadowSBTBuffer = nullptr; + eastl::unique_ptr> shadowHeap = nullptr; + + uint64_t fenceValue = 0; + + struct TempGPUData + { + winrt::com_ptr scratchBuffers; + eastl::vector> retiredModels; + uint64_t fenceValue; + }; + + eastl::deque tempGPUData; + + // D3D11 + winrt::com_ptr d3d11Device = nullptr; + winrt::com_ptr d3d11Context = nullptr; + + struct alignas(16) RenderResData + { + uint2 RenderRes; + float2 RenderResRcp; + }; + + eastl::unique_ptr renderResData = nullptr; + eastl::unique_ptr renderResCB = nullptr; + + eastl::unique_ptr normalMapConverter; + + eastl::unique_ptr skyHemisphere = nullptr; + winrt::com_ptr cubeToHemiCS = nullptr; + + // Shadow maps + bool renderingShadowmap = false; + eastl::unique_ptr shadowMaskTexture = nullptr; + + // Resources + eastl::unique_ptr outputTexture = nullptr; + eastl::unique_ptr
diffuseAlbedoPathTracingTexture = nullptr; + eastl::unique_ptr normalRoughnessPathTracingTexture = nullptr; + eastl::unique_ptr specularAlbedoTexture = nullptr; + eastl::unique_ptr specularHitDistanceTexture = nullptr; + + eastl::unique_ptr depthTexture = nullptr; + eastl::unique_ptr motionVectorsTexture = nullptr; + + // True Albedo + winrt::com_ptr albedoTexture = nullptr; + + // Metalness modulated albedo + eastl::unique_ptr diffuseAlbedoTexture = nullptr; + + // World normal and roughness + eastl::unique_ptr normalRoughnessTexture = nullptr; + + // Geometry normal, metalness and AO + winrt::com_ptr GNMDTexture = nullptr; + + eastl::unique_ptr mainTexture = nullptr; + + // Accumulation buffer for path tracing denoiser + eastl::unique_ptr accumulationTexture = nullptr; + eastl::unique_ptr accumulationTextureCopy = nullptr; + + std::shared_mutex modelMutex; + std::shared_mutex landDetachMutex; + std::shared_mutex bufferMutex; + std::shared_mutex renderMutex; + + std::shared_mutex textureRegisterMutex; + std::recursive_mutex shareTextureMutex; + + uint2 renderSize; + float2 dynamicResolutionRatio; + + // Timings + double captureInterval = 0.1; + double lastTime = 0; + bool canMeasure = false; + + // Accumulation denoiser state + int accumulatedFrames = 0; + bool cameraHasMoved = true; + + RE::NiPointer waterReflections = nullptr; + + void UpdateMeasureTime(double currentTime) + { + double delta = currentTime - lastTime; + + if (delta > captureInterval) { + lastTime = currentTime; + canMeasure = true; + } else + canMeasure = false; + } + + float mainCPUTime; + float mainGPUTime; + + float shadowsCPUTime; + float shadowsGPUTime; + +#if defined(DLSS_RR) + HMODULE interposer = NULL; + + PFun_slInit* slInit{}; + PFun_slEvaluateFeature* slEvaluateFeature{}; + PFun_slGetNewFrameToken* slGetNewFrameToken{}; + PFun_slSetD3DDevice* slSetD3DDevice{}; + + PFun_slDLSSDGetOptimalSettings* slDLSSDGetOptimalSettings{}; + PFun_slDLSSDGetState* slDLSSDGetState{}; + 
PFun_slDLSSDSetOptions* slDLSSDSetOptions{}; + + PFun_slSetConstants* slSetConstants{}; + PFun_slGetFeatureFunction* slGetFeatureFunction{}; + PFun_slSetTag* slSetTag{}; + + sl::ViewportHandle slViewportHandle{ 0 }; + + Util::FrameChecker dlssFrameChecker; + sl::FrameToken* frameToken = nullptr; + + float2 jitter = { 0, 0 }; + + sl::DLSSDOptions dlssdOptions{}; + sl::DLSSDOptimalSettings optimalSettings{}; +#endif + + struct Hooks + { + struct ID3D11Device_CreateTexture2D + { + static HRESULT WINAPI thunk(ID3D11Device* This, const D3D11_TEXTURE2D_DESC* pDesc, const D3D11_SUBRESOURCE_DATA* pInitialData, ID3D11Texture2D** ppTexture2D) + { + if (!pDesc) + return func(This, pDesc, pInitialData, ppTexture2D); + + auto& rt = globals::features::raytracing; + std::lock_guard lock(rt.shareTextureMutex); + + D3D11_TEXTURE2D_DESC descCopy = *pDesc; + // While rt.shareTexture is set, flag every non-cubemap texture as D3D11_RESOURCE_MISC_SHARED + if (rt.shareTexture && !(pDesc->MiscFlags & D3D11_RESOURCE_MISC_TEXTURECUBE)) { + descCopy.MiscFlags |= D3D11_RESOURCE_MISC_SHARED; + } + + return func(This, &descCopy, pInitialData, ppTexture2D); + } + + static inline REL::Relocation func; + }; + // Thunk for the NiSourceTexture destructor: forgets the texture's rt.textures entry before the original destructor runs + struct NiSourceTexture_Destructor + { + static void thunk(RE::NiSourceTexture* oThis) + { + if (oThis && oThis->rendererTexture) { + if (auto resource = oThis->rendererTexture->texture) { + auto& rt = globals::features::raytracing; + + winrt::com_ptr<ID3D11Texture2D> texture; + // QueryInterface AddRefs the interface it returns; com_ptr releases that reference on scope exit instead of leaking it + resource->QueryInterface(IID_PPV_ARGS(texture.put())); + + if (auto it = rt.textures.find(texture.get()); it != rt.textures.end()) { + auto index = it->second->allocation->GetIndex(); + + logger::debug("[RT] NiSourceTexture::Destructor [0x{:8X}] - Register: {}", reinterpret_cast(texture.get()), index); + + // I imagine this isn't fast but I'll keep this in until I'm sure everything has been fixed + for (auto& [key, model] : rt.models) { + for (auto& shape : model->shapes) { + auto& material = shape->material; + + for (auto& materialTexture : material.Textures) { + if (index == materialTexture->GetIndex()) +
logger::critical("[RT]\t\t NiSourceTexture::Destructor - Found in: {}", key); + } + } + } + + rt.textures.erase(it); + } + } + } + + func(oThis); + } + + static inline REL::Relocation func; + }; + + struct Main_RenderWorld + { + static void thunk(bool a1) + { + globals::features::raytracing.Main_RenderWorld(a1); + } + static inline REL::Relocation func; + }; + + template + struct BSShader_SetupGeometry + { + static void thunk(RE::BSShader* This, RE::BSRenderPass* Pass, uint32_t RenderFlags) + { + auto& rt = globals::features::raytracing; + + if (rt.Active()) { + rt.BSShader_SetupGeometry(This, Pass, RenderFlags); + } + + func(This, Pass, RenderFlags); + } + static inline REL::Relocation func; + }; + + struct BSTriShape_OnVisible + { + static void thunk(RE::BSTriShape* This, RE::NiCullingProcess& a_process) + { + func(This, a_process); + } + static inline REL::Relocation func; + }; + + struct BSShadowDirectionalLight_RenderShadowmaps + { + static void thunk(RE::BSShadowDirectionalLight* light, void* a2) + { + auto& rt = globals::features::raytracing; + rt.renderingShadowmap = true; + + if (rt.Active() && rt.RaytracedShadows()) { + rt.UpdateShadowsFrameBuffer(); + + auto& runtimeData = light->GetShadowDirectionalLightRuntimeData(); + for (size_t i = 0; i < 3; i++) { + runtimeData.startSplitDistances[i] = 0; + runtimeData.endSplitDistances[i] = 0; + } + } + + // This is effectively bypassed (removing the call freezes the game...) 
+ func(light, a2); + + rt.renderingShadowmap = false; + + if (rt.Active() && rt.RaytracedShadows()) { + rt.shadowLight = light; + } + } + + static inline REL::Relocation func; + }; + + struct Main_RenderShadowmasks + { + static void thunk(bool a1) + { + auto& rt = globals::features::raytracing; + + if (rt.Active() && rt.RaytracedShadows()) + rt.RenderShadows(); + else + func(a1); + }; + static inline REL::Relocation func; + }; + + template + struct Release3DRelatedData + { + static void thunk(T* oThis) + { + globals::features::raytracing.RemoveInstance(oThis->GetFormID(), true); + + func(oThis); + } + static inline REL::Relocation func; + }; + + struct TESObjectREFR_Enable + { + static void thunk(RE::TESObjectREFR* oThis, bool a_resetInventory) + { + if (auto& rt = globals::features::raytracing; rt.Active()) { + auto* baseObject = oThis->GetBaseObject(); + + if (auto* model = baseObject->As()) { + logger::info("[RT] TESObjectREFR::Enable: {}", model->GetModel()); + } + } + + func(oThis, a_resetInventory); + } + static inline REL::Relocation func; + }; + + struct TESObjectREFR_Disable + { + static void thunk(RE::TESObjectREFR* oThis) + { + if (auto& rt = globals::features::raytracing; rt.Active()) { + auto* baseObject = oThis->GetBaseObject(); + + if (auto* model = baseObject->As()) { + logger::info("[RT] TESObjectREFR::Disable: {}", model->GetModel()); + } + } + + func(oThis); + } + static inline REL::Relocation func; + }; + + template + struct Destructor + { + static void thunk(T* oThis) + { + if (auto& rt = globals::features::raytracing; rt.Active()) { + rt.RemoveInstance(oThis, false); + } + + func(oThis); + } + static inline REL::Relocation func; + }; + + struct CreateTextureFromDDS + { + static RE::NiSourceTexture* thunk(RE::BSResource::CompressedArchiveStream* a1, char* path, ID3D11ShaderResourceView* srv, char a4, bool a5) + { + auto& rt = globals::features::raytracing; + + std::lock_guard lock(rt.shareTextureMutex); + + rt.shareTexture = 
!rt.debugDisableTextureSharing; + + auto* result = func(a1, path, srv, a4, a5); + + rt.shareTexture = false; + + return result; + }; + static inline REL::Relocation func; + }; + // Thunk for TESObjectLAND Attach3D: after the original runs, passes each non-null quad mesh of an exterior cell's land to CreateModelInternal + struct TESObjectLAND_Attach3D + { + static void thunk(RE::TESObjectLAND* oThis, bool a2) + { + func(oThis, a2); + // Validate the pointer before the trace below dereferences it + if (!oThis) + return; + + logger::trace("[RT] TESObjectLAND_Attach3D - a2: {}, IsLODLand: {}", a2, oThis->QIsLODLandObject()); + + auto* cell = oThis->parentCell; + + if (!cell || !cell->IsExteriorCell()) + return; + + auto& runtimeData = cell->GetRuntimeData(); + + auto* exteriorData = runtimeData.cellData.exterior; + + auto* loadedData = oThis->loadedData; + + if (!exteriorData || !loadedData || !loadedData->mesh) + return; + + logger::trace("[RT] TESObjectLAND_Attach3D - {}", std::format("Landscape_{}_{}", exteriorData->cellX, exteriorData->cellY).c_str()); + + for (uint i = 0; i < 4; i++) { + auto mesh = loadedData->mesh[i]; + + if (!mesh) + continue; + + globals::features::raytracing.CreateModelInternal(oThis, std::format("Landscape_{}_{}_Quad_{}", exteriorData->cellX, exteriorData->cellY, i).c_str(), mesh); + } + }; + static inline REL::Relocation func; + }; + // Thunk for TESObjectLAND Detach3D: removes the land's instance (releasing its model) under landDetachMutex, then calls the original + struct TESObjectLAND_Detach3D + { + static void thunk(RE::TESObjectLAND* oThis) + { + auto& rt = globals::features::raytracing; + + std::lock_guard lock{ rt.landDetachMutex }; + + rt.RemoveInstance(oThis->GetFormID(), true); + + auto* cell = oThis->parentCell; + + if (cell && cell->IsExteriorCell()) { + auto& runtimeData = cell->GetRuntimeData(); + + auto* exteriorData = runtimeData.cellData.exterior; + + logger::debug("[RT] TESObjectLAND::Detach3D - {}", std::format("Landscape_{}_{}", exteriorData->cellX, exteriorData->cellY).c_str()); + } + + func(oThis); + } + static inline REL::Relocation func; + }; + + struct AttachDistant3DTask_Attach + { + static void thunk(void* a1, float a2) + { + func(a1, a2); + + auto* refr = *reinterpret_cast(reinterpret_cast(a1) + 24); + + logger::info("[RT] AttachDistant3DTask::Attach {}",
magic_enum::enum_name(refr->GetFormType())); + } + static inline REL::Relocation func; + }; + + struct TES_AttachModel + { + static void thunk(RE::TES* a1, RE::TESObjectREFR* refr, RE::TESObjectCELL *cell, void* queuedTree, char a5, RE::NiNode* a6) + { + auto* baseObject = refr->GetBaseObject(); + + logger::debug("\tTES::AttachModel {} - {:08X}, {} - {:08X}", + magic_enum::enum_name(refr->formType.get()), refr->GetFormID(), + magic_enum::enum_name(baseObject->formType.get()), baseObject->GetFormID()); + + func(a1, refr, cell, queuedTree, a5, a6); + + if (auto& rt = globals::features::raytracing; rt.Active()) { + auto flags = baseObject->GetFormFlags(); + RE::FormType type = baseObject->GetFormType(); + + if (type == RE::FormType::IdleMarker) { + return; + } + + if (flags & MarkerFlags::IsMarker) { + if (type == RE::FormType::Static) + return; + + if (type == RE::FormType::Door) + return; + + if (type == RE::FormType::Action) + return; + + if (type == RE::FormType::Furniture) + return; + } + + auto* pNiAVObject = refr->Get3D(); + + if (!pNiAVObject) { + logger::warn("\tTES::AttachModel - No 3D"); + return; + } + + std::lock_guard lock{ rt.modelMutex }; + + if (auto* model = baseObject->As()) { + rt.CreateModel(refr, model->GetModel(), pNiAVObject); + } else { + if (IsPlayer(refr)) { + if (auto* player = reinterpret_cast(refr)) { + const char* name = player->GetName(); + + // First Person + //rt.CreateModelInternal(refr, std::format("{}_1stPerson", name).c_str(), pNiAVObject); + + // Third Person + rt.CreateActorModel(player, name, player->Get3D(false)); + + return; + } + } + + if (auto* actor = refr->As()) { + rt.CreateActorModel(actor, actor->GetName(), pNiAVObject); + return; + } + + logger::warn("\tTES::AttachModel - No TESModel - {}, {:08X}", magic_enum::enum_name(refr->formType.get()), refr->GetFormID()); + } + } + } + static inline REL::Relocation func; + }; + + struct CreateRenderTarget_PlayerFaceGenTint + { + static void thunk(RE::BSGraphics::Renderer* 
oThis, RE::RENDER_TARGETS::RENDER_TARGET a_target, RE::BSGraphics::RenderTargetProperties* a_properties) + { + auto& rt = globals::features::raytracing; + + std::lock_guard lock(rt.shareTextureMutex); + + rt.shareTexture = !rt.debugDisableTextureSharing; + + func(oThis, a_target, a_properties); + + rt.shareTexture = false; + } + static inline REL::Relocation func; + }; + + struct Main_RenderWaterEffects + { + static void thunk() + { + auto* tes = RE::TES::GetSingleton(); + if (tes->interiorCell) { + if (tes->interiorCell->cellFlags.none(RE::TESObjectCELL::Flag::kHasWater)) + tes->interiorCell->cellFlags.set(true, RE::TESObjectCELL::Flag::kHasWater); + + globals::features::raytracing.waterReflections->flags.set(true, RE::TESWaterReflections::Flags::kDirty); + } + + func(); + }; + static inline REL::Relocation func; + }; + + template + struct Set3D + { + static void thunk(T* oThis, RE::NiAVObject* a_object, bool a_queue3DTasks = true) + { + if (!a_object) + globals::features::raytracing.RemoveInstance(oThis->GetFormID(), true); + + func(oThis, a_object, a_queue3DTasks); + } + static inline REL::Relocation func; + }; + + struct BSDismemberSkinInstance_UpdateDismemberPartion + { + static void thunk(RE::BSDismemberSkinInstance* oThis, std::uint16_t a_slot, bool a_enable) + { + func(oThis, a_slot, a_enable); + + auto& dismemberReferences = globals::features::raytracing.dismemberReferences; + + if (auto it = dismemberReferences.find(oThis); it != dismemberReferences.end()) { + for (auto& shape : it->second) { + if (a_slot == shape->slot) { + logger::debug("[RT] BSDismemberSkinInstance::UpdateDismemberPartion {} {} - 0x{:08X} 0x{:08X}", a_slot, a_enable, reinterpret_cast(oThis), reinterpret_cast(shape)); + shape->UpdateDismember(a_enable); + break; + } + } + } + } + static inline REL::Relocation func; + }; + + static void Install() + { + // Creates model and instances for all forms + stl::detour_thunk(REL::RelocationID(13209, 13355)); + + // Releases 3D resources 
(instances and models) + { + stl::write_vfunc<0x6B, Release3DRelatedData>(RE::VTABLE_TESObjectREFR[0]); + stl::detour_thunk>(REL::RelocationID(36199, 37178)); + } + + // Makes Player FaceGenTint RenderTarget shareable + stl::write_thunk_call(REL::RelocationID(100458, 107175).address() + REL::Relocate(0x606, 0x605, 0x0)); + + // Updates Shape dismember state + stl::detour_thunk(REL::RelocationID(15576, 15753)); + + //stl::detour_thunk(REL::RelocationID(19373, 19800)); + //stl::write_vfunc<0x89, TESObjectREFR_Disable>(RE::VTABLE_TESObjectREFR[0]); + + // NiSourceTexture Destructor + stl::write_vfunc<0x0, NiSourceTexture_Destructor>(RE::VTABLE_NiSourceTexture[0]); + + // Destructors to remove instances (not models) + { + stl::write_vfunc<0x0, Destructor>(RE::VTABLE_NiNode[0]); + stl::write_vfunc<0x0, Destructor>(RE::VTABLE_BSFadeNode[0]); + stl::write_vfunc<0x0, Destructor>(RE::VTABLE_BSLeafAnimNode[0]); + } + + stl::detour_thunk(REL::RelocationID(35561, 36560)); + + stl::detour_thunk(REL::RelocationID(100424, 107142)); + + // We use these to render only the sky to the cubemaps, maybe it would be cleaner if we could override cubemap renderpass? 
+ stl::write_vfunc<0x6, BSShader_SetupGeometry>(RE::VTABLE_BSLightingShader[0]); + + if (REL::Module::IsAE()) { + stl::write_vfunc<0x35, BSTriShape_OnVisible>(RE::VTABLE_BSTriShape[0]); + } else { + stl::write_vfunc<0x34, BSTriShape_OnVisible>(RE::VTABLE_BSTriShape[0]); + } + + stl::detour_thunk(REL::RelocationID(100422, 107140)); + + stl::write_vfunc<0xA, BSShadowDirectionalLight_RenderShadowmaps>(RE::VTABLE_BSShadowDirectionalLight[0]); + + stl::detour_thunk(REL::RelocationID(69334, 70716)); + + //stl::detour_thunk(REL::RelocationID(18334, 18750)); + //stl::detour_thunk(REL::RelocationID(18333, 18749)); + + //stl::write_vfunc<0x6, AttachDistant3DTask_Attach>(RE::VTABLE_AttachDistant3DTask[0]); + + logger::info("[RT] Installed hooks"); + } + + static void InstallD3D11Hooks(ID3D11Device* pDevice) + { + stl::detour_vfunc<5, ID3D11Device_CreateTexture2D>(pDevice); + //stl::detour_vfunc<7, ID3D11Device_CreateShaderResourceView>(pDevice); + + logger::info("[RT] Installed D3D11 hooks - {}", reinterpret_cast(pDevice)); + } + }; + + class MenuOpenCloseEventHandler : public RE::BSTEventSink + { + public: + virtual RE::BSEventNotifyControl ProcessEvent(const RE::MenuOpenCloseEvent* a_event, RE::BSTEventSource*); + + static bool Register() + { + static MenuOpenCloseEventHandler singleton; + auto ui = globals::game::ui; + + if (!ui) { + logger::error("UI event source not found"); + return false; + } + + ui->GetEventSource()->AddEventSink(&singleton); + + logger::info("Registered {}", typeid(singleton).name()); + + return true; + } + }; + + class TESLoadGameEventHandler : public RE::BSTEventSink + { + public: + virtual RE::BSEventNotifyControl ProcessEvent(const RE::TESLoadGameEvent* a_event, RE::BSTEventSource*); + + static bool Register() + { + static TESLoadGameEventHandler singleton; + + auto scriptEventSourceHolder = RE::ScriptEventSourceHolder::GetSingleton(); + scriptEventSourceHolder->GetEventSource()->AddEventSink(&singleton); + + logger::info("Registered {}", 
typeid(singleton).name()); + + return true; + } + }; + + class TESObjectLoadedEventHandler : public RE::BSTEventSink + { + public: + virtual RE::BSEventNotifyControl ProcessEvent(const RE::TESObjectLoadedEvent* a_event, RE::BSTEventSource*); + + static bool Register() + { + static TESObjectLoadedEventHandler singleton; + + auto scriptEventSourceHolder = RE::ScriptEventSourceHolder::GetSingleton(); + scriptEventSourceHolder->GetEventSource()->AddEventSink(&singleton); + + logger::info("Registered {}", typeid(singleton).name()); + + return true; + } + }; + + class CellAttachDetachEventHandler : public RE::BSTEventSink + { + public: + virtual RE::BSEventNotifyControl ProcessEvent(const RE::CellAttachDetachEvent* a_event, RE::BSTEventSource*); + + static bool Register() + { + static CellAttachDetachEventHandler singleton; + + auto* tes = RE::TES::GetSingleton(); + tes->AddEventSink(&singleton); + + logger::info("Registered {}", typeid(singleton).name()); + + return true; + } + }; + + class BGSActorCellEventHandler : public RE::BSTEventSink + { + public: + virtual RE::BSEventNotifyControl ProcessEvent(const RE::BGSActorCellEvent* a_event, RE::BSTEventSource*); + + static bool Register() + { + static BGSActorCellEventHandler singleton; + + auto* player = RE::PlayerCharacter::GetSingleton(); + player->AsBGSActorCellEventSource()->AddEventSink(&singleton); + + logger::info("Registered {}", typeid(singleton).name()); + + return true; + } + }; +}; \ No newline at end of file diff --git a/src/Features/Raytracing/Allocator.cpp b/src/Features/Raytracing/Allocator.cpp new file mode 100644 index 0000000000..887fdfa59b --- /dev/null +++ b/src/Features/Raytracing/Allocator.cpp @@ -0,0 +1,7 @@ +#include "Features/Raytracing/Allocator.h" + +void Allocation::FreeAllocation() const +{ + logger::debug("[RT] Allocation::FreeAllocation - Index {}", index); + allocator->Free(index); +} \ No newline at end of file diff --git a/src/Features/Raytracing/Allocator.h 
b/src/Features/Raytracing/Allocator.h new file mode 100644 index 0000000000..5551f07467 --- /dev/null +++ b/src/Features/Raytracing/Allocator.h @@ -0,0 +1,91 @@ +#pragma once + +class Allocator; + +class Allocation +{ + uint16_t index; + Allocator* allocator; + +public: + Allocation(uint16_t slot, Allocator* allocator) : + index(slot), allocator(allocator) {} + + Allocation(const Allocation&) = delete; + Allocation& operator=(const Allocation&) = delete; + + Allocation(Allocation&&) = default; + Allocation& operator=(Allocation&&) = default; + + uint16_t GetIndex() const + { + return index; + } + + void FreeAllocation() const; +}; + +class Allocator +{ +public: + explicit Allocator(uint16_t maxSlots) : + nextFree(0), slots(maxSlots) + { + // Initialize free list + for (uint16_t i = 0; i < maxSlots - 1; ++i) + slots[i] = i + 1; + + slots[maxSlots - 1] = INVALID; // end of list + } + + // Allocate the lowest available slot + Allocation* Allocate() + { + if (nextFree == INVALID) + logger::critical("[RT] Allocator::Allocate - No available allocation slots."); + + uint16_t slot = nextFree; + nextFree = slots[slot]; // move head + return new Allocation(slot, this); + } + + // Free a slot + void Free(uint16_t slot) + { + slots[slot] = nextFree; + nextFree = slot; + } + + bool HasFree() const { return nextFree != INVALID; } + + uint16_t FreeCount() const + { + uint16_t count = 0; + uint16_t cur = nextFree; + + while (cur != INVALID) { + ++count; + cur = slots[cur]; + } + return count; + } + + uint16_t UsedCount() const + { + return static_cast(slots.size() - FreeCount()); + } + +private: + static constexpr uint16_t INVALID = 0xFFFF; + uint16_t nextFree; + eastl::vector slots; +}; + +struct AllocationDeleter +{ + void operator()(Allocation* a) const noexcept + { + a->FreeAllocation(); + delete a; + } +}; \ No newline at end of file diff --git a/src/Features/Raytracing/Buffer.h b/src/Features/Raytracing/Buffer.h new file mode 100644 index 0000000000..2bca0ac686 --- /dev/null 
+++ b/src/Features/Raytracing/Buffer.h @@ -0,0 +1,521 @@ +#pragma once + +#include +#include + +namespace DX12 +{ + class Resource + { + public: + explicit Resource(ID3D12Device5* device, D3D12_HEAP_TYPE heapType, const D3D12_RESOURCE_DESC& desc, D3D12_RESOURCE_STATES initialState) : + device(device), desc(desc) + { + const auto& heapProps = CD3DX12_HEAP_PROPERTIES(heapType); + DX::ThrowIfFailed(device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &desc, + initialState, + nullptr, + IID_PPV_ARGS(&resource))); + + state = initialState; + } + + virtual ~Resource() = default; + + virtual void SetName(LPCWSTR name) const + { + DX::ThrowIfFailed(resource->SetName(name)); + } + + virtual CD3DX12_RESOURCE_BARRIER GetTransitionBarrier(bool setState, D3D12_RESOURCE_STATES stateAfter, UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES) + { + D3D12_RESOURCE_STATES stateBefore = state; + + if (setState) + state = stateAfter; + + return CD3DX12_RESOURCE_BARRIER::Transition(resource.get(), stateBefore, stateAfter, subresource); + } + + virtual void TransitionBarrier(ID3D12GraphicsCommandList4* commandList, D3D12_RESOURCE_STATES stateAfter, UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES) + { + if (state == stateAfter) + return; + + const auto& resourceBarrier = CD3DX12_RESOURCE_BARRIER::Transition(resource.get(), state, stateAfter, subresource); + commandList->ResourceBarrier(1, &resourceBarrier); + + state = stateAfter; + } + + virtual void UAVBarrier(ID3D12GraphicsCommandList4* commandList) + { + const auto& resourceBarrier = CD3DX12_RESOURCE_BARRIER::UAV(resource.get()); + commandList->ResourceBarrier(1, &resourceBarrier); + } + + virtual void CreateSRV(D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + device->CreateShaderResourceView(resource.get(), &srvDesc, handle); + } + + virtual void CreateUAV(D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + 
device->CreateUnorderedAccessView(resource.get(), nullptr, &uavDesc, handle); + } + + virtual void CreateUAV(D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc, ID3D12Resource* counterResource, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + device->CreateUnorderedAccessView(resource.get(), counterResource, &uavDesc, handle); + } + + virtual void CreateRTV(D3D12_RENDER_TARGET_VIEW_DESC rtvDesc, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + device->CreateRenderTargetView(resource.get(), &rtvDesc, handle); + } + + virtual void CreateCBV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = resource->GetGPUVirtualAddress(); + cbvDesc.SizeInBytes = static_cast(desc.Width); + + device->CreateConstantBufferView(&cbvDesc, handle); + } + //D3D12_CONSTANT_BUFFER_VIEW_DESC + winrt::com_ptr resource = nullptr; + + protected: + ID3D12Device5* device; + D3D12_RESOURCE_STATES state; + D3D12_RESOURCE_DESC desc; + }; + + class ResourceUpload : public Resource + { + static D3D12_RESOURCE_DESC Desc(UINT64 size, D3D12_RESOURCE_FLAGS flags) + { + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Alignment = 0; + desc.Width = size; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = flags; + + return desc; + } + + public: + explicit ResourceUpload(ID3D12Device5* device, D3D12_RESOURCE_DESC desc) : + Resource(device, D3D12_HEAP_TYPE_DEFAULT, desc, D3D12_RESOURCE_STATE_COPY_DEST), size(size) + { + const auto& heapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + DX::ThrowIfFailed(device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &this->desc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&uploadResource))); + } + + explicit ResourceUpload(ID3D12Device5* device, const uint64_t& 
size, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE) : + Resource(device, D3D12_HEAP_TYPE_DEFAULT, Desc(size, flags), D3D12_RESOURCE_STATE_COPY_DEST), size(size) + { + const auto& heapProps = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + DX::ThrowIfFailed(device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &this->desc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&uploadResource))); + } + + virtual void SetName(LPCWSTR name) const override + { + Resource::SetName(name); + DX::ThrowIfFailed(uploadResource->SetName(std::format(L"{} [Upload]", name).c_str())); + } + + void Update(void const* src_data, size_t data_size) const + { + void* pData; + DX::ThrowIfFailed(uploadResource->Map(0, &readRange, &pData)); + memcpy(pData, src_data, data_size); + D3D12_RANGE writeRange = { 0, data_size }; + uploadResource->Unmap(0, &writeRange); + } + + void Upload(ID3D12GraphicsCommandList4* commandList, D3D12_RESOURCE_STATES finalState = D3D12_RESOURCE_STATE_COMMON) + { + D3D12_RESOURCE_STATES initialState = this->state; + + this->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_COPY_DEST); + commandList->CopyBufferRegion(this->resource.get(), 0, uploadResource.get(), 0, size); + this->TransitionBarrier(commandList, finalState != D3D12_RESOURCE_STATE_COMMON ? 
finalState : initialState); + } + + winrt::com_ptr uploadResource = nullptr; + + private: + UINT64 size; + D3D12_RANGE readRange = { 0, 0 }; + }; + + class Texture : public Resource + { + static D3D12_RESOURCE_DESC Desc(D3D12_RESOURCE_DIMENSION dimension, UINT64 width, UINT height, DXGI_FORMAT format, D3D12_RESOURCE_FLAGS flags) + { + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = dimension; + desc.Alignment = 0; + desc.Width = width; + desc.Height = height; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = format; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + desc.Flags = flags; + + return desc; + } + + public: + explicit Texture( + ID3D12Device5* device, + D3D12_RESOURCE_DIMENSION dimension, UINT64 width, UINT height, DXGI_FORMAT format, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE) : + Resource(device, D3D12_HEAP_TYPE_DEFAULT, Desc(dimension, width, height, format, flags), D3D12_RESOURCE_STATE_COMMON) {} + + virtual ~Texture() = default; + }; + + class Texture2D : public Texture + { + public: + explicit Texture2D( + ID3D12Device5* device, + UINT64 width, UINT height, DXGI_FORMAT format, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE) : + Texture(device, D3D12_RESOURCE_DIMENSION_TEXTURE2D, width, height, format, flags) {} + + virtual ~Texture2D() = default; + + virtual void CreateSRV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = desc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = desc.MipLevels; + srvDesc.Texture2D.PlaneSlice = 0; + srvDesc.Texture2D.ResourceMinLODClamp = 0.0f; + + Texture::CreateSRV(srvDesc, handle); + } + + virtual void CreateUAV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + 
uavDesc.Format = desc.Format; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + + Texture::CreateUAV(uavDesc, handle); + } + + virtual void CreateRTV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + D3D12_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + rtvDesc.Format = desc.Format; + rtvDesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + + Texture::CreateRTV(rtvDesc, handle); + } + }; + + template + class Texture2DUpload : public Texture2D + { + public: + explicit Texture2DUpload( + ID3D12Device5* device, + UINT64 width, UINT height, DXGI_FORMAT format, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE) : + Texture2D(device, width, height, format, flags) + { + const auto& uploadHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + + D3D12_RESOURCE_DESC upDesc = Resource::desc; + upDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + UINT64 uploadBufferSize; + device->GetCopyableFootprints(&upDesc, 0, 1, 0, nullptr, nullptr, nullptr, &uploadBufferSize); + + const auto& uploadDesc = CD3DX12_RESOURCE_DESC::Buffer(uploadBufferSize); + + DX::ThrowIfFailed(device->CreateCommittedResource( + &uploadHeap, + D3D12_HEAP_FLAG_NONE, + &uploadDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&uploadResource))); + } + + void Update(void const* src_data, size_t data_size, size_t begin = 0) + { + void* pData; + DX::ThrowIfFailed(uploadResource->Map(0, &readRange, &pData)); + + uint8_t* dst = static_cast(pData) + begin; + memcpy(dst, src_data, data_size); + + D3D12_RANGE writeRange = { begin, begin + data_size }; + uploadResource->Unmap(0, &writeRange); + } + + void UpdateAt(void const* src_data, size_t index = 0) + { + size_t begin = index * sizeof(T); + + void* pData; + DX::ThrowIfFailed(uploadResource->Map(0, &readRange, &pData)); + + uint8_t* dst = static_cast(pData) + begin; + memcpy(dst, src_data, sizeof(T)); + + D3D12_RANGE writeRange = { begin, begin + sizeof(T) }; + uploadResource->Unmap(0, &writeRange); + } + + void UpdateList(T const* src_data, 
std::int64_t localCount) + { + Update(src_data, sizeof(T) * localCount); + } + + void Upload(ID3D12GraphicsCommandList4* commandList) + { + this->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_COPY_DEST); + commandList->CopyResource(this->resource.get(), uploadResource.get()); + //commandList->CopyBufferRegion(this->resource.get(), 0, uploadResource.get(), 0, sizeof(T) * this->count); + this->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + } + + winrt::com_ptr uploadResource = nullptr; + + private: + D3D12_RANGE readRange = { 0, 0 }; + }; + + template + class StructuredBuffer : public Resource + { + public: + static D3D12_RESOURCE_DESC Desc(UINT64 width, bool uav = false) + { + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Alignment = 0; + desc.Width = width; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = uav ? 
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE; + + return desc; + } + + explicit StructuredBuffer(ID3D12Device5* device, const uint64_t& a_count, bool uav = false) : + Resource(device, D3D12_HEAP_TYPE_DEFAULT, Desc(sizeof(T) * a_count, uav), D3D12_RESOURCE_STATE_COPY_DEST), count(a_count) {} + + virtual ~StructuredBuffer() = default; + + virtual void CreateSRV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Buffer.NumElements = static_cast(count); + srvDesc.Buffer.StructureByteStride = sizeof(T); + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + + Resource::CreateSRV(srvDesc, handle); + } + + virtual void CreateUAV(ID3D12Resource* counterResource, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; + uavDesc.Buffer.FirstElement = 0; + uavDesc.Buffer.NumElements = static_cast(count); + uavDesc.Buffer.StructureByteStride = sizeof(T); + + if (counterResource) + uavDesc.Buffer.CounterOffsetInBytes = 0; + + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + Resource::CreateUAV(uavDesc, counterResource, handle); + } + + virtual void CreateUAV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + StructuredBuffer::CreateUAV(nullptr, handle); + } + + protected: + uint64_t count; + }; + + template + class StructuredBufferUpload : public StructuredBuffer + { + public: + explicit StructuredBufferUpload(ID3D12Device5* a_device, const uint64_t& a_count, bool uav = false, uint uploadCount = 1) : + StructuredBuffer(a_device, a_count, uav) + { + const auto& uploadHeap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD); + D3D12_RESOURCE_DESC desc = 
StructuredBuffer::Desc(Resource::desc.Width); + + uploadResources.resize(uploadCount); + + for (auto i = 0u; i < uploadCount; i++) { + DX::ThrowIfFailed(a_device->CreateCommittedResource( + &uploadHeap, + D3D12_HEAP_FLAG_NONE, + &desc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(uploadResources[i].put()))); + } + } + + virtual void SetName(LPCWSTR name) const override + { + Resource::SetName(name); + + for (size_t i = 0; i < uploadResources.size(); i++) { + DX::ThrowIfFailed(uploadResources[i]->SetName(std::format(L"{} [Upload {}]", name, i).c_str())); + } + } + + void Update(void const* srcData, size_t dataSize, size_t begin = 0, uint uploadIndex = 0) + { + void* pData; + DX::ThrowIfFailed(uploadResources[uploadIndex]->Map(0, &readRange, &pData)); + + uint8_t* dst = static_cast(pData) + begin; + memcpy(dst, srcData, dataSize); + + D3D12_RANGE writeRange = { begin, begin + dataSize }; + uploadResources[uploadIndex]->Unmap(0, &writeRange); + } + + void UpdateAt(T const* srcData, size_t index = 0, uint uploadIndex = 0) + { + size_t begin = index * sizeof(T); + + void* pData; + DX::ThrowIfFailed(uploadResources[uploadIndex]->Map(0, &readRange, &pData)); + + uint8_t* dst = static_cast(pData) + begin; + memcpy(dst, srcData, sizeof(T)); + + D3D12_RANGE writeRange = { begin, begin + sizeof(T) }; + uploadResources[uploadIndex]->Unmap(0, &writeRange); + } + + void UpdateList(T const* srcData, uint64_t localCount, uint uploadIndex = 0) + { + Update(srcData, sizeof(T) * localCount, 0, uploadIndex); + } + + void Upload(ID3D12GraphicsCommandList4* commandList, uint uploadIndex = 0, D3D12_RESOURCE_STATES finalState = D3D12_RESOURCE_STATE_COMMON) + { + D3D12_RESOURCE_STATES state = this->state; + + this->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_COPY_DEST); + commandList->CopyResource(this->resource.get(), uploadResources[uploadIndex].get()); + this->TransitionBarrier(commandList, finalState != D3D12_RESOURCE_STATE_COMMON ? 
finalState : state); + } + + void UploadRegion(ID3D12GraphicsCommandList4* commandList, uint64_t dataSize, uint64_t offset, uint uploadIndex = 0, D3D12_RESOURCE_STATES finalState = D3D12_RESOURCE_STATE_COMMON) + { + D3D12_RESOURCE_STATES state = this->state; + + this->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_COPY_DEST); + commandList->CopyBufferRegion(this->resource.get(), offset, uploadResources[uploadIndex].get(), offset, dataSize); + this->TransitionBarrier(commandList, finalState != D3D12_RESOURCE_STATE_COMMON ? finalState : state); + } + + ID3D12Resource* UploadResource(uint index = 0) + { + return uploadResources[index].get(); + } + + private: + eastl::vector> uploadResources; + D3D12_RANGE readRange = { 0, 0 }; + }; + + template + class StructuredAppendBuffer : public StructuredBuffer + { + public: + explicit StructuredAppendBuffer(ID3D12Device5* device, const uint64_t& count, bool uav = true) : + StructuredBuffer(device, count, uav) + { + // Create 4-byte counter buffer + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Alignment = 0; + desc.Width = 4; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + + const auto& heap = CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + DX::ThrowIfFailed(device->CreateCommittedResource( + &heap, + D3D12_HEAP_FLAG_NONE, + &desc, + D3D12_RESOURCE_STATE_UNORDERED_ACCESS, + nullptr, + IID_PPV_ARGS(&counterBuffer))); + } + + void CreateSRV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) override + { + StructuredBuffer::CreateSRV(handle); + } + + void CreateUAV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) override + { + StructuredBuffer::CreateUAV(counterBuffer.get(), handle); + } + + winrt::com_ptr counterBuffer = nullptr; + }; +} \ No newline at end of file diff --git 
a/src/Features/Raytracing/BufferMA.h b/src/Features/Raytracing/BufferMA.h new file mode 100644 index 0000000000..4c4f08363f --- /dev/null +++ b/src/Features/Raytracing/BufferMA.h @@ -0,0 +1,310 @@ +#pragma once + +#include +#include +#include + +namespace DX12 +{ + class ResourceMA + { + public: + explicit ResourceMA(ID3D12Device5* device, D3D12MA::Allocator* allocator, D3D12MA::ALLOCATION_DESC allocDesc, const D3D12_RESOURCE_DESC& desc, D3D12_RESOURCE_STATES initialState) : + device(device), allocator(allocator), desc(desc) + { + DX::ThrowIfFailed(allocator->CreateResource(&allocDesc, &desc, initialState, nullptr, allocation.put(), IID_PPV_ARGS(&resource))); + + state = initialState; + } + + virtual ~ResourceMA() = default; + + virtual void SetName(LPCWSTR name) const + { + DX::ThrowIfFailed(resource->SetName(name)); + } + + D3D12_RESOURCE_STATES GetState() const + { + return state; + } + + virtual CD3DX12_RESOURCE_BARRIER GetTransitionBarrier(bool setState, D3D12_RESOURCE_STATES stateAfter, UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES) + { + D3D12_RESOURCE_STATES stateBefore = state; + + if (setState) + state = stateAfter; + + return CD3DX12_RESOURCE_BARRIER::Transition(resource.get(), stateBefore, stateAfter, subresource); + } + + virtual bool GetTransitionBarrier(D3D12_RESOURCE_STATES stateAfter, CD3DX12_RESOURCE_BARRIER& barrier, UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES) + { + if (state == stateAfter) + return false; + + barrier = CD3DX12_RESOURCE_BARRIER::Transition(resource.get(), state, stateAfter, subresource); + + state = stateAfter; + + return true; + } + + virtual void TransitionBarrier(ID3D12GraphicsCommandList4* commandList, D3D12_RESOURCE_STATES stateAfter, UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES) + { + if (state == stateAfter) + return; + + const auto& resourceBarrier = CD3DX12_RESOURCE_BARRIER::Transition(resource.get(), state, stateAfter, subresource); + commandList->ResourceBarrier(1, 
&resourceBarrier); + + state = stateAfter; + } + + virtual void UAVBarrier(ID3D12GraphicsCommandList4* commandList) + { + const auto& resourceBarrier = CD3DX12_RESOURCE_BARRIER::UAV(resource.get()); + commandList->ResourceBarrier(1, &resourceBarrier); + } + + virtual void CreateSRV(D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + device->CreateShaderResourceView(resource.get(), &srvDesc, handle); + } + + virtual void CreateUAV(D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + device->CreateUnorderedAccessView(resource.get(), nullptr, &uavDesc, handle); + } + + virtual void CreateUAV(D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc, ID3D12Resource* counterResource, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + device->CreateUnorderedAccessView(resource.get(), counterResource, &uavDesc, handle); + } + + virtual void CreateRTV(D3D12_RENDER_TARGET_VIEW_DESC rtvDesc, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + device->CreateRenderTargetView(resource.get(), &rtvDesc, handle); + } + + virtual void CreateCBV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = resource->GetGPUVirtualAddress(); + cbvDesc.SizeInBytes = static_cast(desc.Width); + + device->CreateConstantBufferView(&cbvDesc, handle); + } + + winrt::com_ptr allocation = nullptr; + winrt::com_ptr resource = nullptr; + + protected: + ID3D12Device5* device; + D3D12MA::Allocator* allocator; + D3D12_RESOURCE_STATES state; + D3D12_RESOURCE_DESC desc; + }; + + class ResourceUploadMA : public ResourceMA + { + static D3D12_RESOURCE_DESC Desc(UINT64 size, D3D12_RESOURCE_FLAGS flags) + { + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Alignment = 0; + desc.Width = size; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Layout = 
D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = flags; + + return desc; + } + + public: + explicit ResourceUploadMA(ID3D12Device5* device, D3D12MA::Allocator* allocator, D3D12MA::ALLOCATION_DESC allocDesc, D3D12MA::ALLOCATION_DESC uploadAllocDesc, const uint64_t& size, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE) : + ResourceMA(device, allocator, allocDesc, Desc(size, flags), D3D12_RESOURCE_STATE_COPY_DEST), size(size) + { + DX::ThrowIfFailed(allocator->CreateResource(&uploadAllocDesc, &desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, uploadAllocation.put(), IID_PPV_ARGS(&uploadResource))); + } + + void Update(void const* src_data, size_t data_size) const + { + void* pData; + DX::ThrowIfFailed(uploadResource->Map(0, &readRange, &pData)); + memcpy(pData, src_data, data_size); + D3D12_RANGE writeRange = { 0, data_size }; + uploadResource->Unmap(0, &writeRange); + } + + void Upload(ID3D12GraphicsCommandList4* commandList) + { + this->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_COPY_DEST); + commandList->CopyBufferRegion(this->resource.get(), 0, uploadResource.get(), 0, size); + this->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + } + + winrt::com_ptr uploadAllocation = nullptr; + winrt::com_ptr uploadResource = nullptr; + + private: + UINT64 size; + D3D12_RANGE readRange = { 0, 0 }; + }; + + template + class StructuredBufferMA : public ResourceMA + { + public: + static D3D12_RESOURCE_DESC Desc(UINT64 width, bool uav = false) + { + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Alignment = 0; + desc.Width = width; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = uav ? 
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS : D3D12_RESOURCE_FLAG_NONE; + + return desc; + } + + explicit StructuredBufferMA(ID3D12Device5* device, D3D12MA::Allocator* allocator, D3D12MA::ALLOCATION_DESC allocDesc, const uint64_t& a_count, bool uav = false) : + ResourceMA(device, allocator, allocDesc, Desc(sizeof(T) * a_count, uav), D3D12_RESOURCE_STATE_COPY_DEST), count(a_count) {} + + virtual ~StructuredBufferMA() = default; + + virtual void CreateSRV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Buffer.NumElements = static_cast(count); + srvDesc.Buffer.StructureByteStride = sizeof(T); + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + + ResourceMA::CreateSRV(srvDesc, handle); + } + + virtual void CreateUAV(ID3D12Resource* counterResource, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; + uavDesc.Buffer.FirstElement = 0; + uavDesc.Buffer.NumElements = static_cast(count); + uavDesc.Buffer.StructureByteStride = sizeof(T); + + if (counterResource) + uavDesc.Buffer.CounterOffsetInBytes = 0; + + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + ResourceMA::CreateUAV(uavDesc, counterResource, handle); + } + + virtual void CreateUAV(CD3DX12_CPU_DESCRIPTOR_HANDLE handle) + { + StructuredBufferMA::CreateUAV(nullptr, handle); + } + + protected: + uint64_t count; + }; + + template + class StructuredBufferUploadMA : public StructuredBufferMA + { + public: + explicit StructuredBufferUploadMA(ID3D12Device5* a_device, D3D12MA::Allocator* allocator, D3D12MA::ALLOCATION_DESC allocDesc, D3D12MA::ALLOCATION_DESC uploadAllocDesc, const uint64_t& a_count, bool uav = false, uint 
uploadCount = 1) : + StructuredBufferMA(a_device, allocator, allocDesc, a_count, uav) + { + D3D12_RESOURCE_DESC desc = StructuredBufferMA::Desc(ResourceMA::desc.Width); + + uploadAllocations.resize(uploadCount); + uploadResources.resize(uploadCount); + + for (auto i = 0u; i < uploadCount; i++) { + DX::ThrowIfFailed(allocator->CreateResource(&uploadAllocDesc, &desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, uploadAllocations[i].put(), IID_PPV_ARGS(&uploadResources[i]))); + } + } + + virtual void SetName(LPCWSTR name) const override + { + ResourceMA::SetName(name); + + for (size_t i = 0; i < uploadResources.size(); i++) { + DX::ThrowIfFailed(uploadResources[i]->SetName(std::format(L"{} [Upload {}]", name, i).c_str())); + } + } + + void Update(void const* srcData, size_t dataSize, size_t begin = 0, uint uploadIndex = 0) + { + void* pData; + DX::ThrowIfFailed(uploadResources[uploadIndex]->Map(0, &readRange, &pData)); + + uint8_t* dst = static_cast(pData) + begin; + memcpy(dst, srcData, dataSize); + + D3D12_RANGE writeRange = { begin, begin + dataSize }; + uploadResources[uploadIndex]->Unmap(0, &writeRange); + } + + void UpdateAt(T const* srcData, size_t index = 0, uint uploadIndex = 0) + { + size_t begin = index * sizeof(T); + + void* pData; + DX::ThrowIfFailed(uploadResources[uploadIndex]->Map(0, &readRange, &pData)); + + uint8_t* dst = static_cast(pData) + begin; + memcpy(dst, srcData, sizeof(T)); + + D3D12_RANGE writeRange = { begin, begin + sizeof(T) }; + uploadResources[uploadIndex]->Unmap(0, &writeRange); + } + + void UpdateList(T const* srcData, uint64_t localCount, uint uploadIndex = 0) + { + Update(srcData, sizeof(T) * localCount, 0, uploadIndex); + } + + void Upload(ID3D12GraphicsCommandList4* commandList, uint uploadIndex = 0, D3D12_RESOURCE_STATES finalState = D3D12_RESOURCE_STATE_COMMON) + { + D3D12_RESOURCE_STATES state = this->state; + + this->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_COPY_DEST); + 
commandList->CopyResource(this->resource.get(), uploadResources[uploadIndex].get()); + this->TransitionBarrier(commandList, finalState != D3D12_RESOURCE_STATE_COMMON ? finalState : state); + } + + // dataSize, offset arguments order to match Update function + void UploadRegion(ID3D12GraphicsCommandList4* commandList, uint64_t dataSize, uint64_t offset, uint uploadIndex = 0, D3D12_RESOURCE_STATES finalState = D3D12_RESOURCE_STATE_COMMON) + { + D3D12_RESOURCE_STATES state = this->state; + + this->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_COPY_DEST); + commandList->CopyBufferRegion(this->resource.get(), offset, uploadResources[uploadIndex].get(), offset, dataSize); + this->TransitionBarrier(commandList, finalState != D3D12_RESOURCE_STATE_COMMON ? finalState : state); + } + + eastl::vector> uploadAllocations; + eastl::vector> uploadResources; + + private: + D3D12_RANGE readRange = { 0, 0 }; + }; +} \ No newline at end of file diff --git a/src/Features/Raytracing/Core/Instance.cpp b/src/Features/Raytracing/Core/Instance.cpp new file mode 100644 index 0000000000..a3d0f31a90 --- /dev/null +++ b/src/Features/Raytracing/Core/Instance.cpp @@ -0,0 +1,73 @@ +#include "Instance.h" + +#include "Features/Raytracing.h" + +void Instance::SetDetached(bool detach) +{ + detached = detach; +} + +bool Instance::IsDetached() const +{ + return detached; +} + +bool Instance::SkipUpdate(RE::NiAVObject* node, const RE::NiPoint3& cameraPosition) +{ + auto& rt = globals::features::raytracing; + + if (!rt.settings.AdvancedSettings.VariableUpdateRate) + return false; + + const uint64_t delta = rt.frameIndex - lastUpdate; + + const float distance = Util::Units::GameUnitsToMeters(node->worldBound.center.GetDistance(cameraPosition)); + + const uint64_t interval = UpdateInterval(distance); + + if (delta < interval) + return true; + + lastUpdate = rt.frameIndex; + + return false; +} + +// Checks for skinned and dynamic trishapes update +void Instance::Update(RE::NiAVObject* node, const 
/// Maps a distance to the minimum number of frames between instance updates.
///
/// Computes floor(log2((distance - 25) + 1) * 0.3), limited to [0, 30].
/// Distances for which the logarithm's argument is not greater than 1 (that is,
/// distance <= 25, and NaN) yield 0, meaning "update every frame".
///
/// @param distance Distance to the instance (the caller passes metres).
/// @return Update interval in frames, in the range [0, 30].
static uint UpdateInterval(float distance)
{
	const float argument = (distance - 25.0f) + 1.0f;

	// log2 of a value <= 0 is -inf/NaN, and converting such a float (or any value
	// <= -1) to an unsigned integer is undefined behaviour. Arguments in (0, 1]
	// previously truncated to 0 as well, so valid results are unchanged. The
	// negated comparison also routes NaN here.
	if (!(argument > 1.0f))
		return 0u;

	const float t = std::log2(argument) * 0.3f;

	// Clamp while still in floating point so +inf / huge values never reach the cast.
	return static_cast<uint>(std::min(t, 30.0f));
}
ShaderType::Lighting; + } + } + + enum ShaderFlags : uint32_t + { + None = 0, + kSpecular = 1 << 0, + kTempRefraction = 1 << 1, + kVertexAlpha = 1 << 2, + kGrayscaleToPaletteColor = 1 << 3, + kGrayscaleToPaletteAlpha = 1 << 4, + kFalloff = 1 << 5, + kEnvMap = 1 << 6, + kFace = 1 << 7, + kModelSpaceNormals = 1 << 8, + kRefraction = 1 << 9, + kProjectedUV = 1 << 10, + kExternalEmittance = 1 << 11, + kVertexColors = 1 << 12, + kMultiTextureLandscape = 1 << 13, + kEyeReflect = 1 << 14, + kHairTint = 1 << 15, + kTwoSided = 1 << 16, + kAssumeShadowmask = 1 << 17, + kBackLighting = 1 << 18, + kTreeAnim = 1 << 19 + }; + + enum AlphaFlags : uint16_t + { + kOpaque = 0, + kAlphaBlend = 1 << 0, + kAlphaTest = 1 << 1 + }; + + ShaderFlags GetShaderFlags() const + { + using EShaderPropertyFlag = RE::BSShaderProperty::EShaderPropertyFlag; + + auto shaderFlagsLocal = ShaderFlags::None; + + /*const auto& entries = magic_enum::enum_entries(); + const auto& originalEntries = magic_enum::enum_entries(); + + for (const auto& [flag, name] : entries) { + for (const auto& [originalFlag, originalName] : originalEntries) { + if (shaderFlags.any(originalFlag) && name == originalName) { + shaderFlagsLocal |= flag; + break; + } + } + }*/ + + if (shaderFlags.any(EShaderPropertyFlag::kSpecular)) { + shaderFlagsLocal |= ShaderFlags::kSpecular; + } + + if (shaderFlags.any(EShaderPropertyFlag::kTempRefraction)) { + shaderFlagsLocal |= ShaderFlags::kTempRefraction; + } + + if (shaderFlags.any(EShaderPropertyFlag::kVertexAlpha)) { + shaderFlagsLocal |= ShaderFlags::kVertexAlpha; + } + + if (shaderFlags.any(EShaderPropertyFlag::kGrayscaleToPaletteColor)) { + shaderFlagsLocal |= ShaderFlags::kGrayscaleToPaletteColor; + } + + if (shaderFlags.any(EShaderPropertyFlag::kGrayscaleToPaletteAlpha)) { + shaderFlagsLocal |= ShaderFlags::kGrayscaleToPaletteAlpha; + } + + if (shaderFlags.any(EShaderPropertyFlag::kFalloff)) { + shaderFlagsLocal |= ShaderFlags::kFalloff; + } + + if 
(shaderFlags.any(EShaderPropertyFlag::kEnvMap)) { + shaderFlagsLocal |= ShaderFlags::kEnvMap; + } + + if (shaderFlags.any(EShaderPropertyFlag::kFace)) { + shaderFlagsLocal |= ShaderFlags::kFace; + } + + if (shaderFlags.any(EShaderPropertyFlag::kModelSpaceNormals)) { + shaderFlagsLocal |= ShaderFlags::kModelSpaceNormals; + } + + if (shaderFlags.any(EShaderPropertyFlag::kRefraction)) { + shaderFlagsLocal |= ShaderFlags::kRefraction; + } + + if (shaderFlags.any(EShaderPropertyFlag::kProjectedUV)) { + shaderFlagsLocal |= ShaderFlags::kProjectedUV; + } + + if (shaderFlags.any(EShaderPropertyFlag::kExternalEmittance)) { + shaderFlagsLocal |= ShaderFlags::kExternalEmittance; + } + + if (shaderFlags.any(EShaderPropertyFlag::kVertexColors)) { + shaderFlagsLocal |= ShaderFlags::kVertexColors; + } + + if (shaderFlags.any(EShaderPropertyFlag::kMultiTextureLandscape)) { + shaderFlagsLocal |= ShaderFlags::kMultiTextureLandscape; + } + + if (shaderFlags.any(EShaderPropertyFlag::kEyeReflect)) { + shaderFlagsLocal |= ShaderFlags::kEyeReflect; + } + + if (shaderFlags.any(EShaderPropertyFlag::kHairTint)) { + shaderFlagsLocal |= ShaderFlags::kHairTint; + } + + if (shaderFlags.any(EShaderPropertyFlag::kTwoSided)) { + shaderFlagsLocal |= ShaderFlags::kTwoSided; + } + + if (shaderFlags.any(EShaderPropertyFlag::kAssumeShadowmask)) { + shaderFlagsLocal |= ShaderFlags::kAssumeShadowmask; + } + + if (shaderFlags.any(EShaderPropertyFlag::kBackLighting)) { + shaderFlagsLocal |= ShaderFlags::kBackLighting; + } + + if (shaderFlags.any(EShaderPropertyFlag::kTreeAnim)) { + shaderFlagsLocal |= ShaderFlags::kTreeAnim; + } + + return shaderFlagsLocal; + } + + REX::EnumSet shaderFlags; + RE::BSShader::Type shaderType; + RE::BSShaderMaterial::Feature Feature; + stl::enumeration PBRFlags; + + uint16_t AlphaFlags; + + eastl::array Colors; + eastl::array Scalars; + + eastl::array TexCoordOffsetScale; + + eastl::array, 20> Textures; + + MaterialData GetData() const + { + return MaterialData( + 
TexCoordOffsetScale[0], TexCoordOffsetScale[1], + Colors[0], Colors[1], Colors[2], + Scalars[0], Scalars[1], Scalars[2], Scalars[3], + AlphaFlags, + Textures[0]->GetIndex(), + Textures[1]->GetIndex(), + Textures[2]->GetIndex(), + Textures[3]->GetIndex(), + Textures[4]->GetIndex(), + Textures[5]->GetIndex(), + Textures[6]->GetIndex(), + Textures[7]->GetIndex(), + Textures[8]->GetIndex(), + Textures[9]->GetIndex(), + Textures[10]->GetIndex(), + Textures[11]->GetIndex(), + Textures[12]->GetIndex(), + Textures[13]->GetIndex(), + Textures[14]->GetIndex(), + Textures[15]->GetIndex(), + Textures[16]->GetIndex(), + Textures[17]->GetIndex(), + Textures[18]->GetIndex(), + Textures[19]->GetIndex(), + GetShaderType(), + static_cast(Feature), + PBRFlags.underlying(), + static_cast(GetShaderFlags())); + } +}; \ No newline at end of file diff --git a/src/Features/Raytracing/Core/Model.cpp b/src/Features/Raytracing/Core/Model.cpp new file mode 100644 index 0000000000..18dc44d132 --- /dev/null +++ b/src/Features/Raytracing/Core/Model.cpp @@ -0,0 +1,289 @@ +#include "Model.h" + +#include "Features/Raytracing.h" +#include "Features/Raytracing/Helpers/ModelSpaceToTangent.h" + +void Model::ConvertMSN() +{ + eastl::unordered_map> msnMaps; + + uint vertexCount = 0; + uint triangleCount = 0; + + for (auto& shape : shapes) { + auto& material = shape->material; + + if (material.shaderFlags.none(RE::BSShaderProperty::EShaderPropertyFlag::kModelSpaceNormals)) + continue; + + vertexCount = std::max(vertexCount, shape->vertexCount); + triangleCount = std::max(triangleCount, shape->triangleCount); + + auto key = material.Textures.at(RTConstants::MATERIAL_NORMALMAP_ID)->GetIndex(); + + if (auto msnMap = msnMaps.find(key); msnMap != msnMaps.end()) { + msnMap->second.push_back(shape.get()); + } else { + msnMaps.emplace(key, eastl::vector{ shape.get() }); + } + } + + if (msnMaps.empty()) + return; + + auto device = globals::d3d::device; + auto context = globals::d3d::context; + + // Vertex Buffer + 
winrt::com_ptr vertexBuffer; + { + D3D11_BUFFER_DESC desc{}; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.ByteWidth = sizeof(ModelSpaceToTangent::UnpackedVertex) * vertexCount; + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + + DX::ThrowIfFailed(device->CreateBuffer(&desc, nullptr, vertexBuffer.put())); + } + + // Index Buffer + winrt::com_ptr indexBuffer; + { + D3D11_BUFFER_DESC desc{}; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.ByteWidth = sizeof(uint16_t) * triangleCount * 3; + desc.BindFlags = D3D11_BIND_INDEX_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + + DX::ThrowIfFailed(device->CreateBuffer(&desc, nullptr, indexBuffer.put())); + } + + eastl::vector vertices; + auto& rt = globals::features::raytracing; + + for (auto& [allocation, msnShapes] : msnMaps) { + auto msnIt = rt.allocationMSNormalMaps.find(allocation); + + if (msnIt == rt.allocationMSNormalMaps.end()) + continue; + + auto* msnMap = msnIt->second; + + auto normalMapIt = rt.normalMaps.find(msnMap); + + if (normalMapIt == rt.normalMaps.end()) + continue; + + auto* convertedNormalMap = normalMapIt->second.get(); + + if (convertedNormalMap->converted) + continue; + + rt.normalMapConverter->Setup(msnMap); + + context->PSSetShaderResources(0, 1, &convertedNormalMap->OriginalSRV); + + ID3D11RenderTargetView* rtv = convertedNormalMap->Texture->rtv.get(); + context->OMSetRenderTargets(1, &rtv, nullptr); + + // We will continuously render and blend the final result to the same texture + for (auto* shape : msnShapes) { + // Update Vertex Buffer + { + vertices.resize(shape->vertexCount); + + for (size_t i = 0; i < shape->vertexCount; i++) { + vertices[i] = shape->vertices[i]; + } + + D3D11_MAPPED_SUBRESOURCE mapped; + context->Map(vertexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + + memcpy(mapped.pData, vertices.data(), sizeof(ModelSpaceToTangent::UnpackedVertex) * shape->vertexCount); + + context->Unmap(vertexBuffer.get(), 0); + } + + // 
Update Index Buffer + { + D3D11_MAPPED_SUBRESOURCE mapped; + context->Map(indexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped); + + memcpy(mapped.pData, shape->triangles.data(), sizeof(Triangle) * shape->triangleCount); + + context->Unmap(indexBuffer.get(), 0); + } + + rt.normalMapConverter->SetVertexShader(shape->flags.any(Shape::Flags::Dynamic)); + + rt.normalMapConverter->Draw(vertexBuffer.get(), indexBuffer.get(), shape->triangleCount); + } + + convertedNormalMap->converted = true; + + rt.allocationMSNormalMaps.erase(allocation); + } +} + +bool Model::BLASBuildExecuted() const +{ + return blasBuilt && blasBuildFrame < globals::features::raytracing.frameIndex; +} + +bool Model::BLASUpdateQueued() const +{ + return blasUpdateFrame == globals::features::raytracing.frameIndex; +} + +void Model::BuildBLAS(ID3D12GraphicsCommandList4* commandList) +{ + auto& rt = globals::features::raytracing; + + std::lock_guard lock{ rt.renderMutex }; + + geometryDescs.resize(shapes.size()); + + // Initial build with all shapes, visible or not, so the scratch buffer can be sized to fit all geometry + for (size_t i = 0; i < shapes.size(); i++) { + geometryDescs[i] = shapes[i]->GeometryDesc(); + } + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = { + .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL, + .Flags = BuildFlags(), + .NumDescs = static_cast(geometryDescs.size()), + .DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY, + .pGeometryDescs = geometryDescs.data() + }; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo; + rt.d3d12Device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo); + + D3D12_RESOURCE_DESC desc = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Width = std::max(prebuildInfo.ScratchDataSizeInBytes, prebuildInfo.UpdateScratchDataSizeInBytes) * 2, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .SampleDesc = Raytracing::NO_AA, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags 
= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS + }; + + auto blasScratchDesc = Raytracing::DEFAULT_HEAP_MA; + blasScratchDesc.CustomPool = rt.blasScratchPool.get(); + + DX::ThrowIfFailed(rt.allocator->CreateResource(&blasScratchDesc, &desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, blasScratchBuffer.put(), IID_NULL, NULL)); + + auto blasDesc = Raytracing::DEFAULT_HEAP_MA; + blasDesc.CustomPool = rt.blasPool.get(); + + desc.Width = prebuildInfo.ResultDataMaxSizeInBytes; + DX::ThrowIfFailed(rt.allocator->CreateResource(&blasDesc, &desc, D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE, nullptr, blasBuffer.put(), IID_NULL, NULL)); + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC buildDesc = { + .DestAccelerationStructureData = blasBuffer->GetResource()->GetGPUVirtualAddress(), + .Inputs = inputs, + .SourceAccelerationStructureData = 0, + .ScratchAccelerationStructureData = blasScratchBuffer->GetResource()->GetGPUVirtualAddress() + }; + + commandList->BuildRaytracingAccelerationStructure(&buildDesc, 0, nullptr); + + // Register frame that BLAS was created + blasBuilt = true; + blasBuildFrame = rt.frameIndex; + + const auto& asBarrier = CD3DX12_RESOURCE_BARRIER::UAV(blasBuffer->GetResource()); + commandList->ResourceBarrier(1, &asBarrier); +} + +bool Model::UpdateBLAS(ID3D12GraphicsCommandList4* commandList) +{ + const bool update = flags.any(Flags::BLASUpdate); + const bool rebuild = flags.any(Flags::BLASRebuild); + + if (!update && !rebuild) + return false; + + if (!BLASBuildExecuted()) + return false; + + if (BLASUpdateQueued()) + return false; + + if (update && shapeflags.none(Shape::Flags::Skinned,Shape::Flags::Dynamic)) { + logger::critical("[RT] Model::UpdateBLAS - Only Skinned and Dynamic geometry should get the 'BLASUpdate' flag - [0x{:08X}]", reinterpret_cast(this)); + + flags.reset(Flags::BLASUpdate); + + if (!rebuild) + return false; + } + + geometryDescs.clear(); + geometryDescs.reserve(shapes.size()); + + for (auto& shape : shapes) { + if 
(rebuild) { + if (shape->IsPendingHidden()) + continue; + } else { + if (shape->IsHidden()) + continue; + } + + geometryDescs.push_back(shape->GeometryDesc()); + } + + if (geometryDescs.empty()) { + logger::warn("[RT] Model::UpdateBLAS - Empty Geometry Descs"); + return false; + } + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS inputs = { + .Type = D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL, + .Flags = UpdateFlags(rebuild), + .NumDescs = static_cast(geometryDescs.size()), + .DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY, + .pGeometryDescs = geometryDescs.data() + }; + + /*D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO prebuildInfo; + globals::features::raytracing.d3d12Device->GetRaytracingAccelerationStructurePrebuildInfo(&inputs, &prebuildInfo); + + if (prebuildInfo.ResultDataMaxSizeInBytes > blasBuffer->GetResource()->GetDesc().Width) { + logger::critical("[RT] ResultDataMaxSizeInBytes greater than current resource size."); + } + + auto scratchWidth = blasScratchBuffer->GetResource()->GetDesc().Width; + + if (prebuildInfo.ScratchDataSizeInBytes > scratchWidth) { + logger::critical("[RT] ScratchDataSizeInBytes greater than current scratch resource size."); + } + + if (prebuildInfo.UpdateScratchDataSizeInBytes > scratchWidth) { + logger::critical("[RT] UpdateScratchDataSizeInBytes greater than current scratch resource size."); + }*/ + + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC buildDesc = { + .DestAccelerationStructureData = blasBuffer->GetResource()->GetGPUVirtualAddress(), + .Inputs = inputs, + .SourceAccelerationStructureData = rebuild ? 
0 : blasBuffer->GetResource()->GetGPUVirtualAddress(), + .ScratchAccelerationStructureData = blasScratchBuffer->GetResource()->GetGPUVirtualAddress() + }; + + commandList->BuildRaytracingAccelerationStructure(&buildDesc, 0, nullptr); + + if (rebuild) + for (auto& shape : shapes) { + shape->UpdateState(); + } + + flags.reset(Flags::BLASUpdate, Flags::BLASRebuild); + + // Register frame that BLAS was updated + blasUpdateFrame = globals::features::raytracing.frameIndex; + + return true; +} \ No newline at end of file diff --git a/src/Features/Raytracing/Core/Model.h b/src/Features/Raytracing/Core/Model.h new file mode 100644 index 0000000000..e58a587e75 --- /dev/null +++ b/src/Features/Raytracing/Core/Model.h @@ -0,0 +1,138 @@ +#pragma once + +#include "PCH.h" + +#include + +#include "State.h" + +#include "Features/Raytracing/Buffer.h" +#include "Features/Raytracing/Types.h" + +#include "Features/Raytracing/Core/Shape.h" + +#include "Raytracing/Includes/Types/Material.hlsli" +#include "Raytracing/Includes/Types/Skinning.hlsli" +#include "Raytracing/Includes/Types/Triangle.hlsli" +#include "Raytracing/Includes/Types/Vertex.hlsli" + +struct Model +{ + enum Flags : uint8_t { + BLASUpdate = 1 << 0, + BLASRebuild = 1 << 1 + }; + + eastl::vector> shapes; + + winrt::com_ptr blasBuffer = nullptr; + winrt::com_ptr blasScratchBuffer = nullptr; + + eastl::vector geometryDescs; + + stl::enumeration flags; + + Model(eastl::vector>& shapes) : + shapes(eastl::move(shapes)) + { + for (auto& shape : this->shapes) { + shapeflags.set(shape->flags.get()); + shaderTypes |= shape->material.shaderType; + features |= static_cast(shape->material.Feature); + shaderFlags.set(shape->material.shaderFlags.get()); + } + } + + static std::string KeySuffix(RE::NiAVObject* root) + { + return std::format("_{:08X}", reinterpret_cast(root)); + } + + auto GetShapeFlags() const + { + return shapeflags; + } + + uint32_t GetShaderTypes() const + { + return shaderTypes; + } + + auto GetFeatures() const + { + 
return features; + } + + + auto GetShaderFlags() const + { + return shaderFlags; + } + + bool ShouldQueueMSNConversion() const + { + for (auto& shape : shapes) { + if (shape->material.shaderFlags.any(RE::BSShaderProperty::EShaderPropertyFlag::kModelSpaceNormals)) + return true; + } + + return false; + } + + void ConvertMSN(); + + bool BLASBuildExecuted() const; + + bool BLASUpdateQueued() const; + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS BuildFlags() const + { + if (shapeflags.any(Shape::Flags::Dynamic, Shape::Flags::Skinned)) + return D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_UPDATE | D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD; + + return D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE; + } + + D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS UpdateFlags(bool rebuild) const + { + if (rebuild) + return BuildFlags(); + + return D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD | D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE; + } + + void BuildBLAS(ID3D12GraphicsCommandList4* commandList); + + bool UpdateBLAS(ID3D12GraphicsCommandList4* commandList); + + bool HideShape(Shape* shape) const + { + return BLASBuildExecuted() && shape->IsHidden(); + } + + uint64_t BLASUpdateFrame() const + { + return blasUpdateFrame; + } + + void AddRef() + { + refCount.fetch_add(1, eastl::memory_order_relaxed); + } + + // Returns refCount + int Release() + { + return refCount.fetch_sub(1, eastl::memory_order_acq_rel) - 1; + } + +private: + stl::enumeration shapeflags = Shape::Flags::None; + uint32_t shaderTypes = RE::BSShader::Type::None; + int features = static_cast(RE::BSShaderMaterial::Feature::kNone); + REX::EnumSet shaderFlags; + bool blasBuilt = false; + uint64_t blasBuildFrame; + uint64_t blasUpdateFrame; + eastl::atomic refCount{ 0 }; +}; diff --git a/src/Features/Raytracing/Core/Shape.cpp b/src/Features/Raytracing/Core/Shape.cpp new file mode 100644 index 
0000000000..e561a2e1fd --- /dev/null +++ b/src/Features/Raytracing/Core/Shape.cpp @@ -0,0 +1,1090 @@ +#include "Shape.h" +#include "Features/Raytracing.h" +#include "Features/Raytracing/Heap.h" +#include "Features/Raytracing/Pipelines/SkinningPipeline.h" + +#include "TruePBR.h" +#include "TruePBR/BSLightingShaderMaterialPBR.h" +#include "TruePBR/BSLightingShaderMaterialPBRLandscape.h" + +using GIHeap = Raytracing::GIHeap; + +static std::uint32_t GetVertexSize(RE::BSGraphics::Vertex::Flags flags) +{ + using RE::BSGraphics::Vertex; + + std::uint32_t vertexSize = 0; + + if (flags & Vertex::VF_VERTEX) { + vertexSize += sizeof(float) * 4; + } + if (flags & Vertex::VF_UV) { + vertexSize += sizeof(std::uint16_t) * 2; + } + if (flags & Vertex::VF_UV_2) { + vertexSize += sizeof(std::uint16_t) * 2; + } + if (flags & Vertex::VF_NORMAL) { + vertexSize += sizeof(std::uint16_t) * 2; + if (flags & Vertex::VF_TANGENT) { + vertexSize += sizeof(std::uint16_t) * 2; + } + } + if (flags & Vertex::VF_COLORS) { + vertexSize += sizeof(std::uint8_t) * 4; + } + if (flags & Vertex::VF_SKINNED) { + vertexSize += sizeof(std::uint16_t) * 4 + sizeof(std::uint8_t) * 4; + } + if (flags & Vertex::VF_EYEDATA) { + vertexSize += sizeof(float); + } + if (flags & Vertex::VF_LANDDATA) { + vertexSize += sizeof(uint32_t) * 2; + } + + return vertexSize; +} + +static std::string PrintVertexFlags(uint16_t value) +{ + using RE::BSGraphics::Vertex; + + std::string result; + if (value & Vertex::Flags::VF_VERTEX) + result += "VF_VERTEX "; + + if (value & Vertex::Flags::VF_UV) + result += "VF_UV "; + + if (value & Vertex::Flags::VF_UV_2) + result += "VF_UV_2 "; + + if (value & Vertex::Flags::VF_NORMAL) + result += "VF_NORMAL "; + + if (value & Vertex::Flags::VF_TANGENT) + result += "VF_TANGENT "; + + if (value & Vertex::Flags::VF_COLORS) + result += "VF_COLORS "; + + if (value & Vertex::Flags::VF_SKINNED) + result += "VF_SKINNED "; + + if (value & Vertex::Flags::VF_LANDDATA) + result += "VF_LANDDATA "; + + if 
(value & Vertex::Flags::VF_EYEDATA) + result += "VF_EYEDATA "; + + if (value & Vertex::Flags::VF_INSTANCEDATA) + result += "VF_INSTANCEDATA "; + + if (value & Vertex::Flags::VF_FULLPREC) + result += "VF_FULLPREC "; + + return result; +} + +static uint16_t GetVertexSize2(uint64_t desc) +{ + return (desc & 0xF) * 4; +} + +void Shape::BuildMesh(RE::BSGraphics::TriShape* rendererData, const uint32_t& vertexCountIn, const uint32_t& triangleCountIn, const std::uint16_t& bonesPerVertex) +{ + auto vertexDesc = rendererData->vertexDesc; + + vertexFlags = vertexDesc.GetFlags(); + + bool hasNormal = vertexFlags & RE::BSGraphics::Vertex::VF_NORMAL; + bool hasBitangent = vertexFlags & RE::BSGraphics::Vertex::VF_TANGENT; + + // Vertices + { + bool dynamic = false; + bool skinned = flags.any(Flags::Skinned); + + if (flags.any(Flags::Dynamic)) { + dynamicPosition.resize(vertexCountIn); + + static REL::Relocation dynamicTriShapeRTTI{ RE::BSDynamicTriShape::Ni_RTTI }; + + if (geometry->GetRTTI() == dynamicTriShapeRTTI.get()) { + auto* pDynamicTriShape = reinterpret_cast(geometry); + + if (pDynamicTriShape) { + auto& dynTriShapeRuntime = pDynamicTriShape->GetDynamicTrishapeRuntimeData(); + + dynTriShapeRuntime.lock.Lock(); + std::memcpy(dynamicPosition.data(), dynTriShapeRuntime.dynamicData, dynTriShapeRuntime.dataSize); + dynTriShapeRuntime.lock.Unlock(); + + dynamic = true; + } + } + + // Clear Dynamic flag if geometry is not a valid BSDynamicTriShape. + // Enforces the invariant that when Flags::Dynamic is set, geometry is always a BSDynamicTriShape. 
+ if (!dynamic) + flags.reset(Flags::Dynamic); + } + + vertices.resize(vertexCountIn); + + if (skinned) + skinning.resize(vertexCountIn); + + auto vertexSize = GetVertexSize(vertexFlags); + auto vertexSize2 = GetVertexSize2(*reinterpret_cast(&vertexDesc)); + + if (vertexSize != vertexSize2) + logger::warn("[RT] Shape::BuildMesh - Vertex size mismatch: {} != {}", vertexSize, vertexSize2); + + bool hasPosition = vertexFlags & RE::BSGraphics::Vertex::VF_VERTEX; + + uint32_t posOffset = vertexDesc.GetAttributeOffset(RE::BSGraphics::Vertex::VA_POSITION); + uint32_t uvOffset = vertexDesc.GetAttributeOffset(RE::BSGraphics::Vertex::VA_TEXCOORD0); + uint32_t normOffset = vertexDesc.GetAttributeOffset(RE::BSGraphics::Vertex::VA_NORMAL); + uint32_t tangOffset = vertexDesc.GetAttributeOffset(RE::BSGraphics::Vertex::VA_BINORMAL); + uint32_t colorOffset = vertexDesc.GetAttributeOffset(RE::BSGraphics::Vertex::VA_COLOR); + uint32_t skinOffset = vertexDesc.GetAttributeOffset(RE::BSGraphics::Vertex::VA_SKINNING); + uint32_t landOffset = vertexDesc.GetAttributeOffset(RE::BSGraphics::Vertex::VA_LANDDATA); + + uint32_t boneIDOffset = sizeof(uint16_t) * bonesPerVertex; + + eastl::vector weights; + eastl::vector boneIds; + + if (skinned) { + weights.resize(bonesPerVertex); + boneIds.resize(bonesPerVertex); + } + + float3 min(FLT_MAX), max(-FLT_MAX); + + for (uint16_t i = 0; i < vertexCountIn; i++) { + uint8_t* vtx = rendererData->rawVertexData + i * vertexSize; + + Vertex vertexData{}; + + float4 pos; + + if (hasPosition) { + std::memcpy(&pos, vtx + posOffset, sizeof(float4)); + } else if (dynamic) { + pos = dynamicPosition[i]; + } + + min = float3::Min(min, float3(pos)); + max = float3::Max(max, float3(pos)); + + if (hasPosition || dynamic) { + vertexData.Position = { pos.x, pos.y, pos.z }; + } + + if (vertexFlags & RE::BSGraphics::Vertex::VF_UV) { + std::memcpy(&vertexData.Texcoord0, vtx + uvOffset, sizeof(half2)); + } + + if (hasNormal) { + uint32_t normalData; + 
std::memcpy(&normalData, vtx + normOffset, sizeof(uint32_t)); + auto normalUnpacked = UnpackByte4(normalData); + + vertexData.Normal = Normalize({ normalUnpacked.x, normalUnpacked.y, normalUnpacked.z }); + + if (hasBitangent) { + uint32_t bitangentData; + std::memcpy(&bitangentData, vtx + tangOffset, sizeof(uint32_t)); + auto bitangentUnpacked = UnpackByte4(bitangentData); + + vertexData.Bitangent = Normalize({ bitangentUnpacked.x, bitangentUnpacked.y, bitangentUnpacked.z }); + + float3 tangent = { pos.w, normalUnpacked.w, bitangentUnpacked.w }; + + if (!hasPosition) { + tangent.x = std::sqrt(std::max(0.0f, 1.0f - tangent.y * tangent.y - tangent.z * tangent.z)); + + float handedness = (tangent.x * (vertexData.Bitangent.y * vertexData.Normal.z - vertexData.Bitangent.z * vertexData.Normal.y) + + tangent.y * (vertexData.Bitangent.z * vertexData.Normal.x - vertexData.Bitangent.x * vertexData.Normal.z) + + tangent.z * (vertexData.Bitangent.x * vertexData.Normal.y - vertexData.Bitangent.y * vertexData.Normal.x)) < 0 ? 
+ -1.0f : + 1.0f; + + tangent.x *= handedness; + } + + vertexData.Tangent = Normalize(tangent); + } + } + + if (skinned) { + if (vertexFlags & RE::BSGraphics::Vertex::VF_SKINNED) { + std::memcpy(weights.data(), vtx + skinOffset, sizeof(half) * bonesPerVertex); + std::memcpy(boneIds.data(), vtx + skinOffset + boneIDOffset, sizeof(uint8_t) * bonesPerVertex); + + float sum = 0.0f; + for (float w : weights) { + sum += w; + } + + if (sum < 1.0f) { + weights[0] += 1.0f - sum; + } else if (sum > eastl::numeric_limits::epsilon()) { + float sumRcp = 1.0f / sum; + + for (half& w : weights) { + w *= sumRcp; + } + } else { + weights = { 1.0f }; + } + } else { + weights = { 1.0f }; + boneIds = { 0 }; + } + + auto fillSkinningData = [](eastl::vector& vector) { + auto currSize = vector.size(); + + if (currSize < 4) { + vector.insert(vector.end(), 4 - currSize, 0); + } + }; + + fillSkinningData(weights); + fillSkinningData(boneIds); + + skinning[i] = Skinning(weights, boneIds); + } + + if (vertexFlags & RE::BSGraphics::Vertex::VF_LANDDATA) { + std::memcpy(&vertexData.LandBlend0, vtx + landOffset, sizeof(uint32_t)); + std::memcpy(&vertexData.LandBlend1, vtx + landOffset + sizeof(uint32_t), sizeof(uint32_t)); + } + + if (vertexFlags & RE::BSGraphics::Vertex::VF_COLORS) { + std::memcpy(&vertexData.Color, vtx + colorOffset, sizeof(uint32_t)); + } else { + vertexData.Color.packed = PackUByte4({ 1.0f, 1.0f, 1.0f, 1.0f }); + } + + vertices[i] = vertexData; + } + + aabb = AABB::FromMinMax(min, max); + + vertexCount = vertexCountIn; + } + + // Triangles + { + // Landscape contains no triangles, so we build them ourselves + if (flags & Flags::Landscape) { + triangles.reserve(triangleCountIn); + + constexpr uint16_t GRID_SIZE = 16; + constexpr uint16_t VERTICES = GRID_SIZE + 1; + + for (uint16_t y = 0; y < GRID_SIZE; y++) { + for (uint16_t x = 0; x < GRID_SIZE; x++) { + uint16_t v0 = y * VERTICES + x; + uint16_t v1 = v0 + 1; + uint16_t v2 = v0 + VERTICES; + uint16_t v3 = v2 + 1; + + if (v0 
>= vertexCount || v1 >= vertexCount || v2 >= vertexCount) + logger::critical("[RT] Quad {} {} vertex overflow: [{}, {}, {}]", x, y, v0, v1, v2); + + // First triangle + triangles.emplace_back(v0, v1, v2); + + // Second triangle + triangles.emplace_back(v1, v3, v2); + } + } + } else { + triangles.resize(triangleCountIn); + std::memcpy(triangles.data(), rendererData->rawIndexData, sizeof(Triangle) * triangleCountIn); + } + + triangleCount = triangleCountIn; + } + + if (!hasNormal || !hasBitangent) { + CalculateVectors(!hasNormal); + } +} + +eastl::shared_ptr Shape::TextureRegister(const RE::NiPointer niPointer, eastl::shared_ptr defaultTexture, bool modelSpaceNormalMap = false) +{ + if (!niPointer || !niPointer->rendererTexture) + return defaultTexture; + + auto& rt = globals::features::raytracing; + + if (modelSpaceNormalMap) + return rt.GetMSNormalMapRegister(this, niPointer->rendererTexture, defaultTexture); + else { + return rt.GetTextureRegister(reinterpret_cast(niPointer->rendererTexture->texture), defaultTexture); + } +} + +void Shape::BuildMaterial(const RE::BSGeometry::GEOMETRY_RUNTIME_DATA& geometryRuntimeData, [[maybe_unused]] const char* name, RE::FormID formID) +{ + auto& rt = globals::features::raytracing; + + //auto& whiteTexture = rt.defaultWhiteTexture->allocation; + auto& grayTexture = rt.defaultGrayTexture->allocation; + auto& normalTexture = rt.defaultNormalTexture->allocation; + auto& blackTexture = rt.defaultBlackTexture->allocation; + auto& rmaosTexture = rt.defaultRMAOSTexture->allocation; + auto& detailTexture = rt.defaultDetailTexture->allocation; + + using State = RE::BSGeometry::States; + using Feature = RE::BSShaderMaterial::Feature; + using EShaderPropertyFlag = RE::BSShaderProperty::EShaderPropertyFlag; + + eastl::array colors = { + float4(1.0f, 1.0f, 1.0f, 1.0f), + float4(0.0f, 0.0f, 0.0f, 0.0f), + float4(1.0f, 1.0f, 1.0f, 1.0f) + }; + + eastl::array scalars; + scalars.fill(0.0f); + scalars[3] = 0.5f; + + eastl::array 
texCoordOffsetScales = { + float4(0.0f, 0.0f, 1.0f, 1.0f), + float4(0.0f, 0.0f, 1.0f, 1.0f) + }; + + uint16_t alphaFlags = 0u; + + eastl::array, 20> textures; + textures.fill(blackTexture); + + RE::BSShader::Type shaderType = RE::BSShader::Type::None; + REX::EnumSet shaderFlags; + RE::BSShaderMaterial::Feature feature = RE::BSShaderMaterial::Feature::kNone; + stl::enumeration pbrFlags; + + { + auto* property = geometryRuntimeData.properties[State::kProperty].get(); + + auto* effect = geometryRuntimeData.properties[State::kEffect].get(); + + if (effect) { + if (RE::BSShaderProperty* shaderProp = netimmerse_cast(effect)) { + shaderFlags = shaderProp->flags.get(); + colors[0].w *= shaderProp->alpha; + } + + if (RE::BSLightingShaderProperty* lightingShaderProp = skyrim_cast(effect)) { + shaderType = RE::BSShader::Type::Lighting; + + logger::debug("[RT] BuildMaterial - BSLightingShaderProperty [0x{:08X}] Flags: {}", reinterpret_cast(lightingShaderProp), GetFlagsString(lightingShaderProp->flags.underlying())); + + // Set alpha flags + if (property && property->GetType() == RE::NiProperty::Type::kAlpha) { + auto alphaProperty = property->GetRTTI() == globals::rtti::NiAlphaPropertyRTTI.get() ? 
reinterpret_cast(property) : nullptr; + + if (alphaProperty && alphaProperty->GetAlphaBlending()) { + flags |= Flags::AlphaBlending; + alphaFlags |= Material::AlphaFlags::kAlphaBlend; + } + if (alphaProperty && alphaProperty->GetAlphaTesting()) { + flags |= Flags::AlphaTesting; + alphaFlags |= Material::AlphaFlags::kAlphaTest; + scalars[3] = alphaProperty->alphaThreshold / 255.0f; + } + } + + colors[1] = { + lightingShaderProp->emissiveColor->red, + lightingShaderProp->emissiveColor->green, + lightingShaderProp->emissiveColor->blue, + lightingShaderProp->emissiveMult + }; + + if (auto shaderMaterial = lightingShaderProp->material) { + feature = shaderMaterial->GetFeature(); + + for (size_t i = 0; i < 2; i++) { + texCoordOffsetScales[i] = { + shaderMaterial->texCoordOffset[i].x, shaderMaterial->texCoordOffset[i].y, + shaderMaterial->texCoordScale[i].x, shaderMaterial->texCoordScale[i].y + }; + } + + // Landscape + if (const auto* lightingBaseMaterialLand = skyrim_cast(shaderMaterial)) { + textures[0] = TextureRegister(lightingBaseMaterialLand->diffuseTexture, grayTexture); + textures[Material::MAX_PBRLAND_TEXTURES] = TextureRegister(lightingBaseMaterialLand->normalTexture, normalTexture); + + for (uint i = 0; i < std::min(lightingBaseMaterialLand->numLandscapeTextures, Material::MAX_LAND_TEXTURES); i++) { + textures[i + 1] = TextureRegister(lightingBaseMaterialLand->landscapeDiffuseTexture[i], grayTexture); + textures[Material::MAX_PBRLAND_TEXTURES + i + 1] = TextureRegister(lightingBaseMaterialLand->landscapeNormalTexture[i], normalTexture); + } + + textures[Material::MAX_PBRLAND_TEXTURES * 3] = TextureRegister(lightingBaseMaterialLand->terrainOverlayTexture, blackTexture); + textures[Material::MAX_PBRLAND_TEXTURES * 3 + 1] = TextureRegister(lightingBaseMaterialLand->terrainNoiseTexture, blackTexture); + } else if (typeid(*shaderMaterial) == typeid(BSLightingShaderMaterialPBRLandscape)) { + const auto* lightingPBRMaterialLand = static_cast(shaderMaterial); + + for 
(uint i = 0; i < std::min(lightingPBRMaterialLand->numLandscapeTextures, Material::MAX_PBRLAND_TEXTURES); i++) { + textures[i] = TextureRegister(lightingPBRMaterialLand->landscapeBaseColorTextures[i], grayTexture); + textures[Material::MAX_PBRLAND_TEXTURES + i] = TextureRegister(lightingPBRMaterialLand->landscapeNormalTextures[i], normalTexture); + textures[Material::MAX_PBRLAND_TEXTURES * 2 + i] = TextureRegister(lightingPBRMaterialLand->landscapeRMAOSTextures[i], rmaosTexture); + } + + textures[Material::MAX_PBRLAND_TEXTURES * 3] = TextureRegister(lightingPBRMaterialLand->terrainOverlayTexture, blackTexture); + textures[Material::MAX_PBRLAND_TEXTURES * 3 + 1] = TextureRegister(lightingPBRMaterialLand->terrainNoiseTexture, blackTexture); + } else if (typeid(*shaderMaterial) == typeid(BSLightingShaderMaterialPBR)) { + // TrueBR - Tried to check for 'lightingShaderProp->flags.any(EShaderPropertyFlag::kMenuScreen)' + // but it did not work at all, skyrim_cast is not safe and will cast even if not PBR material (no RTTI?) 
+ + const auto* lightingPBRMaterial = static_cast(shaderMaterial); + + textures[0] = TextureRegister(lightingPBRMaterial->diffuseTexture, grayTexture); + textures[RTConstants::MATERIAL_NORMALMAP_ID] = TextureRegister(lightingPBRMaterial->normalTexture, normalTexture); + textures[2] = TextureRegister(lightingPBRMaterial->emissiveTexture, blackTexture); + textures[3] = TextureRegister(lightingPBRMaterial->rmaosTexture, rmaosTexture); + + scalars[0] = lightingPBRMaterial->GetRoughnessScale(); + scalars[1] = lightingPBRMaterial->GetSpecularLevel(); + + pbrFlags = GetPBRShaderFlags(lightingPBRMaterial); + + if (pbrFlags & PBRShaderFlags::Subsurface) { + textures[6] = TextureRegister(lightingPBRMaterial->featuresTexture0, blackTexture); + + auto sssColor = lightingPBRMaterial->GetSubsurfaceColor(); + colors[2] = { sssColor.red, sssColor.green, sssColor.blue, 1.0f }; + scalars[2] = lightingPBRMaterial->GetSubsurfaceOpacity(); + } + + // Enforce TruePBR flag + shaderFlags.set(EShaderPropertyFlag::kMenuScreen); + } else { + // Roughness Scale + scalars[0] = 1.0f; + + // Specular Level + scalars[1] = 0.04f; + + // Vanilla Materials + if (const RE::BSLightingShaderMaterialBase* lightingBaseMaterial = skyrim_cast(shaderMaterial)) { + textures[0] = TextureRegister(lightingBaseMaterial->diffuseTexture, grayTexture); + + bool isModelSpaceNormalMap = shaderFlags.any(EShaderPropertyFlag::kModelSpaceNormals); + textures[RTConstants::MATERIAL_NORMALMAP_ID] = TextureRegister(lightingBaseMaterial->normalTexture, normalTexture, isModelSpaceNormalMap); + + if (shaderFlags.any(EShaderPropertyFlag::kSpecular)) { + if (shaderFlags.any(EShaderPropertyFlag::kModelSpaceNormals)) { + textures[3] = TextureRegister(lightingBaseMaterial->specularBackLightingTexture, blackTexture); + } + + colors[2] = { + lightingBaseMaterial->specularColor.red, + lightingBaseMaterial->specularColor.green, + lightingBaseMaterial->specularColor.blue, + lightingBaseMaterial->specularColorScale + }; + + scalars[0] = 
ShininessToRoughness(lightingBaseMaterial->specularPower); + } + + // Envmap + if (feature == Feature::kEnvironmentMap || feature == Feature::kEye) { + if (const auto* lightingEnvmapMaterial = skyrim_cast(shaderMaterial)) { + textures[4] = TextureRegister(lightingEnvmapMaterial->envTexture, blackTexture); + textures[5] = TextureRegister(lightingEnvmapMaterial->envMaskTexture, blackTexture); + } + } + + // Glow + if (feature == Feature::kGlowMap) { + if (const auto* lightingGlowMaterial = skyrim_cast(shaderMaterial)) { + if (lightingShaderProp->flags.none(EShaderPropertyFlag::kOwnEmit)) { + colors[1].x = 1.0f; + colors[1].y = 1.0f; + colors[1].z = 1.0f; + } + + textures[2] = TextureRegister(lightingGlowMaterial->glowTexture, blackTexture); + } + } else if (lightingShaderProp->flags.none(EShaderPropertyFlag::kOwnEmit)) { + colors[1].x = 0.0f; + colors[1].y = 0.0f; + colors[1].z = 0.0f; + } + + // Hair + if (feature == Feature::kHairTint) { + if (const auto* lightingHairTintMaterial = skyrim_cast(shaderMaterial)) { + colors[0].x = lightingHairTintMaterial->tintColor.red; + colors[0].y = lightingHairTintMaterial->tintColor.green; + colors[0].z = lightingHairTintMaterial->tintColor.blue; + + // Load flowmap texture for hair (stored in specularBackLightingTexture slot) + textures[3] = TextureRegister(lightingBaseMaterial->specularBackLightingTexture, blackTexture); + } + } + + // FaceGen + if (feature == Feature::kFaceGen) { + if (const auto* lightingFacegenMaterial = skyrim_cast(shaderMaterial)) { + if (IsPlayer(formID)) + textures[4] = rt.GetTextureRegister(globals::game::renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kPLAYER_FACEGEN_TINT].texture, grayTexture); + else + textures[4] = TextureRegister(lightingFacegenMaterial->tintTexture, grayTexture); + + textures[5] = TextureRegister(lightingFacegenMaterial->detailTexture, detailTexture); + } + } + + // FaceGen RGB Tint + if (feature == Feature::kFaceGenRGBTint) { + if (const auto* 
lightingFacegenTintMaterial = skyrim_cast(shaderMaterial)) { + colors[0].x = lightingFacegenTintMaterial->tintColor.red; + colors[0].y = lightingFacegenTintMaterial->tintColor.green; + colors[0].z = lightingFacegenTintMaterial->tintColor.blue; + } + } + } + } + } else { + logger::warn("[RT] BuildMaterial - BSShaderMaterial is nullptr"); + } + } + + if (auto effectShaderProp = netimmerse_cast(effect)) { + shaderType = RE::BSShader::Type::Effect; + + logger::debug("[RT] BuildMaterial - BSEffectShaderProperty: {}", name); + logger::debug("[RT] BuildMaterial - Flags: {}", GetFlagsString(effectShaderProp->flags.underlying())); + + if (auto effectMaterial = skyrim_cast(effectShaderProp->material)) { + colors[1] = { + effectMaterial->baseColor.red, + effectMaterial->baseColor.green, + effectMaterial->baseColor.blue, + effectMaterial->baseColorScale + }; + + textures[0] = TextureRegister(effectMaterial->sourceTexture, blackTexture); + textures[2] = TextureRegister(effectMaterial->greyscaleTexture, blackTexture); + } + } + } + } + + material = Material( + shaderFlags, + shaderType, + feature, + pbrFlags, + alphaFlags, + colors, + scalars, + texCoordOffsetScales, + textures); +} + +stl::enumeration Shape::GetPBRShaderFlags(const BSLightingShaderMaterialPBR* pbrMaterial) +{ + auto graphicsState = globals::game::graphicsState; + + stl::enumeration pbrFlags; + + if (pbrMaterial->pbrFlags.any(PBRFlags::TwoLayer)) { + pbrFlags.set(PBRShaderFlags::TwoLayer); + if (pbrMaterial->pbrFlags.any(PBRFlags::InterlayerParallax)) { + pbrFlags.set(PBRShaderFlags::InterlayerParallax); + } + if (pbrMaterial->pbrFlags.any(PBRFlags::CoatNormal)) { + pbrFlags.set(PBRShaderFlags::CoatNormal); + } + if (pbrMaterial->pbrFlags.any(PBRFlags::ColoredCoat)) { + pbrFlags.set(PBRShaderFlags::ColoredCoat); + } + } else if (pbrMaterial->pbrFlags.any(PBRFlags::HairMarschner)) { + pbrFlags.set(PBRShaderFlags::HairMarschner); + } else { + if (pbrMaterial->pbrFlags.any(PBRFlags::Subsurface)) { + 
pbrFlags.set(PBRShaderFlags::Subsurface); + } + if (pbrMaterial->pbrFlags.any(PBRFlags::Fuzz)) { + pbrFlags.set(PBRShaderFlags::Fuzz); + } else { + if (pbrMaterial->GetGlintParameters().enabled) { + pbrFlags.set(PBRShaderFlags::Glint); + } + + // This is slimmed down because we don't have access to lightingFlags + if (pbrMaterial->GetProjectedMaterialGlintParameters().enabled) { + pbrFlags.set(PBRShaderFlags::ProjectedGlint); + } + } + } + + const bool hasEmissive = pbrMaterial->emissiveTexture != nullptr && pbrMaterial->emissiveTexture != graphicsState->GetRuntimeData().defaultTextureBlack; + if (hasEmissive) { + pbrFlags.set(PBRShaderFlags::HasEmissive); + } + + const bool hasDisplacement = pbrMaterial->displacementTexture != nullptr && pbrMaterial->displacementTexture != graphicsState->GetRuntimeData().defaultTextureBlack; + if (hasDisplacement) { + pbrFlags.set(PBRShaderFlags::HasDisplacement); + } + + const bool hasFeaturesTexture0 = pbrMaterial->featuresTexture0 != nullptr && pbrMaterial->featuresTexture0 != graphicsState->GetRuntimeData().defaultTextureWhite; + if (hasFeaturesTexture0) { + pbrFlags.set(PBRShaderFlags::HasFeaturesTexture0); + } + + const bool hasFeaturesTexture1 = pbrMaterial->featuresTexture1 != nullptr && pbrMaterial->featuresTexture1 != graphicsState->GetRuntimeData().defaultTextureWhite; + if (hasFeaturesTexture1) { + pbrFlags.set(PBRShaderFlags::HasFeaturesTexture1); + } + + return pbrFlags; +} + +void Shape::CreateBuffers(const std::wstring& name) +{ + auto& rt = globals::features::raytracing; + auto* device = rt.d3d12Device.get(); + auto* commandList = rt.commandList.get(); + + auto* skinningHeap = rt.skinningPipeline->heap.get(); + auto* giHeap = rt.giHeap.get(); + + D3D12MA::ALLOCATION_DESC allocDesc = { .HeapType = D3D12_HEAP_TYPE_DEFAULT }; + + D3D12MA::ALLOCATION_DESC uploadAllocDesc = { .HeapType = D3D12_HEAP_TYPE_UPLOAD }; + uploadAllocDesc.CustomPool = rt.uploadPool.get(); + + auto allocator = rt.allocator.get(); + + const auto 
allocationIndex = allocation->GetIndex(); + + std::lock_guard lock{ rt.renderMutex }; + + // Dynamic + if (flags & Flags::Dynamic) { + allocDesc.CustomPool = rt.dynamicVertexPool.get(); + dynamicPositionBuffer = eastl::make_unique>(device, allocator, allocDesc, uploadAllocDesc, vertexCount, false); + + dynamicPositionBuffer->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + dynamicPositionBuffer->CreateSRV(skinningHeap->CPUHandle(SkinningHeap::Slot::DynamicVertices, allocationIndex)); + + UpdateUploadDynamicBuffers(commandList); + } + + bool updatable = (flags & Flags::Dynamic) || (flags & Flags::Skinned); + + // Vertices + { + allocDesc.CustomPool = rt.vertexPool.get(); + vertexBuffer = eastl::make_unique>(device, allocator, allocDesc, uploadAllocDesc, vertexCount, updatable); + + vertexBuffer->UpdateList(vertices.data(), vertexCount); + vertexBuffer->SetName(std::format(L"Vertex Buffer [{}] - {}", allocation->GetIndex(), name).c_str()); + + if (vertexCount != vertices.size()) + logger::error("[RT] Shape::CreateBuffers - VertexCount: {}, Vertices Size: {}", vertexCount, vertices.size()); + + vertexBuffer->Upload(commandList, 0, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + // UAV + if (updatable) { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; + uavDesc.Buffer.FirstElement = 0; + uavDesc.Buffer.NumElements = vertexCount; + uavDesc.Buffer.StructureByteStride = sizeof(Vertex); + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + device->CreateUnorderedAccessView(vertexBuffer->resource.get(), nullptr, &uavDesc, skinningHeap->CPUHandle(SkinningHeap::Slot::Output, allocationIndex)); + } + + // SRV + { + D3D12_SHADER_RESOURCE_VIEW_DESC vbDesc = {}; + vbDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + vbDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + vbDesc.Format = DXGI_FORMAT_UNKNOWN; + 
vbDesc.Buffer.FirstElement = 0; + vbDesc.Buffer.NumElements = vertexCount; + vbDesc.Buffer.StructureByteStride = sizeof(Vertex); + vbDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + + device->CreateShaderResourceView(vertexBuffer->resource.get(), &vbDesc, giHeap->CPUHandle(GIHeap::Slot::Vertices, allocationIndex)); + } + } + + // Vertices Copy + if (updatable) + { + allocDesc.CustomPool = rt.vertexCopyPool.get(); + vertexCopyBuffer = eastl::make_unique>(device, allocator, allocDesc, uploadAllocDesc, vertexCount); + + vertexCopyBuffer->UpdateList(vertices.data(), vertexCount); + vertexCopyBuffer->SetName(std::format(L"Vertex Copy Buffer [{}] - {}", allocationIndex, name).c_str()); + + vertexCopyBuffer->Upload(commandList, 0, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + // SRV + { + D3D12_SHADER_RESOURCE_VIEW_DESC vbDesc = {}; + vbDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + vbDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + vbDesc.Format = DXGI_FORMAT_UNKNOWN; + vbDesc.Buffer.FirstElement = 0; + vbDesc.Buffer.NumElements = vertexCount; + vbDesc.Buffer.StructureByteStride = sizeof(Vertex); + vbDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + + device->CreateShaderResourceView(vertexCopyBuffer->resource.get(), &vbDesc, skinningHeap->CPUHandle(SkinningHeap::Slot::Vertices, allocationIndex)); + } + } + + // Skinning + if (flags & Flags::Skinned) { + allocDesc.CustomPool = rt.skinningPool.get(); + skinningBuffer = eastl::make_unique>(device, allocator, allocDesc, uploadAllocDesc, vertexCount, false); + + skinningBuffer->UpdateList(skinning.data(), vertexCount); + skinningBuffer->SetName(std::format(L"Skinning Buffer [{}] - {}", allocationIndex, name).c_str()); + + skinningBuffer->Upload(commandList, 0, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + // SRV + { + D3D12_SHADER_RESOURCE_VIEW_DESC vbDesc = {}; + vbDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + vbDesc.Shader4ComponentMapping = 
D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + vbDesc.Format = DXGI_FORMAT_UNKNOWN; + vbDesc.Buffer.FirstElement = 0; + vbDesc.Buffer.NumElements = vertexCount; + vbDesc.Buffer.StructureByteStride = sizeof(Skinning); + vbDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + + device->CreateShaderResourceView(skinningBuffer->resource.get(), &vbDesc, skinningHeap->CPUHandle(SkinningHeap::Slot::SkinningData, allocationIndex)); + } + } + + // Triangles + { + allocDesc.CustomPool = rt.trianglePool.get(); + triangleBuffer = eastl::make_unique>(device, allocator, allocDesc, uploadAllocDesc, triangleCount, false); + + triangleBuffer->UpdateList(triangles.data(), triangles.size()); + triangleBuffer->SetName(std::format(L"Triangle Buffer [{}] - {}", allocationIndex, name).c_str()); + + triangleBuffer->Upload(commandList, 0, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + // SRV + { + D3D12_SHADER_RESOURCE_VIEW_DESC ibDesc = {}; + ibDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + ibDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + ibDesc.Format = DXGI_FORMAT_UNKNOWN; + ibDesc.Buffer.FirstElement = 0; + ibDesc.Buffer.NumElements = triangleCount; + ibDesc.Buffer.StructureByteStride = sizeof(Triangle); + ibDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + + device->CreateShaderResourceView(triangleBuffer->resource.get(), &ibDesc, giHeap->CPUHandle(GIHeap::Slot::Triangles, allocationIndex)); + } + } + + // Updatable geometry is already in root space + if (updatable) + localToRoot = float3x4( + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f); + + // This buffer is used for BLAS build/rebuild + rt.transformBuffer->UpdateAt(&localToRoot, allocationIndex); + rt.transformBuffer->UploadRegion(commandList, sizeof(float3x4), sizeof(float3x4) * allocationIndex); +} + +void Shape::CalculateVectors(bool calculateNormal) +{ + eastl::vector normals; + + if (calculateNormal) + normals.resize(vertexCount, float3(0, 0, 0)); + + eastl::vector 
tangents; + tangents.resize(vertexCount, float3(0, 0, 0)); + + // Loop over triangles + for (auto& t : triangles) { + Vertex& v0 = vertices[t.x]; + Vertex& v1 = vertices[t.y]; + Vertex& v2 = vertices[t.z]; + + float3 pos0 = v0.Position; + float3 pos1 = v1.Position; + float3 pos2 = v2.Position; + + half2 uv0 = v0.Texcoord0; + half2 uv1 = v1.Texcoord0; + half2 uv2 = v2.Texcoord0; + + float3 deltaPos1 = pos1 - pos0; + float3 deltaPos2 = pos2 - pos0; + + // Optionaly compute normals + if (calculateNormal) { + float3 faceNormal = deltaPos1.Cross(deltaPos2); + + normals[t.x] += faceNormal; + normals[t.y] += faceNormal; + normals[t.z] += faceNormal; + } + + // Compute UV deltas + float2 deltaUV1 = uv1 - uv0; + float2 deltaUV2 = uv2 - uv0; + + float det = deltaUV1.x * deltaUV2.y - deltaUV1.y * deltaUV2.x; + + if (fabs(det) < 1e-8f) + continue; + + float r = 1.0f / det; + + float3 tangent = r * (deltaUV2.y * deltaPos1 - deltaUV1.y * deltaPos2); + + + // Accumulate per-vertex + tangents[t.x] += tangent; + tangents[t.y] += tangent; + tangents[t.z] += tangent; + } + + // Normalize and orthogonalize + for (size_t i = 0; i < vertexCount; i++) { + auto& v = vertices[i]; + + float3 n = Normalize(calculateNormal ? normals[i] : float3(v.Normal)); + + float3 t = Normalize(tangents[i] - n * n.Dot(tangents[i])); + + float3 b = n.Cross(t); + float sign = (b.Dot(t.Cross(n)) < 0.0f) ? 
-1.0f : 1.0f; + b *= sign; + + if (calculateNormal) + v.Normal = n; + + v.Tangent = t; + v.Bitangent = b; + } +} + +D3D12_RAYTRACING_GEOMETRY_DESC Shape::GeometryDesc() const +{ + bool isAlpha = flags.any(Shape::Flags::AlphaTesting, Shape::Flags::AlphaBlending); + bool isWindows = material.shaderFlags.any(RE::BSShaderProperty::EShaderPropertyFlag::kAssumeShadowmask) && (material.Feature == RE::BSShaderMaterial::Feature::kGlowMap || material.PBRFlags.any(PBRShaderFlags::HasEmissive)); + + bool isOpaque = !isAlpha && !isWindows; + + return { + .Type = D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES, + .Flags = isOpaque ? D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE : D3D12_RAYTRACING_GEOMETRY_FLAG_NONE, + .Triangles = { + .Transform3x4 = TransformBuffer(), + .IndexFormat = DXGI_FORMAT_R16_UINT, + .VertexFormat = DXGI_FORMAT_R32G32B32_FLOAT, + .IndexCount = triangleCount * 3, + .VertexCount = vertexCount, + .IndexBuffer = triangleBuffer->resource->GetGPUVirtualAddress(), + .VertexBuffer = { + .StartAddress = vertexBuffer->resource->GetGPUVirtualAddress(), + .StrideInBytes = sizeof(Vertex) } } + }; +} + +D3D12_GPU_VIRTUAL_ADDRESS Shape::TransformBuffer() const +{ + auto offset = static_cast(allocation->GetIndex()) * sizeof(float3x4); + return globals::features::raytracing.transformBuffer->resource->GetGPUVirtualAddress() + offset; +} + +Shape::Flags Shape::Update() +{ + const auto dynamic = flags.any(Shape::Flags::Dynamic); + const auto skinned = flags.any(Shape::Flags::Skinned); + + // I don't know if kHidden is set on inner nodes for culling, so to be safe we check + if (dynamic || skinned) { + SetPendingState(State::Hidden, geometry->GetFlags().any(RE::NiAVObject::Flag::kHidden)); + } + + if (IsPendingHidden()) { + return Shape::Flags::None; + } + + Shape::Flags updateFlags = Shape::Flags::None; + + if (dynamic && UpdateDynamicPosition()) { + updateFlags |= Shape::Flags::Dynamic; + } + + if (skinned && UpdateSkinning()) { + updateFlags |= Shape::Flags::Skinned; + } + + return 
updateFlags; +} + +// Updates Dynamic Vertex position (and Bitangent.x) buffer +// TODO: Test performance and stability of using a upload heap buffer and keeping it mapped to dynamicData +bool Shape::UpdateDynamicPosition() +{ + auto* dynamicTriShape = reinterpret_cast(geometry); + + auto& runtimeData = dynamicTriShape->GetDynamicTrishapeRuntimeData(); + + if (!runtimeData.dynamicData) + return false; + + auto& dataSize = runtimeData.dataSize; + + // Is this even a possibility? + if (dataSize == 0) + return false; + + runtimeData.lock.Lock(); + + // Has dynamic position changed? + if (std::memcmp(dynamicPosition.data(), runtimeData.dynamicData, dataSize) == 0) { + runtimeData.lock.Unlock(); + return false; + } + + std::memcpy(dynamicPosition.data(), runtimeData.dynamicData, dataSize); + runtimeData.lock.Unlock(); + + return true; +} + +// Updates 'dynamicPositionBuffer' with dynamicPosition.data() and uploads the buffer to the GPU using the command list +void Shape::UpdateUploadDynamicBuffers(ID3D12GraphicsCommandList4* commandList) +{ + if (flags.none(Flags::Dynamic)) + return; + + dynamicPositionBuffer->UpdateList(dynamicPosition.data(), dynamicPosition.size()); + dynamicPositionBuffer->Upload(commandList); +} + +bool Shape::IsHidden() const +{ + return ((state & State::Hidden) != State::None) || ((state & State::DismemberHidden) != State::None); +} + +bool Shape::IsPendingHidden() const +{ + return ((pendingState & State::Hidden) != State::None) || ((pendingState & State::DismemberHidden) != State::None); +} + +bool Shape::IsDirtyState() const +{ + return pendingState != state; +} + +bool Shape::UpdateSkinning() +{ + /*auto& geometryFlags = geometry->GetFlags(); + + if (geometryFlags.any(RE::NiAVObject::Flag::kNoAnimSyncS)) + return false;*/ + + // Update Bone matrices + auto& skinInstance = geometry->GetGeometryRuntimeData().skinInstance; + + // RaceMenu crash fix + if (!skinInstance || !skinInstance.get()) + return false; + + // Only update if the game has 
updated the matrices + if (frameID == skinInstance->frameID) + return false; + + // UBE crash fix + if (skinInstance->numMatrices == 0 || !skinInstance->boneMatrices) + return false; + + if (boneMatrices.empty()) + boneMatrices.resize(skinInstance->numMatrices); + + float3x4* boneMatricesArray = reinterpret_cast(skinInstance->boneMatrices); + + auto* rootParent = skinInstance->rootParent; + + // UBE crash fix + if (!rootParent) + return false; + + //logger::info("[RT] Shape::UpdateSkinning {} - {}, {}", geometry->name, rootParent->name, skinInstance->frameID); + + auto delta = skinInstance->frameID - frameID; + + // Protection against dangling rootParent pointer or corrupted memory causing Access Violation + __try { + auto skinRootInverse = GetXMFromNiTransform(delta > 1 ? rootParent->previousWorld.Invert() : rootParent->world.Invert()); + + frameID = skinInstance->frameID; + + for (uint i = 0; i < skinInstance->numMatrices; i++) { + XMStoreFloat3x4(&boneMatrices[i], XMMatrixMultiply(XMLoadFloat3x4(&boneMatricesArray[i]), skinRootInverse)); + } + } __except (EXCEPTION_EXECUTE_HANDLER) { + return false; + } + + return true; +} + +// State is set as pending first, final state is updated after BLAS rebuild call +void Shape::SetPendingState(State stateIn, bool activate) +{ + if (activate) + pendingState |= stateIn; + else + pendingState &= ~stateIn; +} + +void Shape::UpdateDismember(bool enable) +{ + SetPendingState(State::DismemberHidden, !enable); +} + +// Updates state from pending +void Shape::UpdateState() +{ + state = pendingState; +} + +ShapeData Shape::GetData() const +{ + return ShapeData{ + material.GetData(), + allocation->GetIndex(), + {0, 0}, + localToRoot + }; +} \ No newline at end of file diff --git a/src/Features/Raytracing/Core/Shape.h b/src/Features/Raytracing/Core/Shape.h new file mode 100644 index 0000000000..19cd2a0799 --- /dev/null +++ b/src/Features/Raytracing/Core/Shape.h @@ -0,0 +1,148 @@ +#pragma once + +#include "PCH.h" + +#include +#include 
+ +#include "Features/Raytracing/Allocator.h" +#include "Features/Raytracing/BufferMA.h" +#include "Features/Raytracing/Types.h" +#include "Features/Raytracing/Utils.h" + +#include "Features/Raytracing/Core/Material.h" + +#include "Raytracing/Includes/Types/Shape.hlsli" +#include "Raytracing/Includes/Types/Skinning.hlsli" +#include "Raytracing/Includes/Types/Triangle.hlsli" +#include "Raytracing/Includes/Types/Vertex.hlsli" + +class Shape +{ +public: + enum Flags : uint8_t + { + None = 0, + AlphaBlending = 1 << 0, + AlphaTesting = 1 << 1, + Dynamic = 1 << 2, + Skinned = 1 << 3, + Landscape = 1 << 4, + Static = 1 << 5, + DoubleSidedGeom = 1 << 6 + }; + + enum class State : uint8_t + { + None = 0, + Hidden = 1 << 0, + DismemberHidden = 1 << 1 + }; + + // The position of this meshes SRV in the register stack + eastl::unique_ptr allocation; + + uint vertexCount = 0; + uint triangleCount = 0; + RE::BSGraphics::Vertex::Flags vertexFlags; + + // Reference to original geometry + RE::BSGeometry* geometry = nullptr; + + // We could copy straight to buffer and save some (minimal) ram, but keeping a copy allows using memcmp to detect changes + eastl::vector dynamicPosition; + eastl::vector vertices; + eastl::vector skinning; + eastl::vector triangles; + + eastl::unique_ptr> dynamicPositionBuffer = nullptr; + + eastl::unique_ptr> vertexBuffer = nullptr; + eastl::unique_ptr> vertexCopyBuffer = nullptr; + + eastl::unique_ptr> skinningBuffer = nullptr; + eastl::unique_ptr> triangleBuffer = nullptr; + + eastl::vector boneMatrices; + + Material material; + + stl::enumeration flags = Flags::None; + + AABB aabb; + + float3x4 localToRoot; + + uint16_t slot; + + uint32_t frameID; + + Shape(Flags flags, Allocation* allocation, RE::BSGeometry* geometry, float3x4 localToRoot, bool dismemberVisible = true, uint16_t slot = 0) : + flags(flags), allocation({ allocation, AllocationDeleter() }), geometry(geometry), localToRoot(localToRoot), slot(slot) + { + UpdateDismember(dismemberVisible); + } 
+ + /*inline Shape Clone(uint16_t registerIndexIn, RE::BSGeometry* geometryIn) const + { + auto clone = Shape(registerIndexIn, geometryIn, flags); + + clone.vertexCount = vertexCount; + clone.triangleCount = triangleCount; + + clone.vertices = vertices; + clone.skinning = skinning; + clone.triangles = triangles; + + clone.material = material; + + return clone; + }*/ + + D3D12_RAYTRACING_GEOMETRY_DESC GeometryDesc() const; + + D3D12_GPU_VIRTUAL_ADDRESS TransformBuffer() const; + + void BuildMesh(RE::BSGraphics::TriShape* rendererData, const uint32_t& vertexCountIn, const uint32_t& triangleCountIn, const uint16_t& bonesPerVertex); + + void BuildMaterial(const RE::BSGeometry::GEOMETRY_RUNTIME_DATA& geometryRuntimeData, [[maybe_unused]] const char* name, RE::FormID formID); + + void CreateBuffers(const std::wstring& name); + + void CalculateVectors(bool calculateNormal); + + Flags Update(); + + bool UpdateDynamicPosition(); + + void UpdateUploadDynamicBuffers(ID3D12GraphicsCommandList4* commandList); + + bool UpdateSkinning(); + + void SetPendingState(State stateIn, bool activate); + + void UpdateDismember(bool enable); + + void UpdateState(); + + bool IsHidden() const; + + bool IsPendingHidden() const; + + bool IsDirtyState() const; + + eastl::shared_ptr TextureRegister(const RE::NiPointer niPointer, eastl::shared_ptr defaultTexture, bool modelSpaceNormalMap); + + // For PBR shader flags we need to copy exactly what TruePBR does + static stl::enumeration GetPBRShaderFlags(const BSLightingShaderMaterialPBR* pbrMaterial); + + ShapeData GetData() const; + +private: + // State is pending until BLASRebuild + State pendingState = State::None; + State state = State::None; +}; + +DEFINE_ENUM_FLAG_OPERATORS(Shape::Flags); +DEFINE_ENUM_FLAG_OPERATORS(Shape::State); \ No newline at end of file diff --git a/src/Features/Raytracing/Heap.h b/src/Features/Raytracing/Heap.h new file mode 100644 index 0000000000..54331fbd48 --- /dev/null +++ b/src/Features/Raytracing/Heap.h @@ -0,0 
+1,32 @@ +#pragma once + +#include +#include + +template +struct Heap +{ + using Table = TableEnum; + using Slot = SlotEnum; + + static uint32_t GetTableValue(Table table) + { + return static_cast(table); + } + + static uint32_t GetSlotValue(Slot slot) + { + return static_cast(slot); + } + + static uint32_t NumDescriptors() + { + return static_cast(Slot::NumDescriptors); + } +}; + +template +concept IsHeap = requires(T t, typename T::Table table, typename T::Slot slot) { + { t.GetTableValue(table) } -> std::convertible_to; + { t.GetSlotValue(slot) } -> std::convertible_to; +}; diff --git a/src/Features/Raytracing/HeapManager.h b/src/Features/Raytracing/HeapManager.h new file mode 100644 index 0000000000..85e79c8fa8 --- /dev/null +++ b/src/Features/Raytracing/HeapManager.h @@ -0,0 +1,135 @@ +#pragma once + +#include "Features/Raytracing/Allocator.h" +#include "Features/Raytracing/Heap.h" +#include "Features/Raytracing/ShaderBindingTable.h" + +namespace DX12 +{ + template + concept EnumUInt32 = std::is_enum_v && std::is_same_v, uint32_t>; + + //template + template + struct DescriptorDesc + { + T slot; + UINT numDescriptors; + UINT registerSpace = 0; + D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; + + DescriptorDesc(T slot, UINT numDescriptors, UINT registerSpace = 0, D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE) : + slot(slot), numDescriptors(numDescriptors), registerSpace(registerSpace), flags(flags) {} + }; + + //template + template + class DescriptorTable + { + public: + DescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE type, const eastl::vector>& descriptors) : + type(type), rootParameter(new CD3DX12_ROOT_PARAMETER1()) + { + ranges.reserve(descriptors.size()); + slots.reserve(descriptors.size()); + + for (const auto& descriptor : descriptors) { + slots.push_back(descriptor.slot); + ranges.emplace_back(type, descriptor.numDescriptors, + static_cast(ranges.size()), + descriptor.registerSpace, + descriptor.flags); + } + + 
rootParameter->InitAsDescriptorTable(static_cast(ranges.size()), ranges.data()); + } + + T FirstSlot() const + { + return slots.front(); + } + + const CD3DX12_ROOT_PARAMETER1& GetRootParameter() const + { + return *rootParameter; + } + + private: + D3D12_DESCRIPTOR_RANGE_TYPE type; + eastl::vector slots; + eastl::vector ranges; + eastl::unique_ptr rootParameter; + }; + + template + class DescriptorHeap + { + public: + using Table = HeapType::Table; + using Slot = HeapType::Slot; + + DescriptorHeap(ID3D12Device5* device, const D3D12_DESCRIPTOR_HEAP_DESC& descriptorHeapDesc) : + descriptorHeapDesc(descriptorHeapDesc) + { + DX::ThrowIfFailed(device->CreateDescriptorHeap(&descriptorHeapDesc, IID_PPV_ARGS(&descriptorHeap))); + descriptorIncrementSize = device->GetDescriptorHandleIncrementSize(descriptorHeapDesc.Type); + } + + ID3D12DescriptorHeap* Heap() const + { + return descriptorHeap.get(); + } + + CD3DX12_CPU_DESCRIPTOR_HANDLE CPUHandle(Slot item, uint offset = 0) const + { + return CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptorHeap->GetCPUDescriptorHandleForHeapStart(), HeapType::GetSlotValue(item) + offset, descriptorIncrementSize); + } + + CD3DX12_CPU_DESCRIPTOR_HANDLE CPUHandle(Slot item, Allocation* allocation) const + { + return CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptorHeap->GetCPUDescriptorHandleForHeapStart(), HeapType::GetSlotValue(item) + allocation->GetIndex(), descriptorIncrementSize); + } + + CD3DX12_GPU_DESCRIPTOR_HANDLE GPUHandle(Slot item, uint offset = 0) const + { + return CD3DX12_GPU_DESCRIPTOR_HANDLE(descriptorHeap->GetGPUDescriptorHandleForHeapStart(), HeapType::GetSlotValue(item) + offset, descriptorIncrementSize); + } + + CD3DX12_GPU_DESCRIPTOR_HANDLE TableGPUHandle(Table type, uint offset = 0) const + { + auto it = descriptorRanges.find(type); + if (it == descriptorRanges.end()) { + throw std::out_of_range("[RT] DescriptorHeap::TableGPUHandle, Table not found."); + } + + Slot firstSlot = it->second->FirstSlot(); + + return 
CD3DX12_GPU_DESCRIPTOR_HANDLE(descriptorHeap->GetGPUDescriptorHandleForHeapStart(), HeapType::GetSlotValue(firstSlot) + offset, descriptorIncrementSize); + } + + void CreateTable(Table type, D3D12_DESCRIPTOR_RANGE_TYPE rangeType, const eastl::vector>& descriptors) + { + descriptorRanges.emplace(type, eastl::unique_ptr>(new DescriptorTable(rangeType, descriptors))); + } + + eastl::vector GetRootParameters() const + { + eastl::vector rootParams; + for (const auto& type : magic_enum::enum_values()) { + auto it = descriptorRanges.find(type); + if (it != descriptorRanges.end()) { + rootParams.push_back(it->second->GetRootParameter()); + } else { + logger::error("[RT] DescriptorHeap::GetRootParameter Descriptor table {} not found.", magic_enum::enum_name(type)); + } + } + return rootParams; + } + + private: + D3D12_DESCRIPTOR_HEAP_DESC descriptorHeapDesc{}; + winrt::com_ptr descriptorHeap; + uint descriptorIncrementSize{}; + eastl::unordered_map>> descriptorRanges; + }; +} diff --git a/src/Features/Raytracing/Helpers/ModelSpaceToTangent.h b/src/Features/Raytracing/Helpers/ModelSpaceToTangent.h new file mode 100644 index 0000000000..275026b63e --- /dev/null +++ b/src/Features/Raytracing/Helpers/ModelSpaceToTangent.h @@ -0,0 +1,179 @@ +#pragma once + +#include "PCH.h" + +#include "Utils/D3D.h" +#include + +/*float4 Position : POSITION0; +float2 TexCoord0 : TEXCOORD0; +float4 Normal : NORMAL0; +float4 Tangent : TANGENT0; +float4 Color : COLOR0; +float4 Bitangent : BINORMAL0; +float4 LandBlendWeights1 : TEXCOORD1; +float4 LandBlendWeights2 : TEXCOORD2;*/ + +struct ModelSpaceToTangent +{ + winrt::com_ptr vertexShader = nullptr; + winrt::com_ptr vertexDynamicShader = nullptr; + + winrt::com_ptr pixelShader = nullptr; + + winrt::com_ptr inputLayout = nullptr; + + winrt::com_ptr samplerState = nullptr; + + winrt::com_ptr depthStencilState = nullptr; + + winrt::com_ptr rasterState = nullptr; + + winrt::com_ptr blendState = nullptr; + + struct UnpackedVertex + { + float3 position; 
+ half2 texcoord; + float3 normal; + float3 tangent; + uint32_t color; + float3 bitangent; + + UnpackedVertex& operator=(const Vertex& src) + { + position = src.Position; + texcoord = src.Texcoord0; + normal = src.Normal; + tangent = src.Tangent; + color = src.Color.packed; + bitangent = src.Bitangent; + return *this; + } + }; + + ModelSpaceToTangent() + { + std::vector inputDesc = { + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R16G16_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "TANGENT", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "BINORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + }; + + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\ModelSpaceToTangent.hlsl", {}, "vs_5_0", "vertex", inputDesc, inputLayout.put())); rawPtr) + vertexShader.attach(rawPtr); + + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\ModelSpaceToTangent.hlsl", { { "DYNAMIC", "" } }, "vs_5_0", "vertex", inputDesc, inputLayout.put())); rawPtr) + vertexDynamicShader.attach(rawPtr); + + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\ModelSpaceToTangent.hlsl", {}, "ps_5_0", "pixel")); rawPtr) + pixelShader.attach(rawPtr); + + auto device = globals::d3d::device; + + D3D11_SAMPLER_DESC sampDesc = {}; + sampDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + sampDesc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP; + sampDesc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP; + sampDesc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP; + sampDesc.MipLODBias = 0.0f; + 
sampDesc.MaxAnisotropy = 1; + sampDesc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; + sampDesc.BorderColor[0] = 0; + sampDesc.BorderColor[1] = 0; + sampDesc.BorderColor[2] = 0; + sampDesc.BorderColor[3] = 0; + sampDesc.MinLOD = 0; + sampDesc.MaxLOD = D3D11_FLOAT32_MAX; + + DX::ThrowIfFailed(device->CreateSamplerState(&sampDesc, samplerState.put())); + + D3D11_DEPTH_STENCIL_DESC dsDesc{}; + dsDesc.DepthEnable = FALSE; + dsDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; + dsDesc.DepthFunc = D3D11_COMPARISON_ALWAYS; + dsDesc.StencilEnable = FALSE; + + DX::ThrowIfFailed(device->CreateDepthStencilState(&dsDesc, depthStencilState.put())); + + D3D11_RASTERIZER_DESC rsDesc{}; + rsDesc.FillMode = D3D11_FILL_SOLID; + rsDesc.CullMode = D3D11_CULL_NONE; // IMPORTANT + rsDesc.FrontCounterClockwise = FALSE; + rsDesc.DepthBias = 0; + rsDesc.DepthBiasClamp = 0.0f; + rsDesc.SlopeScaledDepthBias = 0.0f; + rsDesc.DepthClipEnable = TRUE; + rsDesc.ScissorEnable = FALSE; + rsDesc.MultisampleEnable = FALSE; + rsDesc.AntialiasedLineEnable = FALSE; + + device->CreateRasterizerState(&rsDesc, rasterState.put()); + + D3D11_BLEND_DESC blendDesc{}; + blendDesc.AlphaToCoverageEnable = FALSE; + blendDesc.IndependentBlendEnable = FALSE; + blendDesc.RenderTarget[0].BlendEnable = FALSE; + blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL; + + device->CreateBlendState(&blendDesc, blendState.put()); + } + + void Setup(ID3D11Texture2D* texture) const + { + D3D11_TEXTURE2D_DESC texDesc{}; + texture->GetDesc(&texDesc); + + auto context = globals::d3d::context; + + context->OMSetDepthStencilState(depthStencilState.get(), 0); + + context->RSSetState(rasterState.get()); + + float blendFactor[4] = { 0, 0, 0, 0 }; + context->OMSetBlendState(blendState.get(), blendFactor, 0xffffffff); + + context->PSSetShader(pixelShader.get(), nullptr, 0); + + ID3D11SamplerState* sampler = samplerState.get(); + context->PSSetSamplers(0, 1, &sampler); + + D3D11_VIEWPORT viewport{}; + viewport.Width 
= static_cast(texDesc.Width); + viewport.Height = static_cast(texDesc.Height); + viewport.MinDepth = 0.0f; + viewport.MaxDepth = 1.0f; + context->RSSetViewports(1, &viewport); + + D3D11_RECT rect{}; + rect.left = 0; + rect.top = 0; + rect.right = static_cast(texDesc.Width); + rect.bottom = static_cast(texDesc.Height); + context->RSSetScissorRects(1, &rect); + + context->IASetInputLayout(inputLayout.get()); + } + + void SetVertexShader(bool dynamic) const + { + globals::d3d::context->VSSetShader(dynamic ? vertexDynamicShader.get() : vertexShader.get(), nullptr, 0); + } + + void Draw(ID3D11Buffer* vertexBuffer, ID3D11Buffer* indexBuffer, uint triangleCount) const + { + auto context = globals::d3d::context; + + context->IASetIndexBuffer(indexBuffer, DXGI_FORMAT_R16_UINT, 0); + + UINT stride = sizeof(UnpackedVertex); + UINT offset = 0; + + context->IASetVertexBuffers(0, 1, &vertexBuffer, &stride, &offset); + + context->DrawIndexed(triangleCount * 3, 0, 0); + } +}; \ No newline at end of file diff --git a/src/Features/Raytracing/Pipeline.h b/src/Features/Raytracing/Pipeline.h new file mode 100644 index 0000000000..c74bfd5aaa --- /dev/null +++ b/src/Features/Raytracing/Pipeline.h @@ -0,0 +1,43 @@ +#pragma once + +#include "Features/Raytracing/Buffer.h" +#include "Features/Raytracing/Heap.h" +#include "Features/Raytracing/HeapManager.h" +#include "Features/Raytracing/ShaderUtils.h" +#include "Features/Raytracing/Utils.h" +#include +#include + +struct IPipeline +{ + virtual ~IPipeline() = default; + + virtual void Initialize() {} + virtual void CreateRootSignature([[maybe_unused]] ID3D12Device5* device) {} + virtual void CompileShaders([[maybe_unused]] ID3D12Device5* device) {} + virtual void SetupResources([[maybe_unused]] ID3D12Device5* device) {} +}; + +template +struct Pipeline : IPipeline +{ + winrt::com_ptr rootSignature = nullptr; + eastl::unique_ptr> heap = nullptr; +}; + +struct MasterPipeline : IPipeline +{ + std::vector> subPipelines; +}; + +template +struct 
RaytracingPipeline : Pipeline +{ + winrt::com_ptr stateObject = nullptr; +}; + +template +struct ComputePipeline : Pipeline +{ + winrt::com_ptr pipelineState = nullptr; +}; \ No newline at end of file diff --git a/src/Features/Raytracing/Pipelines/NRDPipeline.cpp b/src/Features/Raytracing/Pipelines/NRDPipeline.cpp new file mode 100644 index 0000000000..831bb69148 --- /dev/null +++ b/src/Features/Raytracing/Pipelines/NRDPipeline.cpp @@ -0,0 +1,299 @@ +#include "NRDPipeline.h" + +#include "State.h" +#include "Features/Raytracing.h" + +void NRDPipeline::CompileShaders() +{ + +} + +void NRDPipeline::SetupResources(ID3D12Device5* device) +{ + // Initialize NRD: REBLUR, RELAX and SIGMA in one instance + const nrd::DenoiserDesc denoisersDescs[] = { + +#if (NRD_MODE == OCCLUSION) +# if (NRD_COMBINED == 1) + { NRD_ID(REBLUR_DIFFUSE_SPECULAR_OCCLUSION), nrd::Denoiser::REBLUR_DIFFUSE_SPECULAR_OCCLUSION }, +# else + { NRD_ID(REBLUR_DIFFUSE_OCCLUSION), nrd::Denoiser::REBLUR_DIFFUSE_OCCLUSION }, + { NRD_ID(REBLUR_SPECULAR_OCCLUSION), nrd::Denoiser::REBLUR_SPECULAR_OCCLUSION }, +# endif +#elif (NRD_MODE == SH) +# if (NRD_COMBINED == 1) + { NRD_ID(REBLUR_DIFFUSE_SPECULAR_SH), nrd::Denoiser::REBLUR_DIFFUSE_SPECULAR_SH }, +# else + { NRD_ID(REBLUR_DIFFUSE_SH), nrd::Denoiser::REBLUR_DIFFUSE_SH }, + { NRD_ID(REBLUR_SPECULAR_SH), nrd::Denoiser::REBLUR_SPECULAR_SH }, +# endif +#elif (NRD_MODE == DIRECTIONAL_OCCLUSION) + { NRD_ID(REBLUR_DIFFUSE_DIRECTIONAL_OCCLUSION), nrd::Denoiser::REBLUR_DIFFUSE_DIRECTIONAL_OCCLUSION }, +#else +# if (NRD_COMBINED == 1) + { NRD_ID(REBLUR_DIFFUSE_SPECULAR), nrd::Denoiser::REBLUR_DIFFUSE_SPECULAR }, +# else + { NRD_ID(REBLUR_DIFFUSE), nrd::Denoiser::REBLUR_DIFFUSE }, + { NRD_ID(REBLUR_SPECULAR), nrd::Denoiser::REBLUR_SPECULAR }, +# endif +#endif + + // RELAX +#if (NRD_MODE == SH) +# if (NRD_COMBINED == 1) + { NRD_ID(RELAX_DIFFUSE_SPECULAR_SH), nrd::Denoiser::RELAX_DIFFUSE_SPECULAR_SH }, +# else + { NRD_ID(RELAX_DIFFUSE_SH), nrd::Denoiser::RELAX_DIFFUSE_SH 
}, + { NRD_ID(RELAX_SPECULAR_SH), nrd::Denoiser::RELAX_SPECULAR_SH }, +# endif +#else +# if (NRD_COMBINED == 1) + { NRD_ID(RELAX_DIFFUSE_SPECULAR), nrd::Denoiser::RELAX_DIFFUSE_SPECULAR }, +# else + { NRD_ID(RELAX_DIFFUSE), nrd::Denoiser::RELAX_DIFFUSE }, + { NRD_ID(RELAX_SPECULAR), nrd::Denoiser::RELAX_SPECULAR }, +# endif +#endif + + // SIGMA +#if (NRD_MODE < OCCLUSION) + { NRD_ID(SIGMA_SHADOW), SIGMA_VARIANT }, +#endif + + // REFERENCE + { NRD_ID(REFERENCE), nrd::Denoiser::REFERENCE }, + }; + + nrd::InstanceCreationDesc instanceCreationDesc = {}; + instanceCreationDesc.denoisers = denoisersDescs; + instanceCreationDesc.denoisersNum = _countof(denoisersDescs); + + nrd::Result res = nrd::CreateInstance(instanceCreationDesc, instance); + + if (res != nrd::Result::SUCCESS) { + assert(!"Failed to create NRD instance"); + } + + const nrd::InstanceDesc* instanceDesc = nrd::GetInstanceDesc(*instance); + + eastl::vector samplers; + for (uint32_t samplerIndex = 0; samplerIndex < instanceDesc->samplersNum; samplerIndex++) { + const nrd::Sampler& samplerMode = instanceDesc->samplers[samplerIndex]; + + D3D12_FILTER filter = D3D12_FILTER_MIN_MAG_MIP_POINT; // default keeps the sampler well-defined if the switch below hits an unknown mode with asserts compiled out + D3D12_TEXTURE_ADDRESS_MODE address = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; // same reason as filter + + switch (samplerMode) { + case nrd::Sampler::NEAREST_CLAMP: + filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + address = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + break; + case nrd::Sampler::LINEAR_CLAMP: + filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + address = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + break; + default: + assert(!"Unknown NRD sampler mode"); + break; + } + + samplers.emplace_back( + instanceDesc->samplersBaseRegisterIndex + samplerIndex, + filter, + address, + address, + address, + 0.0f, + 16u, + D3D12_COMPARISON_FUNC_LESS_EQUAL, + D3D12_STATIC_BORDER_COLOR_OPAQUE_WHITE, + 0.0f, + D3D12_FLOAT32_MAX, + D3D12_SHADER_VISIBILITY_ALL, + instanceDesc->constantBufferAndSamplersSpaceIndex); + } + + pipelines.resize(instanceDesc->pipelinesNum); + for (uint32_t pipelineIndex = 0; pipelineIndex < instanceDesc->pipelinesNum; 
pipelineIndex++) { + const nrd::PipelineDesc& nrdPipelineDesc = instanceDesc->pipelines[pipelineIndex]; + const nrd::ComputeShaderDesc& nrdComputeShader = nrdPipelineDesc.computeShaderDXIL; + + eastl::unique_ptr pipeline = eastl::make_unique(); + + // Root Signature + eastl::vector srvRanges; + eastl::vector uavRanges; + eastl::vector rootParameters; + + uint32_t srvBaseRegister = instanceDesc->resourcesBaseRegisterIndex; + uint32_t uavBaseRegister = instanceDesc->resourcesBaseRegisterIndex; + + for (uint32_t resourceRangeIndex = 0; resourceRangeIndex < nrdPipelineDesc.resourceRangesNum; resourceRangeIndex++) { + const nrd::ResourceRangeDesc& nrdResourceRange = nrdPipelineDesc.resourceRanges[resourceRangeIndex]; + + bool srv = nrdResourceRange.descriptorType == nrd::DescriptorType::TEXTURE; + + CD3DX12_DESCRIPTOR_RANGE1 descRange; + descRange.Init( + srv ? D3D12_DESCRIPTOR_RANGE_TYPE_SRV : D3D12_DESCRIPTOR_RANGE_TYPE_UAV, + nrdResourceRange.descriptorsNum, + srv ? srvBaseRegister : uavBaseRegister, + instanceDesc->resourcesSpaceIndex); + + if (srv) { + srvRanges.push_back(descRange); + srvBaseRegister += nrdResourceRange.descriptorsNum; + } else { + uavRanges.push_back(descRange); + uavBaseRegister += nrdResourceRange.descriptorsNum; + } + } + + if (srvRanges.empty()) { + rootParameters.emplace_back().InitAsDescriptorTable(0, nullptr); + } else { + rootParameters.emplace_back().InitAsDescriptorTable( + (UINT)srvRanges.size(), + srvRanges.data(), + D3D12_SHADER_VISIBILITY_ALL); + } + + if (uavRanges.empty()) { + rootParameters.emplace_back().InitAsDescriptorTable(0, nullptr); + } else { + rootParameters.emplace_back().InitAsDescriptorTable( + (UINT)uavRanges.size(), + uavRanges.data(), + D3D12_SHADER_VISIBILITY_ALL); + } + + CD3DX12_ROOT_PARAMETER1 constantRootParam; + constantRootParam.InitAsConstantBufferView(instanceDesc->constantBufferRegisterIndex, instanceDesc->constantBufferAndSamplersSpaceIndex); + rootParameters.push_back(constantRootParam); + + auto flags 
= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS; + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSigDesc; + rootSigDesc.Init_1_1( + (uint)rootParameters.size(), + rootParameters.data(), + (uint)samplers.size(), + samplers.data(), + flags); + + winrt::com_ptr serializedRootSig = nullptr; + winrt::com_ptr errorBlob = nullptr; + + DX::ThrowIfFailed(D3DX12SerializeVersionedRootSignature(&rootSigDesc, D3D_ROOT_SIGNATURE_VERSION_1_1, serializedRootSig.put(), errorBlob.put())); + DX::ThrowIfFailed(device->CreateRootSignature(0, serializedRootSig->GetBufferPointer(), serializedRootSig->GetBufferSize(), IID_PPV_ARGS(pipeline->rootSignature.put()))); + DX::ThrowIfFailed(pipeline->rootSignature->SetName(std::format(L"Compute Root Signature - NRD {}", pipelineIndex).c_str())); + + // Shader and Pipeline State + D3D12_COMPUTE_PIPELINE_STATE_DESC computeDesc = {}; + computeDesc.pRootSignature = pipeline->rootSignature.get(); + computeDesc.CS = { nrdComputeShader.bytecode, nrdComputeShader.size }; + + DX::ThrowIfFailed(device->CreateComputePipelineState(&computeDesc, IID_PPV_ARGS(pipeline->pipelineState.put()))); + DX::ThrowIfFailed(pipeline->pipelineState->SetName(std::format(L"Compute Pipeline - NRD {}", pipelineIndex).c_str())); + + pipelines[pipelineIndex] = eastl::move(pipeline); + + /*integrationCreationDesc.resourceWidth = resourceWidth; + integrationCreationDesc.resourceHeight = resourceHeight; + + // Also NRD needs to be recreated on "resize" + nrd::Result result = NRD.RecreateD3D12(integrationCreationDesc, instanceCreationDesc, deviceCreationD3D12Desc);*/ + } +} + +void NRDPipeline::SetupTextureResources([[maybe_unused]] uint2 size) +{ + +} + +void NRDPipeline::UpdateCommonSettings() +{ + auto state = globals::state; + + 
std::memcpy(&commonSettings.worldToViewMatrixPrev, &commonSettings.worldToViewMatrix, sizeof(Matrix)); + std::memcpy(&commonSettings.worldToViewMatrix, &globals::game::frameBufferCached.GetCameraViewInverse().Transpose().m, sizeof(Matrix)); + + std::memcpy(&commonSettings.viewToClipMatrixPrev, &commonSettings.viewToClipMatrix, sizeof(Matrix)); // save last frame's projection before the next line overwrites it + std::memcpy(&commonSettings.viewToClipMatrix, &globals::game::frameBufferCached.GetCameraProjUnjittered().Transpose().m, sizeof(Matrix)); + + commonSettings.isMotionVectorInWorldSpace = false; + commonSettings.motionVectorScale[0] = (commonSettings.isMotionVectorInWorldSpace) ? (1.f) : (1.f / state->screenSize.x); + commonSettings.motionVectorScale[1] = (commonSettings.isMotionVectorInWorldSpace) ? (1.f) : (1.f / state->screenSize.y); + commonSettings.motionVectorScale[2] = 1.0f; + + auto& rt = globals::features::raytracing; + + auto screenSize = rt.GetScreenSize(); + auto phaseCount = rt.GetJitterPhaseCount(rt.renderSize.x, screenSize.x); + + commonSettings.cameraJitterPrev[0] = jitter.x; + commonSettings.cameraJitterPrev[1] = jitter.y; + + rt.GetJitterOffset(&jitter.x, &jitter.y, state->frameCount, phaseCount); + + commonSettings.cameraJitter[0] = jitter.x; + commonSettings.cameraJitter[1] = jitter.y; + + commonSettings.frameIndex = frameIndex; + + commonSettings.denoisingRange = kMaxSceneDistance * 2; // with various bounces (in non-primary planes or with PSR) the virtual view Z can be much longer, so adding 2x! + //commonSettings.enableValidation = enableValidation && renderTargets.DenoiserOutValidation != nullptr; + //commonSettings.disocclusionThreshold = disocclusionThreshold; + //commonSettings.disocclusionThresholdAlternate = disocclusionThresholdAlternate; + //commonSettings.isDisocclusionThresholdMixAvailable = useDisocclusionThresholdAlternateMix; + //commonSettings.timeDeltaBetweenFrames = timeDeltaBetweenFrames; + //commonSettings.accumulationMode = (resetHistory) ? 
(nrd::AccumulationMode::CLEAR_AND_RESTART) : (nrd::AccumulationMode::CONTINUE); + + auto const& textureDesc = rt.outputTexture->resource->GetDesc(); + + if (frameIndex == 0) { + commonSettings.resourceSizePrev[0] = static_cast(textureDesc.Width); + commonSettings.resourceSizePrev[1] = static_cast(textureDesc.Height); + commonSettings.rectSizePrev[0] = static_cast(textureDesc.Width); + commonSettings.rectSizePrev[1] = static_cast(textureDesc.Height); + } else { + commonSettings.resourceSizePrev[0] = commonSettings.resourceSize[0]; + commonSettings.resourceSizePrev[1] = commonSettings.resourceSize[1]; + commonSettings.rectSizePrev[0] = commonSettings.rectSize[0]; + commonSettings.rectSizePrev[1] = commonSettings.rectSize[1]; + } + + commonSettings.resourceSize[0] = static_cast(textureDesc.Width); + commonSettings.resourceSize[1] = static_cast(textureDesc.Height); + commonSettings.rectSize[0] = static_cast(textureDesc.Width); + commonSettings.rectSize[1] = static_cast(textureDesc.Height); + + nrd::SetCommonSettings(*instance, commonSettings); +} + +void NRDPipeline::Denoise([[maybe_unused]]ID3D12GraphicsCommandList4* commandList) +{ + nrd::Identifier identifier = NRD_ID(RELAX_DIFFUSE_SPECULAR); + + nrd::SetDenoiserSettings(*instance, identifier, &settings.RelaxSettings); + + UpdateCommonSettings(); + + const nrd::DispatchDesc* dispatchDescs = nullptr; + uint32_t dispatchDescNum = 0; + nrd::GetComputeDispatches(*instance, &identifier, 1, dispatchDescs, dispatchDescNum); + + //const nrd::InstanceDesc* instanceDesc = nrd::GetInstanceDesc(*instance); + + for (uint32_t dispatchIndex = 0; dispatchIndex < dispatchDescNum; dispatchIndex++) { + const nrd::DispatchDesc& dispatchDesc = dispatchDescs[dispatchIndex]; + + if (dispatchDesc.name) { + logger::info("[RT] NRDPipeline::Denoise: {}", dispatchDesc.name); + } + } + + frameIndex++; +} \ No newline at end of file diff --git a/src/Features/Raytracing/Pipelines/NRDPipeline.h b/src/Features/Raytracing/Pipelines/NRDPipeline.h new 
file mode 100644 index 0000000000..7ab2b787c7 --- /dev/null +++ b/src/Features/Raytracing/Pipelines/NRDPipeline.h @@ -0,0 +1,87 @@ +#pragma once + +#include "Features/Raytracing/Buffer.h" +#include "Features/Raytracing/Heap.h" +#include "Features/Raytracing/HeapManager.h" +#include "Features/Raytracing/Pipeline.h" +#include "Features/Raytracing/Utils.h" +#include +#include + +#include +#include "Raytracing/Denoiser/NRD/Shared.hlsli" + +#include "Features/Raytracing/Types.h" + +#include "Features/Upscaling/DX12SwapChain.h" + +// NRD sample doesn't use several instances of the same denoiser in one NRD instance (like REBLUR_DIFFUSE x 3), +// thus we can use fields of "nrd::Denoiser" enum as unique identifiers +#define NRD_ID(x) nrd::Identifier(nrd::Denoiser::x) + +#define NRD_COMBINED 1 + +#if (SIGMA_TRANSLUCENCY == 1) +# define SIGMA_VARIANT nrd::Denoiser::SIGMA_SHADOW_TRANSLUCENCY +#else +# define SIGMA_VARIANT nrd::Denoiser::SIGMA_SHADOW +#endif + +struct NDRSubPipeline : IPipeline +{ + winrt::com_ptr rootSignature = nullptr; + winrt::com_ptr pipelineState = nullptr; +}; + +struct NRDPipeline : IPipeline +{ + // NRD + nrd::CommonSettings commonSettings = {}; + + nrd::SigmaSettings m_SigmaSettings = {}; + nrd::ReferenceSettings m_ReferenceSettings = {}; + + //eastl::unordered_map denoisers; + eastl::vector> pipelines; + + nrd::Instance* instance = nullptr; + + float2 jitter = { 0, 0 }; + uint frameIndex = 0; + + static constexpr float kMaxSceneDistance = 50000.0f; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT( + nrd::RelaxSettings, + checkerboardMode, + hitDistanceReconstructionMode, + diffuseMaxAccumulatedFrameNum, + specularMaxAccumulatedFrameNum, + diffuseMaxFastAccumulatedFrameNum, + specularMaxFastAccumulatedFrameNum, + fastHistoryClampingSigmaScale) + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT( + nrd::ReblurSettings, + checkerboardMode, + hitDistanceReconstructionMode, + maxAccumulatedFrameNum, + maxFastAccumulatedFrameNum, + maxStabilizedFrameNum, + 
fastHistoryClampingSigmaScale) + + struct Settings + { + nrd::Denoiser Denoiser = nrd::Denoiser::RELAX_DIFFUSE; + nrd::RelaxSettings RelaxSettings = {}; + nrd::ReblurSettings ReblurSettings = {}; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Settings, RelaxSettings, ReblurSettings) + } settings; + + void CompileShaders(); + virtual void SetupResources(ID3D12Device5* device) override; + void UpdateCommonSettings(); + void Denoise(ID3D12GraphicsCommandList4* commandList); + void SetupTextureResources(uint2 size); +}; \ No newline at end of file diff --git a/src/Features/Raytracing/Pipelines/SHaRCPipeline.cpp b/src/Features/Raytracing/Pipelines/SHaRCPipeline.cpp new file mode 100644 index 0000000000..8199fc4e4b --- /dev/null +++ b/src/Features/Raytracing/Pipelines/SHaRCPipeline.cpp @@ -0,0 +1,111 @@ +#include "SHaRCPipeline.h" +#include "Features/Raytracing.h" + +void SHaRCPipeline::CreateRootSignature(ID3D12Device5* device) +{ + heap = eastl::make_unique>( + device, + D3D12_DESCRIPTOR_HEAP_DESC(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, SHaRCHeap::NumDescriptors(), D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + + heap->CreateTable( + SHaRCHeap::Table::UAV, + D3D12_DESCRIPTOR_RANGE_TYPE_UAV, + { { SHaRCHeap::Slot::SHaRCHashEntries, 1 }, + { SHaRCHeap::Slot::SHaRCAccumulation, 1 }, + { SHaRCHeap::Slot::SHaRCResolved, 1 } }); + + auto rootParameters = heap->GetRootParameters(); + + CD3DX12_ROOT_PARAMETER1 constantRootParam; + constantRootParam.InitAsConstantBufferView(0, 0); + rootParameters.push_back(constantRootParam); + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSigDesc; + rootSigDesc.Init_1_1( + static_cast(rootParameters.size()), + rootParameters.data(), + 0, + nullptr, + D3D12_ROOT_SIGNATURE_FLAG_NONE); + + winrt::com_ptr serializedRootSig = nullptr; + winrt::com_ptr errorBlob = nullptr; + + DX::ThrowIfFailed(D3DX12SerializeVersionedRootSignature(&rootSigDesc, D3D_ROOT_SIGNATURE_VERSION_1_1, serializedRootSig.put(), errorBlob.put())); + 
DX::ThrowIfFailed(device->CreateRootSignature(0, serializedRootSig->GetBufferPointer(), serializedRootSig->GetBufferSize(), IID_PPV_ARGS(rootSignature.put()))); + DX::ThrowIfFailed(rootSignature->SetName(L"Compute Root Signature - SHaRC")); +} + +void SHaRCPipeline::CompileShaders(ID3D12Device5* device) +{ + winrt::com_ptr shaderBlob = nullptr; + ShaderUtils::CompileShader(shaderBlob, L"Data/Shaders/Raytracing/SharcResolveCS.hlsl", {}, L"cs_6_5"); + + D3D12_COMPUTE_PIPELINE_STATE_DESC computeDesc = {}; + computeDesc.pRootSignature = rootSignature.get(); + computeDesc.CS = { shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize() }; + + DX::ThrowIfFailed(device->CreateComputePipelineState(&computeDesc, IID_PPV_ARGS(pipelineState.put()))); + DX::ThrowIfFailed(pipelineState->SetName(L"Compute Pipeline - SHaRC")); +} + +void SHaRCPipeline::SetupResources(ID3D12Device5* device) +{ + auto* commandList = globals::features::raytracing.commandList.get(); + + sharcHashEntriesBuffer = eastl::make_unique>(device, MAX_CAPACITY, true); + sharcHashEntriesBuffer->SetName(L"SHaRC HashEntries Buffer"); + sharcHashEntriesBuffer->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + + sharcLockBuffer = eastl::make_unique>(device, MAX_CAPACITY, true); + sharcLockBuffer->SetName(L"SHaRC Lock Buffer"); + sharcLockBuffer->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + + sharcAccumulationBuffer = eastl::make_unique>(device, MAX_CAPACITY, true); + sharcAccumulationBuffer->SetName(L"SHaRC Accumulation Buffer"); + sharcAccumulationBuffer->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + + sharcResolvedBuffer = eastl::make_unique>(device, MAX_CAPACITY, true); + sharcResolvedBuffer->SetName(L"SHaRC Resolved Buffer"); + sharcResolvedBuffer->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); +} + +void SHaRCPipeline::CreateUAVs(CD3DX12_CPU_DESCRIPTOR_HANDLE hashEntries, CD3DX12_CPU_DESCRIPTOR_HANDLE lock, 
CD3DX12_CPU_DESCRIPTOR_HANDLE accumulation, CD3DX12_CPU_DESCRIPTOR_HANDLE resolved) +{ + // UAVs for resolve + sharcHashEntriesBuffer->CreateUAV(heap->CPUHandle(SHaRCHeap::Slot::SHaRCHashEntries)); + sharcAccumulationBuffer->CreateUAV(heap->CPUHandle(SHaRCHeap::Slot::SHaRCAccumulation)); + sharcResolvedBuffer->CreateUAV(heap->CPUHandle(SHaRCHeap::Slot::SHaRCResolved)); + + // UAVs for RT + sharcHashEntriesBuffer->CreateUAV(hashEntries); + sharcLockBuffer->CreateUAV(lock); + sharcAccumulationBuffer->CreateUAV(accumulation); + sharcResolvedBuffer->CreateUAV(resolved); +} + +void SHaRCPipeline::Resolve(ID3D12GraphicsCommandList4* commandList, ID3D12Resource* frameBuffer) +{ + commandList->SetPipelineState(pipelineState.get()); + commandList->SetComputeRootSignature(rootSignature.get()); + + auto* pHeap = heap->Heap(); + commandList->SetDescriptorHeaps(1, &pHeap); + + commandList->SetComputeRootDescriptorTable(0, heap->TableGPUHandle(SHaRCHeap::Table::UAV)); + + commandList->SetComputeRootConstantBufferView(1, frameBuffer->GetGPUVirtualAddress()); + + CD3DX12_RESOURCE_BARRIER uavBarrier[3] = { + CD3DX12_RESOURCE_BARRIER::UAV(sharcHashEntriesBuffer->resource.get()), + CD3DX12_RESOURCE_BARRIER::UAV(sharcAccumulationBuffer->resource.get()), + CD3DX12_RESOURCE_BARRIER::UAV(sharcResolvedBuffer->resource.get()) + }; + + commandList->ResourceBarrier(_countof(uavBarrier), uavBarrier); + + const uint dispatchSize = DivideRoundUp(MAX_CAPACITY, GROUP_SIZE); + commandList->Dispatch(dispatchSize, 1, 1); + + commandList->ResourceBarrier(_countof(uavBarrier), uavBarrier); +} \ No newline at end of file diff --git a/src/Features/Raytracing/Pipelines/SHaRCPipeline.h b/src/Features/Raytracing/Pipelines/SHaRCPipeline.h new file mode 100644 index 0000000000..215be36cb7 --- /dev/null +++ b/src/Features/Raytracing/Pipelines/SHaRCPipeline.h @@ -0,0 +1,79 @@ +#pragma once + +#include "Features/Raytracing/Buffer.h" +#include "Features/Raytracing/Heap.h" +#include 
"Features/Raytracing/HeapManager.h" +#include "Features/Raytracing/Pipeline.h" +#include "PCH.h" +#include + +#include "Features/Raytracing/Types.h" + +#include "Raytracing/Includes/RT/SHaRC/SharcTypes.h" +#include "Raytracing/Includes/Types/FrameData.hlsli" + +struct SHaRCHeapDef +{ + enum class Table + { + UAV + }; + + enum class Slot + { + SHaRCHashEntries, + SHaRCAccumulation, + SHaRCResolved, + NumDescriptors, + None + }; +}; +using SHaRCHeap = Heap; + +struct SHaRCPipeline : ComputePipeline +{ + static constexpr uint GROUP_SIZE = 256; + static constexpr size_t MAX_CAPACITY = 4 * 1024 * 1024; + + eastl::unique_ptr> sharcHashEntriesBuffer = nullptr; + eastl::unique_ptr> sharcLockBuffer = nullptr; + eastl::unique_ptr> sharcAccumulationBuffer = nullptr; + eastl::unique_ptr> sharcResolvedBuffer = nullptr; + + struct Settings + { + float SceneScale = 1.0f; + int AccumFrameNum = 10; + int StaleFrameNum = 64; + float RadianceScale = 1e3f; + bool AntifireflyFilter = true; + + Settings() = default; + Settings(const Settings&) = default; + + Settings& operator=(const Settings&) = default; + bool operator==(const Settings&) const = default; + bool operator!=(const Settings&) const = default; + + SHaRCFrameData GetFrameData(bool updatePass) const + { + return { + .SceneScale = SceneScale, + .AccumFrameNum = (uint)AccumFrameNum, + .StaleFrameNum = (uint)StaleFrameNum, + .RadianceScale = RadianceScale, + .AntifireflyFilter = AntifireflyFilter, + .Capacity = SHaRCPipeline::MAX_CAPACITY, + .UpdatePass = updatePass + }; + } + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Settings, SceneScale, AccumFrameNum, StaleFrameNum, AntifireflyFilter) + }; + + void CreateRootSignature(ID3D12Device5* device) override; + void CompileShaders(ID3D12Device5* device) override; + void SetupResources(ID3D12Device5* device) override; + void Resolve(ID3D12GraphicsCommandList4* commandList, ID3D12Resource* frameBuffer); + void CreateUAVs(CD3DX12_CPU_DESCRIPTOR_HANDLE hashEntries, 
CD3DX12_CPU_DESCRIPTOR_HANDLE lock, CD3DX12_CPU_DESCRIPTOR_HANDLE accumulation, CD3DX12_CPU_DESCRIPTOR_HANDLE resolved); +}; \ No newline at end of file diff --git a/src/Features/Raytracing/Pipelines/SVGFPipeline.cpp b/src/Features/Raytracing/Pipelines/SVGFPipeline.cpp new file mode 100644 index 0000000000..5adf05bd01 --- /dev/null +++ b/src/Features/Raytracing/Pipelines/SVGFPipeline.cpp @@ -0,0 +1,188 @@ +#include "SVGFPipeline.h" +#include "State.h" + +void SVGFPipeline::CompileShaders() +{ + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\Denoiser\\SVGF\\TemporalCS.hlsl", { { "DX11", "" } }, "cs_5_0")); rawPtr) + temporalCS.attach(rawPtr); + + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\Denoiser\\SVGF\\VarianceCS.hlsl", { { "DX11", "" } }, "cs_5_0")); rawPtr) + varianceCS.attach(rawPtr); + + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\Denoiser\\SVGF\\SpatialCS.hlsl", { { "DX11", "" } }, "cs_5_0")); rawPtr) + spatialDiffuseCS.attach(rawPtr); + + if (auto rawPtr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\Raytracing\\Denoiser\\SVGF\\SpatialCS.hlsl", { { "DX11", "" }, { "SSRT_SPECULAR", "" } }, "cs_5_0")); rawPtr) + spatialSpecularCS.attach(rawPtr); +} + +void SVGFPipeline::SetupResources() +{ + frameData = eastl::make_unique(); + + auto cbDesc = ConstantBufferDesc(); + frameBuffer = eastl::make_unique(cbDesc); + + CompileShaders(); +} + +void SVGFPipeline::SetupTextureResources(uint2 size) +{ + // RGBA16 + { + temporalTexture = CreateTexture2D(size, DXGI_FORMAT_R16G16B16A16_FLOAT, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS); + + for (uint i = 0; i < HISTORY_TEXTURES; i++) + historyTexture[i] = CreateTexture2D(size, DXGI_FORMAT_R16G16B16A16_FLOAT, D3D11_BIND_SHADER_RESOURCE); + + varianceTexture = CreateTexture2D(size, DXGI_FORMAT_R16G16B16A16_FLOAT, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS); + } + + // 
RG11B10 + { + momentsTexture = CreateTexture2D(size, DXGI_FORMAT_R11G11B10_FLOAT, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS); + + for (uint i = 0; i < HISTORY_TEXTURES; i++) + historyMomentsTexture[i] = CreateTexture2D(size, DXGI_FORMAT_R11G11B10_FLOAT, D3D11_BIND_SHADER_RESOURCE); + } + + // RG32 + depthLinearTexture = CreateTexture2D(size, DXGI_FORMAT_R32G32_FLOAT, D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS); + + historyDepthTexture = CreateTexture2D(size, DXGI_FORMAT_R32G32_FLOAT, D3D11_BIND_SHADER_RESOURCE); + + // RGBA16 SNORM + historyNormalsTexture = CreateTexture2D(size, DXGI_FORMAT_R16G16B16A16_SNORM, D3D11_BIND_SHADER_RESOURCE); +} + +void SVGFPipeline::Denoise(ID3D11DeviceContext4* context, uint2 renderSize, Settings settings, WrappedResource* normalRoughness, WrappedResource* color, const bool diffuse) const +{ + const uint historyIndex = diffuse ? 0 : 1; + + const uint2 dispatchCount = { DivideRoundUp(renderSize.x, 8u), DivideRoundUp(renderSize.y, 8u) }; + + frameData->Alpha = 1.0f / static_cast(settings.AlphaFrames); + frameData->MomentsAlpha = 1.0f / static_cast(settings.MomentsAlphaFrames); + frameData->AtrousIterations = settings.AtrousIterations; + + frameData->ColorPhi = settings.ColorPhi; + frameData->NormalPhi = settings.NormalPhi; + frameData->DepthPhi = settings.DepthPhi / Util::Units::GAME_UNIT_TO_M; + + frameData->DepthThreshold = settings.DepthThreshold / Util::Units::GAME_UNIT_TO_M; + frameData->NormalThreshold = std::cosf(static_cast(settings.NormalThreshold)); + frameData->HistoryThreshold = settings.HistoryThreshold; + + auto eye = Util::GetCameraData(0); + float2 ndcToViewMult = float2(2.0f / eye.projMat(0, 0), -2.0f / eye.projMat(1, 1)); + float2 ndcToViewAdd = float2(-1.0f / eye.projMat(0, 0), 1.0f / eye.projMat(1, 1)); + + frameData->NDCToView = float4(ndcToViewMult.x, ndcToViewMult.y, ndcToViewAdd.x, ndcToViewAdd.y); + + frameBuffer->Update(frameData.get(), sizeof(SVGF)); + + auto cb = 
frameBuffer->CB(); + context->CSSetConstantBuffers(1, 1, &cb); + + cb = globals::state->sharedDataCB->CB(); + context->CSSetConstantBuffers(5, 1, &cb); + + cb = *globals::game::perFrame.get(); + context->CSSetConstantBuffers(12, 1, &cb); + + std::array srvs = { nullptr }; + std::array uavs = { nullptr }; + + auto resetViews = [&]() { + srvs.fill(nullptr); + uavs.fill(nullptr); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + }; + + auto renderer = globals::game::renderer; + auto motion = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMOTION_VECTOR]; + auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY]; + + // temporal filter + srvs.at(0) = historyTexture[historyIndex]->srv.get(); + srvs.at(1) = motion.SRV; + srvs.at(2) = normalRoughness->srv; + srvs.at(3) = color->srv; + srvs.at(4) = historyMomentsTexture[historyIndex]->srv.get(); + srvs.at(5) = historyDepthTexture->srv.get(); + srvs.at(6) = historyNormalsTexture->srv.get(); + srvs.at(7) = depth.depthSRV; + + uavs.at(0) = temporalTexture->uav.get(); + uavs.at(1) = momentsTexture->uav.get(); + uavs.at(2) = depthLinearTexture->uav.get(); + + context->CSSetShaderResources(0, 8, srvs.data()); // slots 0-7 are populated above; binding 7 views left the depth SRV in slot 7 unbound + context->CSSetUnorderedAccessViews(0, 3, uavs.data(), nullptr); + context->CSSetShader(temporalCS.get(), nullptr, 0); + + context->Dispatch((uint)dispatchCount.x, (uint)dispatchCount.y, 1); + resetViews(); + + context->CopyResource(historyMomentsTexture[historyIndex]->resource.get(), momentsTexture->resource.get()); + + // variance filter + if (settings.Variance) { + srvs.at(0) = historyTexture[historyIndex]->srv.get(); + srvs.at(1) = momentsTexture->srv.get(); + srvs.at(2) = normalRoughness->srv; + srvs.at(3) = temporalTexture->srv.get(); + srvs.at(4) = depthLinearTexture->srv.get(); + + uavs.at(0) = varianceTexture->uav.get(); + + 
context->CSSetShaderResources(0, 5, srvs.data()); + context->CSSetUnorderedAccessViews(0, 1, uavs.data(), nullptr); + context->CSSetShader(varianceCS.get(), nullptr, 0); + + context->Dispatch((uint)dispatchCount.x, (uint)dispatchCount.y, 1); + resetViews(); + } else { + context->CopyResource(varianceTexture->resource.get(), temporalTexture->resource.get()); + } + + // spatial filter + if (settings.Spatial) { + for (uint i = 0; i < settings.AtrousIterations; ++i) { + frameData->AtrousIterations = 1 << i; + frameBuffer->Update(frameData.get(), sizeof(SVGF)); + + cb = frameBuffer->CB(); + context->CSSetConstantBuffers(1, 1, &cb); + + srvs.at(0) = (i % 2 == 0) ? varianceTexture->srv.get() : color->srv; + srvs.at(2) = normalRoughness->srv; + srvs.at(4) = depthLinearTexture->srv.get(); + + uavs.at(0) = (i % 2 == 0) ? color->uav : varianceTexture->uav.get(); + + context->CSSetShaderResources(0, 5, srvs.data()); + context->CSSetUnorderedAccessViews(0, 1, uavs.data(), nullptr); + context->CSSetShader(diffuse ? 
spatialDiffuseCS.get() : spatialSpecularCS.get(), nullptr, 0); + + context->Dispatch((uint)dispatchCount.x, (uint)dispatchCount.y, 1); + + resetViews(); + } + + if (settings.AtrousIterations % 2 == 0) { + context->CopyResource(color->resource11, varianceTexture->resource.get()); + } + } else { + context->CopyResource(color->resource11, varianceTexture->resource.get()); + } + + context->CopyResource(historyDepthTexture->resource.get(), depthLinearTexture->resource.get()); + //context->CopyResource(historyDepthTexture->resource.get(), depth.texture); + context->CopyResource(historyNormalsTexture->resource.get(), normalRoughness->resource11); + context->CopyResource(historyTexture[historyIndex]->resource.get(), color->resource11); + + context->CSSetShader(nullptr, nullptr, 0); +} \ No newline at end of file diff --git a/src/Features/Raytracing/Pipelines/SVGFPipeline.h b/src/Features/Raytracing/Pipelines/SVGFPipeline.h new file mode 100644 index 0000000000..e06fd0b6be --- /dev/null +++ b/src/Features/Raytracing/Pipelines/SVGFPipeline.h @@ -0,0 +1,64 @@ +#pragma once + +#include "Features/Raytracing/Buffer.h" +#include "Features/Raytracing/Heap.h" +#include "Features/Raytracing/HeapManager.h" +#include "Features/Raytracing/Pipeline.h" +#include "Features/Raytracing/Utils.h" +#include +#include + +#include "Features/Raytracing/Types.h" + +#include "Raytracing/Denoiser/SVGF/SVGF.hlsli" + +#include "Buffer.h" + +#include "Features/Upscaling/DX12SwapChain.h" + +struct SVGFPipeline +{ + // Diffuse and Specular + static constexpr uint HISTORY_TEXTURES = 2; + + eastl::unique_ptr temporalTexture = nullptr; + eastl::unique_ptr momentsTexture = nullptr; + eastl::unique_ptr varianceTexture = nullptr; + eastl::array, HISTORY_TEXTURES> historyMomentsTexture; + eastl::unique_ptr historyDepthTexture = nullptr; + eastl::unique_ptr historyNormalsTexture = nullptr; + eastl::array, HISTORY_TEXTURES> historyTexture; + + eastl::unique_ptr depthLinearTexture = nullptr; + + eastl::unique_ptr 
frameData = nullptr; + eastl::unique_ptr frameBuffer = nullptr; + + winrt::com_ptr temporalCS = nullptr; + winrt::com_ptr varianceCS = nullptr; + winrt::com_ptr spatialDiffuseCS = nullptr; + winrt::com_ptr spatialSpecularCS = nullptr; + + struct Settings + { + uint AlphaFrames = 20; + uint MomentsAlphaFrames = 10; + uint AtrousIterations = 2; + float ColorPhi = 1.0f; + float NormalPhi = 256.0f; + float DepthPhi = 0.05f; + float DepthThreshold = 0.1f; + uint NormalThreshold = 30; + uint HistoryThreshold = 2; + bool Variance = true; + bool Spatial = true; + + NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Settings, AlphaFrames, MomentsAlphaFrames, AtrousIterations, ColorPhi, NormalPhi, DepthPhi, DepthThreshold, NormalThreshold, HistoryThreshold, Variance, Spatial) + }; + + void CompileShaders(); + void SetupResources(); + void Denoise(ID3D11DeviceContext4* context, uint2 renderSize, Settings settings, WrappedResource* normalRoughness, WrappedResource* color, const bool diffuse = true) const; + + void SetupTextureResources(uint2 size); +}; \ No newline at end of file diff --git a/src/Features/Raytracing/Pipelines/SkinningPipeline.cpp b/src/Features/Raytracing/Pipelines/SkinningPipeline.cpp new file mode 100644 index 0000000000..d7bf2e1304 --- /dev/null +++ b/src/Features/Raytracing/Pipelines/SkinningPipeline.cpp @@ -0,0 +1,225 @@ +#include "SkinningPipeline.h" + +#include "Features/Raytracing.h" + +void SkinningPipeline::CreateRootSignature(ID3D12Device5* device) +{ + heap = eastl::make_unique>( + device, + D3D12_DESCRIPTOR_HEAP_DESC(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, SkinningHeap::NumDescriptors(), D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)); + + heap->CreateTable( + SkinningHeap::Table::UAV, + D3D12_DESCRIPTOR_RANGE_TYPE_UAV, + { { SkinningHeap::Slot::Output, UINT_MAX, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE } }); + + heap->CreateTable( + SkinningHeap::Table::SRV, + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + { { SkinningHeap::Slot::UpdateData, 1, 0 }, + { 
SkinningHeap::Slot::BoneMatrices, 1, 0 } }); + + heap->CreateTable( + SkinningHeap::Table::DynamicBuffer, + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + { { SkinningHeap::Slot::DynamicVertices, UINT_MAX, 1, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE } }); + + heap->CreateTable( + SkinningHeap::Table::VertexBuffer, + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + { { SkinningHeap::Slot::Vertices, UINT_MAX, 2, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE } }); + + heap->CreateTable( + SkinningHeap::Table::SkinningBuffer, + D3D12_DESCRIPTOR_RANGE_TYPE_SRV, + { { SkinningHeap::Slot::SkinningData, UINT_MAX, 3, D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE } }); + + auto rootParameters = heap->GetRootParameters(); + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSigDesc; + rootSigDesc.Init_1_1( + static_cast(rootParameters.size()), + rootParameters.data(), + 0, + nullptr, + D3D12_ROOT_SIGNATURE_FLAG_NONE); + + winrt::com_ptr serializedRootSig; + winrt::com_ptr errorBlob; + + DX::ThrowIfFailed(D3DX12SerializeVersionedRootSignature(&rootSigDesc, D3D_ROOT_SIGNATURE_VERSION_1_1, serializedRootSig.put(), errorBlob.put())); + DX::ThrowIfFailed(device->CreateRootSignature(0, serializedRootSig->GetBufferPointer(), serializedRootSig->GetBufferSize(), IID_PPV_ARGS(rootSignature.put()))); + DX::ThrowIfFailed(rootSignature->SetName(L"Compute Root Signature - Skinning")); +} + +void SkinningPipeline::CompileShaders(ID3D12Device5* device) +{ + const auto threadSizeWStr = std::to_wstring(settings.ThreadGroupSize); + auto mapping = settings.OptimizedMapping ? 
L"OPTIMIZED_MAPPING" : L"STANDARD_MAPPING"; + + winrt::com_ptr shaderBlob; + ShaderUtils::CompileShader(shaderBlob, L"Data/Shaders/Raytracing/SkinningCS.hlsl", { { L"THREAD_GROUP_SIZE", threadSizeWStr.c_str() }, { mapping, L"" } }, L"cs_6_5"); + + D3D12_COMPUTE_PIPELINE_STATE_DESC computeDesc = {}; + computeDesc.pRootSignature = rootSignature.get(); + computeDesc.CS = { shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize() }; + + DX::ThrowIfFailed(device->CreateComputePipelineState(&computeDesc, IID_PPV_ARGS(pipelineState.put()))); + DX::ThrowIfFailed(pipelineState->SetName(L"Compute Pipeline - Skinning")); + + recompile = false; +} + +void SkinningPipeline::SetupResources(ID3D12Device5* device) +{ + auto* commandList = globals::features::raytracing.commandList.get(); + + { + vertexUpdateBuffer = eastl::make_unique>(device, MAX_GEOMETRY, false); + vertexUpdateBuffer->SetName(L"Vertex Update Buffer"); + vertexUpdateBuffer->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + vertexUpdateBuffer->CreateSRV(heap->CPUHandle(SkinningHeap::Slot::UpdateData)); + + DX::ThrowIfFailed(vertexUpdateBuffer->UploadResource()->Map(0, nullptr, reinterpret_cast(&vertexUpdateData))); + } + + { + boneMatricesBuffer = eastl::make_unique>(device, MAX_BONE_MATRICES, false); + boneMatricesBuffer->SetName(L"Bone Matrices Buffer"); + boneMatricesBuffer->TransitionBarrier(commandList, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + + boneMatricesBuffer->CreateSRV(heap->CPUHandle(SkinningHeap::Slot::BoneMatrices)); + + DX::ThrowIfFailed(boneMatricesBuffer->UploadResource()->Map(0, nullptr, reinterpret_cast(&boneMatricesData))); + } +} + +void SkinningPipeline::QueueUpdate(Shape::Flags updateFlags, eastl::string path, Shape* shape) +{ + queuedShapes.emplace( + shape, + QueuedShape{ updateFlags, path }); +} + +bool SkinningPipeline::PrepareResources(ID3D12GraphicsCommandList4* commandList, uint& numShapes, uint& numVertices) +{ + if (queuedShapes.empty()) 
+ return false; + + auto queueSize = queuedShapes.size(); + + // Barrier to UAV state + barriers.clear(); + barriers.reserve(queueSize); + + uint shapeIndex = 0; + uint boneMatrixIndex = 0; + + for (auto& [shape, queuedShape] : queuedShapes) { + if (shapeIndex >= MAX_GEOMETRY) { + logger::critical("[RT] SkinningPipeline::PrepareResources - Exceeded maximum geometry update limit of {}", MAX_GEOMETRY); + break; + } + + if (boneMatrixIndex >= MAX_BONE_MATRICES) { + logger::critical("[RT] SkinningPipeline::PrepareResources - Exceeded maximum bone matrices limit of {}", MAX_BONE_MATRICES); + break; + } + + numVertices = std::max(numVertices, shape->vertexCount); + + vertexUpdateData[shapeIndex] = VertexUpdateData(shape->allocation->GetIndex(), queuedShape.updateFlags, shape->vertexCount, boneMatrixIndex, shape->flags.underlying()); + shapeIndex++; + + // Dynamic TriShapes + if (queuedShape.updateFlags & Shape::Flags::Dynamic) + shape->UpdateUploadDynamicBuffers(commandList); + + // Skinning - This is a bit more involved + if (queuedShape.updateFlags & Shape::Flags::Skinned) { + const auto numBoneMatrices = static_cast(shape->boneMatrices.size()); + + std::memcpy(boneMatricesData + boneMatrixIndex, shape->boneMatrices.data(), sizeof(float3x4) * numBoneMatrices); + boneMatrixIndex += numBoneMatrices; + } + + CD3DX12_RESOURCE_BARRIER barrier; + if (shape->vertexBuffer->GetTransitionBarrier(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, barrier)) + barriers.push_back(barrier); + } + + uint numBarriers = (uint)barriers.size(); + + if (numBarriers > 0) + commandList->ResourceBarrier(numBarriers, barriers.data()); + + vertexUpdateBuffer->UploadRegion(commandList, sizeof(VertexUpdateData) * shapeIndex, 0); + + boneMatricesBuffer->UploadRegion(commandList, sizeof(float3x4) * boneMatrixIndex, 0); + + numShapes = shapeIndex; + + return true; +} + +void SkinningPipeline::RestoreResources(ID3D12GraphicsCommandList4* commandList) +{ + // Barrier to NPSR state + barriers.clear(); + 
barriers.reserve(queuedShapes.size()); + + for (auto& [shape, queuedShape] : queuedShapes) { + CD3DX12_RESOURCE_BARRIER barrier; + if (shape->vertexBuffer->GetTransitionBarrier(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, barrier)) + barriers.push_back(barrier); + } + + const uint barrierCount = (uint)barriers.size(); + + if (barrierCount > 0) + commandList->ResourceBarrier(barrierCount, barriers.data()); +} + +void SkinningPipeline::ClearQueue() +{ + queuedShapes.clear(); +} + +void SkinningPipeline::Dispatch(ID3D12GraphicsCommandList4* commandList, ID3D12Device5* device) +{ + if (recompile) + CompileShaders(device); + + if (!frameChecker.IsNewFrame()) + return; + + uint count = 0; + uint vertexCount = 0; + + if (!PrepareResources(commandList, count, vertexCount)) + return; + + commandList->SetPipelineState(pipelineState.get()); + commandList->SetComputeRootSignature(rootSignature.get()); + + auto* pHeap = heap->Heap(); + commandList->SetDescriptorHeaps(1, &pHeap); + + commandList->SetComputeRootDescriptorTable(0, heap->TableGPUHandle(SkinningHeap::Table::UAV)); + + commandList->SetComputeRootDescriptorTable(1, heap->TableGPUHandle(SkinningHeap::Table::SRV)); + + commandList->SetComputeRootDescriptorTable(2, heap->TableGPUHandle(SkinningHeap::Table::DynamicBuffer)); + + commandList->SetComputeRootDescriptorTable(3, heap->TableGPUHandle(SkinningHeap::Table::VertexBuffer)); + + commandList->SetComputeRootDescriptorTable(4, heap->TableGPUHandle(SkinningHeap::Table::SkinningBuffer)); + + const uint vertexDispatchSize = DivideRoundUp(vertexCount, settings.ThreadGroupSize); + commandList->Dispatch(count, vertexDispatchSize, 1); + + RestoreResources(commandList); + + ClearQueue(); +} \ No newline at end of file diff --git a/src/Features/Raytracing/Pipelines/SkinningPipeline.h b/src/Features/Raytracing/Pipelines/SkinningPipeline.h new file mode 100644 index 0000000000..8b069226ac --- /dev/null +++ b/src/Features/Raytracing/Pipelines/SkinningPipeline.h @@ -0,0 +1,93 @@ 
+#pragma once + +#include "PCH.h" + +#include + +#include "Features/Raytracing/Buffer.h" +#include "Features/Raytracing/Heap.h" +#include "Features/Raytracing/HeapManager.h" +#include "Features/Raytracing/Pipeline.h" +#include "Features/Raytracing/RTConstants.h" +#include "Features/Raytracing/Types.h" + +#include "Features/Raytracing/Core/Shape.h" + +#include "Raytracing/Includes/RT/SHaRC/SharcTypes.h" +#include "Raytracing/Includes/Types/FrameData.hlsli" +#include "Raytracing/Includes/Types/VertexUpdate.hlsli" + +struct SkinningHeapDef +{ + enum class Table + { + UAV, + SRV, + DynamicBuffer, + VertexBuffer, + SkinningBuffer + }; + + enum class Slot + { + Output, + UpdateData = Output + RTConstants::MAX_SHAPES, + BoneMatrices, + DynamicVertices, + Vertices = DynamicVertices + RTConstants::MAX_SHAPES, + SkinningData = Vertices + RTConstants::MAX_SHAPES, + NumDescriptors = SkinningData + RTConstants::MAX_SHAPES, + None + }; +}; +using SkinningHeap = Heap; + +struct SkinningPipeline : ComputePipeline +{ + static constexpr uint MIN_THREAD_GROUP_SIZE = 4; + static constexpr uint MAX_THREAD_GROUP_SIZE = 64; + + static constexpr uint MAX_BATCHES = 4; + + static constexpr uint MAX_GEOMETRY = 2048; + + static constexpr uint MAX_BONE_MATRICES = MAX_GEOMETRY * 10; + + struct Settings + { + bool OptimizedMapping = false; + uint ThreadGroupSize = 32; + } settings; + + struct QueuedShape + { + Shape::Flags updateFlags; + eastl::string path; + }; + + eastl::unordered_map queuedShapes; + + eastl::unique_ptr> vertexUpdateBuffer = nullptr; + eastl::unique_ptr> boneMatricesBuffer = nullptr; + + VertexUpdateData* vertexUpdateData = nullptr; + float3x4* boneMatricesData = nullptr; + + //eastl::vector vertexUpdateData; + //eastl::vector boneMatricesData; + + eastl::vector barriers; + + Util::FrameChecker frameChecker; + + bool recompile; + + void CreateRootSignature(ID3D12Device5* device) override; + void CompileShaders(ID3D12Device5* device) override; + void 
SetupResources(ID3D12Device5* device) override; + void QueueUpdate(Shape::Flags updateFlags, eastl::string name, Shape* shape); + bool PrepareResources(ID3D12GraphicsCommandList4* commandList, uint& count, uint& vertexCount); + void RestoreResources(ID3D12GraphicsCommandList4* commandList); + void ClearQueue(); + void Dispatch(ID3D12GraphicsCommandList4* commandList, ID3D12Device5* device); +}; \ No newline at end of file diff --git a/src/Features/Raytracing/RE/CellAttachDetachEvent.h b/src/Features/Raytracing/RE/CellAttachDetachEvent.h new file mode 100644 index 0000000000..98fb7e1313 --- /dev/null +++ b/src/Features/Raytracing/RE/CellAttachDetachEvent.h @@ -0,0 +1,20 @@ +#pragma once + +#include "PCH.h" + +namespace RE +{ + struct CellAttachDetachEvent + { + enum Status : std::uint32_t + { + StartAttach = 0, + FinishAttach = 1, + StartDetach = 2, + FinishDetach = 3 + }; + + RE::TESObjectCELL* cell; // 00 + Status status; // 08 0 start attach 1 finish attach 2 start detach 3 finish detach + }; +} \ No newline at end of file diff --git a/src/Features/Raytracing/RTConstants.h b/src/Features/Raytracing/RTConstants.h new file mode 100644 index 0000000000..cd23984519 --- /dev/null +++ b/src/Features/Raytracing/RTConstants.h @@ -0,0 +1,22 @@ +#pragma once + +#include "PCH.h" + +namespace RTConstants +{ + // DX12 will not like if we don't respect these numbers and try to write over the resource end + static constexpr uint MAX_TEXTURES = 4096; + static constexpr uint MAX_MODELS = 1536; + static constexpr uint MAX_SHAPES = MAX_MODELS * 6; + static constexpr uint MAX_MATERIALS = MAX_SHAPES; + static constexpr uint MAX_TRANSFORMS = MAX_SHAPES; + static constexpr uint MAX_INSTANCES = 4096; + static constexpr uint MAX_LIGHTS = 255; + + static constexpr uint SKY_CUBEMAP_SIZE = 256; + static constexpr uint SKY_HEMI_SIZE = SKY_CUBEMAP_SIZE * 2; + + static constexpr uint PLAYER_REFR_FORMID = 0x00000014; + + static constexpr uint MATERIAL_NORMALMAP_ID = 1; +} \ No newline at end of 
file diff --git a/src/Features/Raytracing/RTPipelineBuilder.h b/src/Features/Raytracing/RTPipelineBuilder.h new file mode 100644 index 0000000000..59f64b70bd --- /dev/null +++ b/src/Features/Raytracing/RTPipelineBuilder.h @@ -0,0 +1,218 @@ +#pragma once + +#include "Features/Raytracing/HeapManager.h" +#include "Features/Raytracing/ShaderBindingTable.h" +#include +#include +#include +#include +#include +#include + +namespace DX12 +{ + class RTPipelineBuilder + { + public: + enum ExportType + { + RayGeneration, + Miss, + Hit, + AnyHit + }; + + void AddRayGenLib(IDxcBlob* shaderBlob, const eastl::wstring& exportName, const eastl::wstring& renameFrom = L"main") + { + AddLibrary(shaderBlob, exportName, renameFrom, ExportType::RayGeneration); + } + + void AddMissLib(IDxcBlob* shaderBlob, const eastl::wstring& exportName, const eastl::wstring& renameFrom = L"main") + { + AddLibrary(shaderBlob, exportName, renameFrom, ExportType::Miss); + } + + void AddHitLib(IDxcBlob* shaderBlob, const eastl::wstring& exportName, const eastl::wstring& renameFrom = L"main") + { + AddLibrary(shaderBlob, exportName, renameFrom, ExportType::Hit); + } + + void AddAnyHitLib(IDxcBlob* shaderBlob, const eastl::wstring& exportName, const eastl::wstring& renameFrom = L"main") + { + AddLibrary(shaderBlob, exportName, renameFrom, ExportType::AnyHit); + } + + // Add a DXIL library (shader blob) with exports + void AddLibrary(IDxcBlob* shaderBlob, const eastl::wstring& exportName, const eastl::wstring& renameFrom, const ExportType& exportType) + { + // Store export string + exportedNames.push_back(eastl::make_unique(exportName)); + renameFromNames.push_back(eastl::make_unique(renameFrom)); + + if (exportType == ExportType::RayGeneration) + rayGenNames.push_back(exportName); + else if (exportType == ExportType::Miss) + missNames.push_back(exportName); + + // Prepare export descriptor + dxilExportStorage.emplace_back(eastl::make_unique( + exportedNames.back()->c_str(), + renameFrom.empty() ? 
nullptr : renameFromNames.back()->c_str(), + D3D12_EXPORT_FLAG_NONE)); + + // Store DXIL library descriptor + dxilLibStorage.emplace_back(eastl::make_unique( + D3D12_SHADER_BYTECODE{ + shaderBlob->GetBufferPointer(), + shaderBlob->GetBufferSize(), + }, + 1, + dxilExportStorage.back().get())); + + // Subobject + subobjects.push_back({ .Type = D3D12_STATE_SUBOBJECT_TYPE_DXIL_LIBRARY, + .pDesc = dxilLibStorage.back().get() }); + } + + // Add a hit group + void AddHitGroup(const eastl::wstring& hitGroupName, const eastl::wstring& closestHit = L"", const eastl::wstring& anyHit = L"", const eastl::wstring& intersection = L"") + { + // Store hit group name for lifetime + hitGroupNames.push_back(hitGroupName); + + if (!closestHit.empty()) + closestHitNames.push_back(closestHit); + + if (!anyHit.empty()) + anyHitNames.push_back(anyHit); + + if (!intersection.empty()) + intersectionNames.push_back(intersection); + + hitGroupStorage.emplace_back(eastl::make_unique( + hitGroupNames.back().c_str(), + D3D12_HIT_GROUP_TYPE_TRIANGLES, + anyHit.empty() ? nullptr : anyHitNames.back().c_str(), + closestHit.empty() ? nullptr : closestHitNames.back().c_str(), + intersection.empty() ? 
nullptr : intersectionNames.back().c_str())); + + subobjects.push_back({ .Type = D3D12_STATE_SUBOBJECT_TYPE_HIT_GROUP, + .pDesc = hitGroupStorage.back().get() }); + } + + // Shader config + void AddShaderConfig(UINT maxPayloadSizeInBytes, UINT maxAttributeSizeInBytes) + { + shaderConfigStorage.emplace_back(eastl::make_unique(maxPayloadSizeInBytes, maxAttributeSizeInBytes)); + + subobjects.push_back({ .Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_SHADER_CONFIG, + .pDesc = shaderConfigStorage.back().get() }); + } + + // Global root signature + void AddGlobalRootSignature(ID3D12RootSignature* rootSignature) + { + globalRootStorage.emplace_back(eastl::make_unique(rootSignature)); + + subobjects.push_back({ .Type = D3D12_STATE_SUBOBJECT_TYPE_GLOBAL_ROOT_SIGNATURE, + .pDesc = globalRootStorage.back().get() }); + } + + // Pipeline config + void AddPipelineConfig(UINT maxRecursion) + { + pipelineConfigStorage.emplace_back(eastl::make_unique(maxRecursion)); + + subobjects.push_back({ .Type = D3D12_STATE_SUBOBJECT_TYPE_RAYTRACING_PIPELINE_CONFIG, + .pDesc = pipelineConfigStorage.back().get() }); + } + + // Build final state object descriptor + D3D12_STATE_OBJECT_DESC* MakeStateObjectDesc(D3D12_STATE_OBJECT_TYPE type = D3D12_STATE_OBJECT_TYPE_RAYTRACING_PIPELINE) + { + stateObjectDesc = eastl::make_unique( + type, + static_cast(subobjects.size()), + subobjects.data()); + + return stateObjectDesc.get(); + } + + inline std::string ToUtf8(const eastl::wstring& wstr) + { + if (wstr.empty()) + return std::string(); + + int size_needed = ::WideCharToMultiByte( + CP_UTF8, // convert to UTF-8 + 0, // no special flags + wstr.c_str(), // source wide string + static_cast(wstr.size()), + nullptr, // no output buffer yet + 0, + nullptr, + nullptr); + + std::string result(size_needed, 0); + ::WideCharToMultiByte( + CP_UTF8, + 0, + wstr.c_str(), + static_cast(wstr.size()), + result.data(), + size_needed, + nullptr, + nullptr); + + return result; + } + + ShaderBindingTable 
CreateShaderBindingTable(ID3D12StateObjectProperties* pipelineProps) + { + ShaderBindingTable shaderBindingTable; + + auto writeRecords = [&](const eastl::vector& names, ShaderTableSection& shaderTableSection) { + for (const auto& name : names) { + logger::debug("[RT] Shader Identifier: {}", ToUtf8(name).c_str()); + + ShaderRecord shaderRecord(pipelineProps->GetShaderIdentifier(name.c_str()), D3D12_SHADER_IDENTIFIER_SIZE_IN_BYTES); + shaderTableSection.AddRecord(shaderRecord); + } + }; + + logger::debug("[RT] Writting Raygen Records"); + writeRecords(rayGenNames, shaderBindingTable.RayGen); + + logger::debug("[RT] Writting Miss Records"); + writeRecords(missNames, shaderBindingTable.Miss); + + logger::debug("[RT] Writting HitGroup Records"); + writeRecords(hitGroupNames, shaderBindingTable.HitGroup); + + return shaderBindingTable; + } + + private: + // Storage for lifetime management + eastl::vector> renameFromNames; + eastl::vector> exportedNames; + + eastl::vector rayGenNames; + eastl::vector missNames; + eastl::vector hitGroupNames; + + eastl::vector closestHitNames; + eastl::vector anyHitNames; + eastl::vector intersectionNames; + + eastl::vector> dxilExportStorage; + eastl::vector> dxilLibStorage; + eastl::vector> hitGroupStorage; + eastl::vector> shaderConfigStorage; + eastl::vector> globalRootStorage; + eastl::vector> pipelineConfigStorage; + eastl::vector subobjects; + + eastl::unique_ptr stateObjectDesc = nullptr; + }; +} diff --git a/src/Features/Raytracing/ShaderBindingTable.h b/src/Features/Raytracing/ShaderBindingTable.h new file mode 100644 index 0000000000..0a30c79f1a --- /dev/null +++ b/src/Features/Raytracing/ShaderBindingTable.h @@ -0,0 +1,260 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace DX12 +{ + // Utility for alignment + constexpr UINT64 Align(UINT64 size, UINT64 alignment) + { + return (size + alignment - 1) & ~(alignment - 1); + } + + // + // 1. 
ShaderRecord + // ------------------------------- + class ShaderRecord + { + public: + ShaderRecord() = default; + + ShaderRecord(void* shaderID, UINT shaderIDSize, const void* localArgs = nullptr, UINT localArgsSize = 0) + { + assert(shaderID); + + m_data.resize(shaderIDSize + localArgsSize); + memcpy(m_data.data(), shaderID, shaderIDSize); + + if (localArgs && localArgsSize > 0) + memcpy(m_data.data() + shaderIDSize, localArgs, localArgsSize); + } + + UINT Size() const { return static_cast(m_data.size()); } + const void* Data() const { return m_data.data(); } + + private: + std::vector m_data; + }; + + // + // 2. ShaderTableSection + // ------------------------------- + class ShaderTableSection + { + public: + void AddRecord(const ShaderRecord& record) + { + m_records.push_back(record); + } + + UINT RecordCount() const { return static_cast(m_records.size()); } + + UINT64 RecordSize() const + { + if (m_records.empty()) + return 0; + + UINT64 size = m_records[0].Size(); + return Align(size, D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT); + } + + UINT64 SectionSize() const + { + return Align(RecordCount() * RecordSize(), D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT); + } + + size_t NumRecords() const { return m_records.size(); } + + void CopyTo(uint8_t* dest) const + { + if (m_records.empty()) + return; + + UINT64 stride = RecordSize(); + + for (const auto& r : m_records) { + memcpy(dest, r.Data(), r.Size()); + if (r.Size() < stride) { + memset(dest + r.Size(), 0, stride - r.Size()); + } + dest += stride; + } + } + + bool Empty() const { return m_records.empty(); } + + private: + std::vector m_records; + }; + + // + // 3. 
ShaderBindingTable + // ------------------------------- + class ShaderBindingTable + { + public: + ShaderTableSection RayGen; + ShaderTableSection Miss; + ShaderTableSection HitGroup; + ShaderTableSection Callable; + + UINT64 GetTotalSize() const + { + UINT64 size = 0; + + UINT64 rayGenSize = RayGen.SectionSize(); + size = Align(size + rayGenSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + + UINT64 missSize = Miss.SectionSize(); + size = Align(size + missSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + + UINT64 hitSize = HitGroup.SectionSize(); + size = Align(size + hitSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + + UINT64 callSize = Callable.SectionSize(); + size = Align(size + callSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + + return size; + } + + void Build(void* pData) + { + assert(pData && "ShaderBindingTable::Build - pData cannot be nullptr"); + + uint8_t* pDataUint8 = static_cast(pData); + + auto copyAndAdvance = [&](const ShaderTableSection& section, UINT64& offset) { + if (!section.Empty()) { + section.CopyTo(pDataUint8 + offset); + offset = Align(offset + section.SectionSize(), D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT); + } + }; + + // Start at offset 0 + UINT64 offset = 0; + + m_rayGenOffset = offset; + copyAndAdvance(RayGen, offset); + + m_missOffset = offset; + copyAndAdvance(Miss, offset); + + m_hitOffset = offset; + copyAndAdvance(HitGroup, offset); + + m_callOffset = offset; + copyAndAdvance(Callable, offset); + + m_sbtSize = offset; + } + + void LogShaderBindingTable(D3D12_GPU_VIRTUAL_ADDRESS baseAddr) + { + logger::debug( + "[RT] SBT Layout:\n" + " Base GPU VA: 0x{:016X}\n" + " RayGen:\n" + " Offset: {}\n" + " SectionSize: {}\n" + " RecordSize: {}\n" + " NumRecords: {}\n" + " GPU VA: 0x{:016X}\n" + " Miss:\n" + " Offset: {}\n" + " SectionSize: {}\n" + " RecordSize: {}\n" + " NumRecords: {}\n" + " GPU VA: 0x{:016X}\n" + " HitGroup:\n" + " Offset: {}\n" + " SectionSize: {}\n" + " RecordSize: {}\n" + " NumRecords: 
{}\n" + " GPU VA: 0x{:016X}\n" + " Callable:\n" + " Offset: {}\n" + " SectionSize: {}\n" + " RecordSize: {}\n" + " NumRecords: {}\n" + " GPU VA: 0x{:016X}\n", + baseAddr, + + // RayGen + m_rayGenOffset, + RayGen.SectionSize(), + RayGen.RecordSize(), + RayGen.NumRecords(), + baseAddr + m_rayGenOffset, + + // Miss + m_missOffset, + Miss.SectionSize(), + Miss.RecordSize(), + Miss.NumRecords(), + baseAddr + m_missOffset, + + // HitGroup + m_hitOffset, + HitGroup.SectionSize(), + HitGroup.RecordSize(), + HitGroup.NumRecords(), + baseAddr + m_hitOffset, + + // Callable + m_callOffset, + Callable.SectionSize(), + Callable.RecordSize(), + Callable.NumRecords(), + baseAddr + m_callOffset); + } + + void FillDispatchShaderBindingTable(D3D12_DISPATCH_RAYS_DESC& desc, D3D12_GPU_VIRTUAL_ADDRESS baseAddr) + { + desc.RayGenerationShaderRecord = { + baseAddr + m_rayGenOffset, + RayGen.SectionSize() + }; + + if (Miss.Empty()) { + desc.MissShaderTable = {}; + } else { + desc.MissShaderTable = { + baseAddr + m_missOffset, + Miss.SectionSize(), + Miss.RecordSize() + }; + } + + if (HitGroup.Empty()) { + desc.HitGroupTable = {}; + } else { + desc.HitGroupTable = { + baseAddr + m_hitOffset, + HitGroup.SectionSize(), + HitGroup.RecordSize() + }; + } + + if (Callable.Empty()) { + desc.CallableShaderTable = {}; + } else { + desc.CallableShaderTable = { + baseAddr + m_callOffset, + Callable.SectionSize(), + Callable.RecordSize() + }; + } + } + + private: + UINT64 m_rayGenOffset = 0; + UINT64 m_missOffset = 0; + UINT64 m_hitOffset = 0; + UINT64 m_callOffset = 0; + UINT64 m_sbtSize = 0; + }; +} diff --git a/src/Features/Raytracing/ShaderUtils.cpp b/src/Features/Raytracing/ShaderUtils.cpp new file mode 100644 index 0000000000..15d18e9678 --- /dev/null +++ b/src/Features/Raytracing/ShaderUtils.cpp @@ -0,0 +1,93 @@ +#pragma once + +#include "ShaderUtils.h" +#include +#include + +namespace ShaderUtils +{ + void CompileShader(winrt::com_ptr& shader, const wchar_t* FilePath, eastl::vector defines, 
const wchar_t* Target, const wchar_t* EntryPoint) + { + if (const HRESULT comInit = CoInitializeEx(nullptr, COINIT_MULTITHREADED); FAILED(comInit) && comInit != RPC_E_CHANGED_MODE) {  // RPC_E_CHANGED_MODE: COM is already initialised on this thread in another mode and remains usable + logger::error("Failed to initialize COM"); + return; + } + + std::string str = Util::WStringToString(FilePath); + + if (!std::filesystem::exists(FilePath)) { + logger::error("Failed to compile shader; {} does not exist", str); + return; + } + + winrt::com_ptr utils; + if (FAILED(DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(&utils)))) { + logger::error("Failed to create DxcUtils"); + return; + } + + winrt::com_ptr compiler; + if (FAILED(DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(&compiler)))) { + logger::error("Failed to create DxcCompiler"); + return; + } + + winrt::com_ptr source; + if (FAILED(utils->LoadFile(FilePath, nullptr, source.put()))) { + logger::error("Failed to load shader file"); + return; + } + + winrt::com_ptr baseHandler; + if (FAILED(utils->CreateDefaultIncludeHandler(baseHandler.put()))) { + logger::error("Failed to create Include Handler"); + return; + } + + DxcBuffer sourceBuffer; + sourceBuffer.Ptr = source->GetBufferPointer(); + sourceBuffer.Size = source->GetBufferSize(); + sourceBuffer.Encoding = DXC_CP_ACP; + + LPCWSTR args[] = { + FilePath, + L"-E", + EntryPoint, + L"-enable-16bit-types", + L"-T", + Target, + L"-I", + L"Data\\Shaders", + L"-Zi", + L"-Qstrip_reflect", + L"-O3", + }; + + winrt::com_ptr compilerArgs; + if (FAILED(DxcCreateInstance(CLSID_DxcCompilerArgs, IID_PPV_ARGS(&compilerArgs)))) { logger::error("Failed to create DxcCompilerArgs"); return; }  // checked like the other DXC objects; compilerArgs is dereferenced right below + + compilerArgs->AddArguments(args, _countof(args)); + compilerArgs->AddDefines(defines.data(), static_cast(defines.size())); + + winrt::com_ptr result; + if (FAILED(compiler->Compile(&sourceBuffer, compilerArgs->GetArguments(), compilerArgs->GetCount(), baseHandler.get(), IID_PPV_ARGS(&result)))) { + logger::error("Compile call failed"); + return; + } + + winrt::com_ptr errors; + if (SUCCEEDED(result->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(&errors), nullptr))) { + if (errors && errors->GetStringLength() > 0) { + 
logger::error("Shader compilation errors: {}", errors->GetStringPointer()); + } + } else { + logger::error("Failed to get compilation errors"); + return; + } + + if (FAILED(result->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(&shader), nullptr))) { + logger::error("Failed to get compiled shader"); + return; + } + } +}; \ No newline at end of file diff --git a/src/Features/Raytracing/ShaderUtils.h b/src/Features/Raytracing/ShaderUtils.h new file mode 100644 index 0000000000..0db8f21561 --- /dev/null +++ b/src/Features/Raytracing/ShaderUtils.h @@ -0,0 +1,8 @@ +#pragma once + +#include + +namespace ShaderUtils +{ + void CompileShader(winrt::com_ptr& shader, const wchar_t* FilePath, eastl::vector defines = {}, const wchar_t* Target = L"lib_6_5", const wchar_t* EntryPoint = L"main"); +}; \ No newline at end of file diff --git a/src/Features/Raytracing/TextureSharing.h b/src/Features/Raytracing/TextureSharing.h new file mode 100644 index 0000000000..c1373c5d10 --- /dev/null +++ b/src/Features/Raytracing/TextureSharing.h @@ -0,0 +1,161 @@ +#pragma once + +#include "Features/Raytracing/Utils.h" +#include "PCH.h" +#include "TruePBR.h" +#include "TruePBR/BSLightingShaderMaterialPBR.h" +#include + +namespace TextureSharing +{ + enum Type : uint32_t + { + None, + Diffuse, + Normal, + Parallax, + Specular, + Skin, + Glow, + EnvMap, + EnvMask, + ModelSpaceNormal, + RMAOS, + Unkown + }; + + constexpr unsigned int FLAG_MASK = 0x80000000; + + static inline Type GetTextureType(const char* path) + { + if (!path) + return Type::None; + + auto pathLower = ToLower(path); + + std::filesystem::path fsPath(pathLower); + + std::string filename = fsPath.stem().string(); + + size_t pos = filename.rfind('_'); + + if (pos != std::string::npos) { + std::string suffix = filename.substr(pos + 1); + + if (suffix == "d") + return Type::Diffuse; + + if (suffix == "n") + return Type::Normal; + + if (suffix == "p") + return Type::Parallax; + + if (suffix == "s") + return Type::Specular; + + if (suffix == 
"g") + return Type::Glow; + + if (suffix == "sk") + return Type::Skin; + + if (suffix == "e") + return Type::EnvMap; + + if (suffix == "m" || suffix == "em") + return Type::EnvMask; + + if (suffix == "msn") + return Type::ModelSpaceNormal; + + if (suffix == "rmaos") + return Type::RMAOS; + } + + // Yes, everything else falls back as diffuse + return Type::Diffuse; + } + + static bool IsNumber(const std::string& s) + { + return !s.empty() && std::all_of(s.begin(), s.end(), [](unsigned char c) { return ::isdigit(c) != 0; });  // unsigned char: passing a negative char (non-ASCII path byte) to isdigit is undefined + } + + static inline Type GetTextureTypeSafe(const char* path) + { + if (!path) + return Type::None; + + auto pathLower = ToLower(path); + + std::filesystem::path fsPath(pathLower); + + std::string filename = fsPath.stem().string(); + + size_t pos = filename.rfind('_'); + + if (pos != std::string::npos) { + std::string suffix = filename.substr(pos + 1); + + if (suffix == "d" || IsNumber(suffix)) + return Type::Diffuse; + + if (suffix == "n") + return Type::Normal; + + if (suffix == "p") + return Type::Parallax; + + if (suffix == "s") + return Type::Specular; + + if (suffix == "g") + return Type::Glow; + + if (suffix == "sk") + return Type::Skin; + + if (suffix == "e") + return Type::EnvMap; + + if (suffix == "m" || suffix == "em") + return Type::EnvMask; + + if (suffix == "msn") + return Type::ModelSpaceNormal; + + if (suffix == "rmaos") + return Type::RMAOS; + + logger::warn("[RT] GetTextureType - Unknown Suffix \"{}\"", suffix); + return Type::Unkown; + } + + return Type::Diffuse; + } + + static inline bool ShouldShareTexture(const char* path, [[maybe_unused]] bool pathTracing) + { + auto type = GetTextureType(path); + + switch (type) { + case TextureSharing::None: + case TextureSharing::Diffuse: + return true; + case TextureSharing::Normal: + case TextureSharing::Specular: + case TextureSharing::Glow: + case TextureSharing::EnvMask: + case TextureSharing::ModelSpaceNormal: + case TextureSharing::RMAOS: + return true; + default: + break; + } + + logger::trace("[RT] 
ShouldShareTexture {}", magic_enum::enum_name(type)); + + return false; + } +} \ No newline at end of file diff --git a/src/Features/Raytracing/Types.h b/src/Features/Raytracing/Types.h new file mode 100644 index 0000000000..1a86e59bd0 --- /dev/null +++ b/src/Features/Raytracing/Types.h @@ -0,0 +1,211 @@ +#pragma once + +#include + +#define FMT_STRUCT(name, ...) \ + template <> \ + struct fmt::formatter \ + { \ + constexpr auto parse(fmt::format_parse_context& ctx) { return ctx.begin(); } \ + template \ + auto format(const name& s, FormatContext& ctx) const \ + { \ + return fmt::format_to(ctx.out(), "{}", s.to_string()); \ + } \ + }; + +struct half +{ + DirectX::PackedVector::HALF v; + + half() = default; + half(const half&) = default; + half& operator=(const half&) = default; + + half(const float& fv) + { + v = DirectX::PackedVector::XMConvertFloatToHalf(fv); + } + + operator float() const + { + return DirectX::PackedVector::XMConvertHalfToFloat(v); + } + + half& operator+=(const half& rhs) + { + v = DirectX::PackedVector::XMConvertFloatToHalf(float(*this) + float(rhs)); + return *this; + } + + half& operator-=(const half& rhs) + { + v = DirectX::PackedVector::XMConvertFloatToHalf(float(*this) - float(rhs)); + return *this; + } + + half& operator*=(const half& rhs) + { + v = DirectX::PackedVector::XMConvertFloatToHalf(float(*this) * float(rhs)); + return *this; + } + + half& operator/=(const half& rhs) + { + v = DirectX::PackedVector::XMConvertFloatToHalf(float(*this) / float(rhs)); + return *this; + } +}; +static_assert(sizeof(half) == 2); + +struct half2 +{ + half x; + half y; + + half2() = default; + + constexpr half2(half _x, half _y) : + x(_x), y(_y) {} + + half2(float _x, float _y) : + x(_x), y(_y) {} + + half2(const float2& v) : + x(v.x), y(v.y) {} + + operator float2() const + { + return float2( + static_cast(x), + static_cast(y)); + } + + std::string to_string() const + { + return "[" + std::to_string(x) + ", " + std::to_string(y) + "]"; + } +}; 
+static_assert(sizeof(half2) == 4); + +FMT_STRUCT(half2, s.x, s.y) + +struct half3 +{ + half x; + half y; + half z; + + half3() = default; + + constexpr half3(half _x, half _y, half _z) : + x(_x), y(_y), z(_z) {} + + half3(float _x, float _y, float _z) : + x(_x), y(_y), z(_z) {} + + half3(const float3& v) : + x(v.x), y(v.y), z(v.z) {} + + operator float3() const + { + return float3( + static_cast(x), + static_cast(y), + static_cast(z)); + } + + half3& operator+=(const half3& rhs) + { + x += rhs.x; + y += rhs.y; + z += rhs.z; + return *this; + } + + std::string to_string() const + { + return "[" + std::to_string(x) + ", " + std::to_string(y) + ", " + std::to_string(z) + "]"; + } +}; +static_assert(sizeof(half3) == 6); + +FMT_STRUCT(half3, s.x, s.y, s.z) + +struct half4 +{ + half x; + half y; + half z; + half w; + + half4() = default; + + constexpr half4(half _x, half _y, half _z, half _w) : + x(_x), y(_y), z(_z), w(_w) {} + + half4(float _x, float _y, float _z, float _w) : + x(_x), y(_y), z(_z), w(_w) {} + + half4(const float4& v) : + x(v.x), y(v.y), z(v.z), w(v.w) {} + + operator float4() const + { + return float4( + static_cast(x), + static_cast(y), + static_cast(z), + static_cast(w)); + } + + std::string to_string() const + { + return "[" + std::to_string(x) + ", " + std::to_string(y) + ", " + std::to_string(z) + ", " + std::to_string(w) + "]"; + } +}; +static_assert(sizeof(half4) == 8); + +FMT_STRUCT(half4, s.x, s.y, s.z, s.w) + +struct uint2 +{ + uint x; + uint y; + + bool operator==(const uint2&) const = default; + bool operator!=(const uint2&) const = default; +}; +static_assert(sizeof(uint2) == 8); + +struct uint3 +{ + uint x; + uint y; + uint z; +}; +static_assert(sizeof(uint3) == 12); + +struct uint4 +{ + uint x; + uint y; + uint z; + uint w; +}; +static_assert(sizeof(uint4) == 16); + +typedef half4 float16_t4; + +struct AABB +{ + float3 center; + float3 size; + float3 extents; + + static AABB FromMinMax(float3 min, float3 max) + { + const float3 size = 
max - min; + return AABB((min + max) * 0.5f, size, size * 0.5f); + } +}; \ No newline at end of file diff --git a/src/Features/Raytracing/Utils.h b/src/Features/Raytracing/Utils.h new file mode 100644 index 0000000000..7b6606e5dd --- /dev/null +++ b/src/Features/Raytracing/Utils.h @@ -0,0 +1,350 @@ +#pragma once + +#include "PCH.h" + +#include "Features/Raytracing/Types.h" +#include "Features/Raytracing/RTConstants.h" + +#include "TruePBR.h" +#include "TruePBR/BSLightingShaderMaterialPBR.h" +#include + +static inline uint PackUByte4(float4 unpacked) +{ + auto x = (uint)(std::clamp(unpacked.x, 0.0f, 1.0f) * 255.0f) & 0xFF;  // clamp to [0,1]: out-of-range input used to wrap through the mask, negative input was an undefined float->uint conversion + auto y = (uint)(std::clamp(unpacked.y, 0.0f, 1.0f) * 255.0f) & 0xFF; + auto z = (uint)(std::clamp(unpacked.z, 0.0f, 1.0f) * 255.0f) & 0xFF; + auto w = (uint)(std::clamp(unpacked.w, 0.0f, 1.0f) * 255.0f) & 0xFF; + + return (w << 24) | (z << 16) | (y << 8) | x; +} + +static inline float4 UnpackUByte4(uint packed) +{ + float4 result; + result.x = (packed & 0xFF) / 255.0f; + result.y = ((packed >> 8) & 0xFF) / 255.0f; + result.z = ((packed >> 16) & 0xFF) / 255.0f; + result.w = (packed >> 24) / 255.0f; + return result; +} + +static inline uint PackByte4(float4 unpacked) +{ + return PackUByte4(unpacked * 0.5f + float4(0.5f, 0.5f, 0.5f, 0.5f)); +} + +static inline float4 UnpackByte4(uint packed) +{ + return UnpackUByte4(packed) * 2.0f - float4(1.0f, 1.0f, 1.0f, 1.0f); +} + +static inline float3 Normalize(float3 vector) +{ + vector.Normalize(); + return vector; +} + +static inline ID3D11Resource* TryGetTexture(const RE::NiPointer niPointer) +{ + if (niPointer) { + if (const auto& bsTexture = niPointer->rendererTexture; bsTexture) { + return bsTexture->texture; + } + } + + return nullptr; +} + +static inline DirectX::XMMATRIX GetXMFromNiTransform(const RE::NiTransform& Transform) +{ + DirectX::XMMATRIX temp; + + const RE::NiMatrix3& m = Transform.rotate; + const float scale = Transform.scale; + + temp.r[0] = DirectX::XMVectorScale(DirectX::XMVectorSet( + m.entry[0][0], + m.entry[1][0], + m.entry[2][0], + 0.0f), + scale); + + temp.r[1] = 
DirectX::XMVectorScale(DirectX::XMVectorSet( + m.entry[0][1], + m.entry[1][1], + m.entry[2][1], + 0.0f), + scale); + + temp.r[2] = DirectX::XMVectorScale(DirectX::XMVectorSet( + m.entry[0][2], + m.entry[1][2], + m.entry[2][2], + 0.0f), + scale); + + temp.r[3] = DirectX::XMVectorSet( + Transform.translate.x, + Transform.translate.y, + Transform.translate.z, + 1.0f); + + return temp; +} + +static inline float3 Float3(const RE::NiPoint3& point3) +{ + return float3(point3.x, point3.y, point3.z); +} + +static inline float3 Float3(const RE::NiColor& color) +{ + return float3(color.red, color.green, color.blue); +} + + +static inline bool IsShareableFormat(DXGI_FORMAT format) +{ + switch (format) { + case DXGI_FORMAT_BC4_UNORM: + return false; + break; + case DXGI_FORMAT_BC4_SNORM: + return false; + break; + case DXGI_FORMAT_BC7_UNORM: + return false; + break; + case DXGI_FORMAT_BC7_UNORM_SRGB: + return false; + break; + default: + return true; + break; + } +} + +static inline DXGI_FORMAT GetCompatibleFormat(DXGI_FORMAT format, bool recompress) +{ + switch (format) { + case DXGI_FORMAT_BC4_UNORM: + return recompress ? DXGI_FORMAT_BC1_UNORM : DXGI_FORMAT_R8_UNORM; + break; + case DXGI_FORMAT_BC7_UNORM: + return recompress ? DXGI_FORMAT_BC3_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM; + break; + case DXGI_FORMAT_BC7_UNORM_SRGB: + return recompress ? 
DXGI_FORMAT_BC3_UNORM_SRGB : DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + break; + default: + return format; + break; + } +} + +static inline bool ShouldShareTexture(RE::BSTextureSet::Texture a_texture, bool pathTracing) +{ + if (a_texture == RE::BSTextureSet::Texture::kDiffuse) + return true; + + if (pathTracing && a_texture == RE::BSTextureSet::Texture::kNormal) + return true; + + if (globals::truePBR->currentTextureSet == nullptr) { + if (a_texture == RE::BSTextureSet::Texture::kGlowMap) + return true; + } else { + if (a_texture == BSLightingShaderMaterialPBR::EmissiveTexture) + return true; + + if (pathTracing && a_texture == BSLightingShaderMaterialPBR::RmaosTexture) + return true; + } + + return false; +} + +static inline std::string ToLower(std::string s) +{ + std::transform(s.begin(), s.end(), s.begin(), + [](unsigned char c) { return std::tolower(c); }); + return s; +} + +static inline bool ShareableTexture(const char* path) +{ + if (!path) + return false; + + auto pathLower = ToLower(path); + + //if (pathLower.ends_with("_d.dds")) + // return true; + + if (pathLower.ends_with("_n.dds")) + return false; + + if (pathLower.ends_with("_p.dds")) + return false; + + if (pathLower.ends_with("_s.dds")) + return false; + + if (pathLower.ends_with("_sk.dds")) + return false; + + if (pathLower.ends_with("_msn.dds")) + return false; + + if (pathLower.ends_with("_rmaos.dds")) + return false; + + return true; +} + +template +static inline std::string GetFlagsString(auto value) +{ + static_assert( + magic_enum::customize::enum_range::is_flags, + "T must be a magic_enum flags enum"); + + using N = decltype(value); + + const auto& entries = magic_enum::enum_entries(); + + std::string flags = ""; + + for (const auto& [flag, name] : entries) { + if (value & static_cast(flag)) { + flags += fmt::format("{} ", name); + } + } + + return flags; +}; + +static uint32_t DivideRoundUp(uint32_t x, uint32_t divisor) +{ + return (x + divisor - 1) / divisor; +} + +static uint32_t 
DivideRoundUp(uint32_t x, float divisor) +{ + return static_cast(ceil(x / divisor)); +} + +static eastl::unique_ptr CreateTexture2D(uint2 size, DXGI_FORMAT format, uint bindFlags, DXGI_FORMAT srvFormat = DXGI_FORMAT_UNKNOWN, DXGI_FORMAT uavFormat = DXGI_FORMAT_UNKNOWN) +{ + D3D11_TEXTURE2D_DESC texDesc{}; + texDesc.Width = size.x; + texDesc.Height = size.y; + texDesc.MipLevels = 1; + texDesc.ArraySize = 1; + texDesc.Format = format; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.BindFlags = bindFlags; + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { + .Format = srvFormat != DXGI_FORMAT_UNKNOWN ? srvFormat : format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { + .MostDetailedMip = 0, + .MipLevels = texDesc.MipLevels } + }; + + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = { + .Format = uavFormat != DXGI_FORMAT_UNKNOWN ? uavFormat : format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } + }; + + auto texture2D = eastl::make_unique(texDesc); + + if (bindFlags & D3D11_BIND_SHADER_RESOURCE) + texture2D->CreateSRV(srvDesc); + + if (bindFlags & D3D11_BIND_UNORDERED_ACCESS) + texture2D->CreateUAV(uavDesc); + + return texture2D; +}; + +static void CreateTexture2DUAV(ID3D12Device5* device, ID3D12Resource* resource, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) +{ + auto desc = resource->GetDesc(); + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uavDesc.Format = desc.Format; + + device->CreateUnorderedAccessView(resource, nullptr, &uavDesc, handle); +} + +static void CreateTexture2DSRV(ID3D12Device5* device, ID3D12Resource* resource, CD3DX12_CPU_DESCRIPTOR_HANDLE handle) +{ + auto desc = resource->GetDesc(); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = desc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + 
srvDesc.Texture2D.MostDetailedMip = 0; + srvDesc.Texture2D.MipLevels = desc.MipLevels; + srvDesc.Texture2D.PlaneSlice = 0; + srvDesc.Texture2D.ResourceMinLODClamp = 0.0f; + + device->CreateShaderResourceView(resource, &srvDesc, handle); +} + +static inline float ShininessToRoughness(float shininess) +{ + // make sure shininess within valid range (0 - 1023), otherwise set to 1.0f + if (shininess <= 0.0f || shininess > 1023.0f) { + return 1.0f; + } + return std::pow(2.0f / (shininess + 2.0f), 0.25f); +} + +template +static void detour_thunk(size_t offset) +{ + T::func = REL::Module::get().base() + offset; + DetourTransactionBegin(); + DetourUpdateThread(GetCurrentThread()); + DetourAttach(reinterpret_cast(&T::func), reinterpret_cast(T::thunk)); + DetourTransactionCommit(); +} + +static bool IsPlayer(RE::FormID formID) +{ + return formID == RTConstants::PLAYER_REFR_FORMID; +}; + +static bool IsPlayer(RE::TESForm* form) +{ + return form && IsPlayer(form->GetFormID());  // a null form is never the player; avoids dereferencing nullptr +}; + +static std::wstring ToWide(const std::string& str) +{ + if (str.empty()) + return std::wstring(); + + int size_needed = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), + (int)str.size(), nullptr, 0); + std::wstring wstr(size_needed, 0); + MultiByteToWideChar(CP_UTF8, 0, str.c_str(), + (int)str.size(), &wstr[0], size_needed); + return wstr; +} + +static RE::BSFadeNode* FindBSFadeNode(RE::NiNode* a_niNode) +{ + if (auto fadeNode = a_niNode->AsFadeNode()) { + return fadeNode; + } + return a_niNode->parent ? 
FindBSFadeNode(a_niNode->parent) : nullptr; +} \ No newline at end of file diff --git a/src/Features/Raytracing/magic_enum_spec.h b/src/Features/Raytracing/magic_enum_spec.h new file mode 100644 index 0000000000..c90837b635 --- /dev/null +++ b/src/Features/Raytracing/magic_enum_spec.h @@ -0,0 +1,44 @@ +#pragma once + +#include "PCH.h" + +#include "magic_enum/magic_enum.hpp" + +namespace magic_enum::customize +{ + template <> + struct enum_range + { + static constexpr bool is_flags = true; + }; + + template <> + struct enum_range + { + static constexpr bool is_flags = true; + }; + + template <> + struct enum_range + { + static constexpr bool is_flags = true; + }; + + template <> + struct enum_range + { + static constexpr bool is_flags = true; + }; + + template <> + struct enum_range + { + static constexpr bool is_flags = true; + }; + + template <> + struct enum_range + { + static constexpr bool is_flags = true; + }; +} \ No newline at end of file diff --git a/src/Globals.cpp b/src/Globals.cpp index c5883bfc24..8ae7d9ecff 100644 --- a/src/Globals.cpp +++ b/src/Globals.cpp @@ -15,6 +15,7 @@ #include "Features/LightLimitFix.h" #include "Features/LinearLighting.h" #include "Features/PerformanceOverlay.h" +#include "Features/Raytracing.h" #include "Features/RenderDoc.h" #include "Features/ScreenSpaceGI.h" #include "Features/ScreenSpaceShadows.h" @@ -33,6 +34,7 @@ #include "Features/WeatherEditor.h" #include "Features/WeatherPicker.h" #include "Features/WetnessEffects.h" + #include "Menu.h" #include "ShaderCache.h" #include "State.h" @@ -82,6 +84,7 @@ namespace globals Upscaling upscaling{}; RenderDoc renderDoc{}; WeatherEditor weatherEditor{}; + Raytracing raytracing{}; namespace llf { diff --git a/src/Globals.h b/src/Globals.h index e481b9f56e..d6b94a48db 100644 --- a/src/Globals.h +++ b/src/Globals.h @@ -31,6 +31,7 @@ struct WetnessEffects; struct ExtendedTranslucency; struct Upscaling; struct WeatherEditor; +struct Raytracing; class State; class Deferred; @@ -87,6 
+88,7 @@ namespace globals extern Upscaling upscaling; extern RenderDoc renderDoc; extern WeatherEditor weatherEditor; + extern Raytracing raytracing; namespace llf { diff --git a/src/Hooks.cpp b/src/Hooks.cpp index d29a3f04ad..01d9351ec4 100644 --- a/src/Hooks.cpp +++ b/src/Hooks.cpp @@ -12,6 +12,7 @@ #include "Features/InteriorSun.h" #include "Features/LightLimitFix.h" +#include "Features/Raytracing.h" #include "Features/TerrainHelper.h" #include "Features/Upscaling.h" #include "Features/VR.h" @@ -291,6 +292,8 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChain( pAdapter->GetDesc(&adapterDesc); globals::state->SetAdapterDescription(adapterDesc.Description); + //Flags |= D3D11_CREATE_DEVICE_DEBUG; + const D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_1; auto ret = ptrD3D11CreateDeviceAndSwapChain(pAdapter, @@ -306,6 +309,10 @@ HRESULT WINAPI hk_D3D11CreateDeviceAndSwapChain( pFeatureLevel, ppImmediateContext); + if (SUCCEEDED(ret) && ppDevice && *ppDevice && ppImmediateContext && *ppImmediateContext && globals::features::raytracing.loaded) {  // out-params are optional and only valid when device creation succeeded + globals::features::raytracing.InitD3D12(*ppDevice, *ppImmediateContext, pAdapter); + } + return ret; } @@ -624,8 +631,8 @@ namespace Hooks { static void thunk(RE::BSGraphics::Renderer* This, uint32_t a_target, RE::BSGraphics::CubeMapRenderTargetProperties* a_properties) { - a_properties->height = 128; - a_properties->width = 128; + a_properties->height = CubemapResolution(); + a_properties->width = CubemapResolution(); func(This, a_target, a_properties); } static inline REL::Relocation func; @@ -635,8 +642,8 @@ namespace Hooks { static void thunk(RE::BSGraphics::Renderer* This, uint32_t a_target, RE::BSGraphics::DepthStencilTargetProperties* a_properties) { - a_properties->height = 128; - a_properties->width = 128; + a_properties->height = CubemapResolution(); + a_properties->width = CubemapResolution(); func(This, a_target, a_properties); } static inline REL::Relocation func; @@ -954,4 +961,15 @@ namespace Hooks logger::info("Hooking CreateDXGIFactory"); *(uintptr_t*)&ptrCreateDXGIFactory = 
SKSE::PatchIAT(hk_CreateDXGIFactory, "dxgi.dll", !REL::Module::IsVR() ? "CreateDXGIFactory" : "CreateDXGIFactory1"); } + + uint CubemapResolution() + { + auto& rt = globals::features::raytracing; + + if (rt.loaded) { + return RTConstants::SKY_CUBEMAP_SIZE; + } else { + return 128; + } + } } \ No newline at end of file diff --git a/src/Hooks.h b/src/Hooks.h index 335a7df7d8..12c0b9d4ca 100644 --- a/src/Hooks.h +++ b/src/Hooks.h @@ -21,4 +21,5 @@ namespace Hooks void Install(); void InstallEarlyHooks(); + uint CubemapResolution(); } diff --git a/src/TruePBR.cpp b/src/TruePBR.cpp index 86afc11300..b80ac1e668 100644 --- a/src/TruePBR.cpp +++ b/src/TruePBR.cpp @@ -42,7 +42,7 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( #define CHECK_PBR_TEXTURE(textureName) \ if (!(pbrMaterial->textureName)) { \ - logger::warn("[TruePBR] {} missing {}; treating as nonPBR", pbrMaterial->inputFilePath, #textureName); \ + logger::debug("[TruePBR] {} missing {}; treating as nonPBR", pbrMaterial->inputFilePath, #textureName); \ return false; \ } @@ -558,7 +558,7 @@ struct BSLightingShaderProperty_LoadBinary bool isPbr = false; { RE::BSLightingShaderMaterialBase* material = nullptr; - if (property->flags.any(kMenuScreen)) { + if (property->flags.any(TruePBR::PBRFlag)) { auto* pbrMaterial = BSLightingShaderMaterialPBR::Make(); pbrMaterial->inputFilePath = stream.inputFilePath; pbrMaterial->loadedWithFeature = feature; diff --git a/src/TruePBR.h b/src/TruePBR.h index 2ead82c6e4..f6954d0c73 100644 --- a/src/TruePBR.h +++ b/src/TruePBR.h @@ -20,6 +20,8 @@ struct TruePBR return &singleton; } + static constexpr auto PBRFlag = RE::BSShaderProperty::EShaderPropertyFlag::kMenuScreen; + inline std::string GetShortName() { return "TruePBR"; } void DrawSettings(); diff --git a/src/Utils/D3D.cpp b/src/Utils/D3D.cpp index fa1d2710a4..337e3ed5d6 100644 --- a/src/Utils/D3D.cpp +++ b/src/Utils/D3D.cpp @@ -4,6 +4,7 @@ #include "Utils/Format.h" #include #include +#include #include #include @@ -124,7 
+125,7 @@ namespace Util } }; - ID3D11DeviceChild* CompileShader(const wchar_t* FilePath, const std::vector>& Defines, const char* ProgramType, const char* Program) + ID3D11DeviceChild* CompileShader(const wchar_t* FilePath, const std::vector>& Defines, const char* ProgramType, const char* Program, const std::vector& InputDesc, ID3D11InputLayout** InputLayout) { auto device = globals::d3d::device; @@ -153,22 +154,37 @@ namespace Util for (unsigned int i = 0; i < shaderDefines->size(); i++) macros.push_back({ shaderDefines->at(i).first.c_str(), shaderDefines->at(i).second.c_str() }); } - if (!_stricmp(ProgramType, "ps_5_0")) + + D3D11_SHADER_VERSION_TYPE shaderType; + if (!_strnicmp(ProgramType, "ps_", 3)) { + shaderType = D3D11_SHVER_PIXEL_SHADER; + } else if (!_strnicmp(ProgramType, "vs_", 3)) { + shaderType = D3D11_SHVER_VERTEX_SHADER; + } else if (!_strnicmp(ProgramType, "gs_", 3)) { + shaderType = D3D11_SHVER_GEOMETRY_SHADER; + } else if (!_strnicmp(ProgramType, "hs_", 3)) { + shaderType = D3D11_SHVER_HULL_SHADER; + } else if (!_strnicmp(ProgramType, "ds_", 3)) { + shaderType = D3D11_SHVER_DOMAIN_SHADER; + } else if (!_strnicmp(ProgramType, "cs_", 3)) { + shaderType = D3D11_SHVER_COMPUTE_SHADER; + } else { + logger::error("Invalid ProgramType: {}", ProgramType); + return nullptr; + } + + if (shaderType == D3D11_SHVER_PIXEL_SHADER) macros.push_back({ "PSHADER", "" }); - else if (!_stricmp(ProgramType, "vs_5_0")) + else if (shaderType == D3D11_SHVER_VERTEX_SHADER) macros.push_back({ "VSHADER", "" }); - else if (!_stricmp(ProgramType, "hs_5_0")) + else if (shaderType == D3D11_SHVER_GEOMETRY_SHADER) + macros.push_back({ "GEOMETRYSHADER", "" }); + else if (shaderType == D3D11_SHVER_HULL_SHADER) macros.push_back({ "HULLSHADER", "" }); - else if (!_stricmp(ProgramType, "ds_5_0")) + else if (shaderType == D3D11_SHVER_DOMAIN_SHADER) macros.push_back({ "DOMAINSHADER", "" }); - else if (!_stricmp(ProgramType, "cs_5_0")) + else if (shaderType == D3D11_SHVER_COMPUTE_SHADER) 
macros.push_back({ "COMPUTESHADER", "" }); - else if (!_stricmp(ProgramType, "cs_4_0")) - macros.push_back({ "COMPUTESHADER", "" }); - else if (!_stricmp(ProgramType, "cs_5_1")) - macros.push_back({ "COMPUTESHADER", "" }); - else - return nullptr; // Add null terminating entry macros.push_back({ "WINPC", "" }); @@ -190,33 +206,40 @@ namespace Util logger::warn("Shader compilation failed:\n\n{}", shaderErrors ? static_cast(shaderErrors->GetBufferPointer()) : "Unknown error"); return nullptr; } + + auto inputCount = InputDesc.size(); + if (inputCount > 0 && InputLayout) { + device->CreateInputLayout(InputDesc.data(), static_cast(inputCount), shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), InputLayout); + } + if (shaderErrors) logger::debug("Shader logs:\n{}", static_cast(shaderErrors->GetBufferPointer())); - if (!_stricmp(ProgramType, "ps_5_0")) { + + if (shaderType == D3D11_SHVER_PIXEL_SHADER) { ID3D11PixelShader* regShader; device->CreatePixelShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, ®Shader); return regShader; - } else if (!_stricmp(ProgramType, "vs_5_0")) { + } else if (shaderType == D3D11_SHVER_VERTEX_SHADER) { ID3D11VertexShader* regShader; device->CreateVertexShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, ®Shader); return regShader; - } else if (!_stricmp(ProgramType, "hs_5_0")) { + } else if (shaderType == D3D11_SHVER_GEOMETRY_SHADER) { + ID3D11GeometryShader* regShader; + device->CreateGeometryShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, ®Shader); + return regShader; + } else if (shaderType == D3D11_SHVER_HULL_SHADER) { ID3D11HullShader* regShader; device->CreateHullShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, ®Shader); return regShader; - } else if (!_stricmp(ProgramType, "ds_5_0")) { + } else if (shaderType == D3D11_SHVER_DOMAIN_SHADER) { ID3D11DomainShader* regShader; 
device->CreateDomainShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, ®Shader); return regShader; - } else if (!_stricmp(ProgramType, "cs_5_0")) { + } else if (shaderType == D3D11_SHVER_COMPUTE_SHADER) { ID3D11ComputeShader* regShader; DX::ThrowIfFailed(device->CreateComputeShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, ®Shader)); return regShader; - } else if (!_stricmp(ProgramType, "cs_4_0")) { - ID3D11ComputeShader* regShader; - DX::ThrowIfFailed(device->CreateComputeShader(shaderBlob->GetBufferPointer(), shaderBlob->GetBufferSize(), nullptr, ®Shader)); - return regShader; - } + }; return nullptr; } diff --git a/src/Utils/D3D.h b/src/Utils/D3D.h index 216d29db92..8c4c5639f6 100644 --- a/src/Utils/D3D.h +++ b/src/Utils/D3D.h @@ -11,7 +11,7 @@ namespace Util std::string GetNameFromRTV(ID3D11RenderTargetView* a_rtv); void SetResourceName(ID3D11DeviceChild* Resource, const char* Format, ...); - ID3D11DeviceChild* CompileShader(const wchar_t* FilePath, const std::vector>& Defines, const char* ProgramType, const char* Program = "main"); + ID3D11DeviceChild* CompileShader(const wchar_t* FilePath, const std::vector>& Defines, const char* ProgramType, const char* Program = "main", const std::vector& InputDesc = {}, ID3D11InputLayout** InputLayout = nullptr); // Texture manipulation utilities void ApplyHighlightTintToTexture(ID3D11Texture2D* texture, bool isHighlighted, const std::array& highlightColor = { 1.0f, 0.5f, 0.0f, 0.3f }); diff --git a/vcpkg.json b/vcpkg.json index fcf47c7ba7..f41d914f53 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -13,6 +13,9 @@ "bshoshany-thread-pool", "clib-util", "cppwinrt", + "d3d12-memory-allocator", + "directx12-agility", + "directx-dxc", "directx-headers", "directxtex", "eastl",