Dynamic GI Spherical Harmonic from Spherical Gaussian Modes (#9)

pastasfuture · pastasfuture · commit 6012f07367a7 · 2021-09-14T18:50:10.000-07:00
* Implemented SH from SG Modes which can be toggled between in the Probe Dynamic GI volume settings. SamplePeakAndProject is the same mode we are used to: Spherical gaussians will simply be evaluated at their peak and projected to convert to spherical harmonics. SHFromSGFit: A spherical gaussian to spherical harmonic function fit is used, which is physically plausible. SHFromSGFitWithCosineWindow: A spherical gaussian with an additional cosine window to spherical harmonic function fit is used, which is physically plausible. Less directional blur than SHFromSGFit.

* Cleanup pass on SG to SH supporting math. Created Zonal Harmonic data type with supporting functions to add type safety to the transforms as we have done with the other spherical harmonic functions. Created specialized zonal harmonic rotation functions which significantly reduce the work required for rotation. It's possible the compiler was handling this already by stripping out work on zero coefficients, and by automatically using floats instead of float3s when all channels are the same - but better to not put too much pressure on the compiler. This should also make it easier in the future to create specialized rotation functions for cardinal rotations (which will introduce additional zeros).

Co-authored-by: pastasfuture &lt;nickb@bonfirestudios.com&gt;
diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeDynamicGI.cs b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeDynamicGI.cs
@@ -35,6 +35,8 @@ public class ProbeDynamicGI : VolumeComponent
         public ClampedFloatParameter propagationSharpness = new ClampedFloatParameter(2.0f, 0.0f, 16.0f);
         [Tooltip("Advanced control for the SG sharpness used when evaluating the influence of infinite bounce light near surfaces")]
         public ClampedFloatParameter infiniteBounceSharpness = new ClampedFloatParameter(2.0f, 0.0f, 16.0f);
+        [Tooltip("Advanced control for probe propagation combine pass.\nSamplePeakAndProject: Spherical gaussians will simply be evaluated at their peak and projected to convert to spherical harmonics.\nSHFromSGFit: A spherical gaussian to spherical harmonic function fit is used, which is physically plausible.\nSHFromSGFitWithCosineWindow: A spherical gaussian with an additional cosine window to spherical harmonic function fit is used, which is physically plausible. Less directional blur than SHFromSGFit.")]
+        public SHFromSGModeParameter shFromSGMode = new SHFromSGModeParameter(SHFromSGMode.SamplePeakAndProject);
         [Tooltip("Advanced control for darkening down the indirect light on invalid probes")]
         public ClampedFloatParameter leakMultiplier = new ClampedFloatParameter(0.0f, 0.0f, 1.0f);
         [Tooltip("Advanced control to bias the distance from the normal of the hit surface to perform direct lighting evaluation on")]
@@ -47,5 +49,20 @@ public class ProbeDynamicGI : VolumeComponent
 
         [Tooltip("Advanced control to clear all dynamic GI buffers in the event lighting blows up when tuning")]
         public BoolParameter clear = new BoolParameter(false);
+
+        [Serializable]
+        public enum SHFromSGMode // Selects how spherical gaussians are converted to spherical harmonics in the propagation combine pass.
+        {
+            SamplePeakAndProject = 0, // Spherical gaussians are evaluated at their peak and projected to spherical harmonics.
+            SHFromSGFit, // A spherical gaussian to spherical harmonic function fit is used.
+            SHFromSGFitWithCosineWindow // As SHFromSGFit, with an additional cosine window; less directional blur than SHFromSGFit.
+        };
+
+        [Serializable]
+        public sealed class SHFromSGModeParameter : VolumeParameter<SHFromSGMode> // Volume parameter type whose value is an SHFromSGMode.
+        {
+            public SHFromSGModeParameter(SHFromSGMode value, bool overrideState = false) // overrideState defaults to false when omitted.
+                : base(value, overrideState) {} // Both arguments are forwarded unchanged to the VolumeParameter<SHFromSGMode> base constructor.
+        }
     }
 } // UnityEngine.Experimental.Rendering.HDPipeline
diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbePropagationCombine.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbePropagationCombine.compute
@@ -1,12 +1,16 @@
 #include "Packages/com.unity.render-pipelines.high-definition-config/Runtime/ShaderConfig.cs.hlsl"
 #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/EntityLighting.hlsl"
+#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/CommonLighting.hlsl"
 #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/ProbeVolumeRotate.hlsl"
 #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbePropagationGlobals.hlsl"
 #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeVolumeSphericalHarmonicsLighting.hlsl"
 #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeVolumeSphericalHarmonicsDeringing.hlsl"
 
 #pragma kernel CombinePropagationAxis
 
+#pragma multi_compile _ SH_FROM_SG_PBR_FIT SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW
+#pragma multi_compile _ 
+
 #define GROUP_SIZE 64
 //#pragma enable_d3d11_debug_symbols
 
@@ -30,6 +34,8 @@ float _BakedLightingContribution;
 float _DynamicPropagationContribution;
 float4 _RayAxis[NEIGHBOR_AXIS_COUNT];
 
+float _PropagationSharpness;
+
 
 uint3 ComputeWriteIndexFromReadIndex(uint readIndex, float3 resolution)
 {
@@ -116,8 +122,7 @@ void WriteFinalSHOutgoingRadiosityWithProjectedConstantsPacked(uint3 writeIndex,
     _ProbeVolumeAtlasWriteTextureSH[uint3(writeIndex.x, writeIndex.y, writeIndex.z + _ProbeVolumeAtlasResolutionAndSliceCount.z * 6)] = float4(outgoingRadiosityProjectedConstantsPacked.data[6].xyz, validity);
 }
 
-
-SHIncomingIrradiance ProjectPropagationAxis(uint probeIndex)
+SHIncomingIrradiance ProjectPropagationAxisFromPeak(uint probeIndex)
 {
     SHIncomingIrradiance incomingIrradiance;
     ZERO_INITIALIZE(SHIncomingIrradiance, incomingIrradiance);
@@ -133,7 +138,102 @@ SHIncomingIrradiance ProjectPropagationAxis(uint probeIndex)
     }
 
     return incomingIrradiance;
+}
+
+// data for fit generated with: https://gist.github.com/pastasfuture/e1a7d80d6ed1104540b22edc15ce655a
+// Coefficient curves (for the non-zero parameters c0, c2, and c6) were fit in desmos: https://www.desmos.com/calculator/rnrmjlz1jb
+// Reference implementation: https://www.shadertoy.com/view/7tsXzH
+//
+// Note: These ZHWindowComputeFromSphericalGaussianC* functions can be replaced with hardcoded constants once we make sharpness hardcoded.
+#if !defined(SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW)
+float ZHWindowComputeFromSphericalGaussianC0(float sharpness) // Variant compiled when SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW is not defined.
+{
+    return pow(sharpness * 1.22962 + 0.823224, -1.25462) * 3.73236 + 0.0289582; // Fitted power curve in sharpness; stored by the caller as zonal harmonic window entry 0.
+}
+
+float ZHWindowComputeFromSphericalGaussianC1(float sharpness) // Variant compiled when SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW is not defined.
+{
+    return exp2(-3.78299 * pow(abs(sharpness * 0.766922 + -0.687697), -0.95733)) * -0.862788 + 0.851292; // Fitted curve for zonal harmonic window entry 1. NOTE(review): the pow base is 0 near sharpness ~0.897 with a negative exponent - confirm the result there is as intended.
+}
+
+float ComputeSigmoidSCurve(float x, float p) // S-shaped curve of x; p scales x inside the exponent.
+{
+    float a = exp2(x * p); // a = 2^(x * p)
+    return a / (a + 1.0); // a / (a + 1): equals 0.5 at x * p == 0 and approaches 1 as a grows.
+}
 
+float ZHWindowComputeFromSphericalGaussianC2(float sharpness) // Variant compiled when SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW is not defined; stored by the caller as zonal harmonic window entry 2.
+{
+    return lerp( // Piecewise fit: blends a low-sharpness quadratic into a high-sharpness sigmoid.
+        sharpness * sharpness * -0.0326676 + sharpness * 0.225077, // Quadratic branch: fully weighted at sharpness <= 0.800167.
+        ComputeSigmoidSCurve(sharpness * 15.3784 + -31.6152, 0.0162406) * -0.707591 + 0.85572, // Sigmoid branch: fully weighted at sharpness >= 4.57034.
+        saturate((sharpness - 0.800167) / (4.57034 - 0.800167)) // Blend weight rising 0 -> 1 across [0.800167, 4.57034]; the denominator must be positive or the sigmoid branch is never reached at high sharpness.
+    );
+}
+#else
+float ZHWindowComputeFromSphericalGaussianC0(float sharpness) // Variant compiled when SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW is defined.
+{
+    return pow(sharpness * 0.386254 + 1.55848, -1.52661) * 1.72542 + 0.0290483; // Fitted power curve in sharpness; stored by the caller as zonal harmonic window entry 0.
+}
+
+float ZHWindowComputeFromSphericalGaussianC1(float sharpness) // Variant compiled when SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW is defined.
+{
+    return exp2(-8.6001 * pow(abs(sharpness * 1.53466 + 3.47572), -1.16397)) * -1.32848 + 1.34496; // Fitted curve in sharpness; stored by the caller as zonal harmonic window entry 1.
+}
+
+float ZHWindowComputeFromSphericalGaussianC2(float sharpness) // Variant compiled when SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW is defined.
+{
+    return exp2(-0.506072 * pow(abs(sharpness * 0.273738 + 0.296201), -1.06396)) * -0.506072 + 0.511033; // Fitted curve for zonal harmonic window entry 2. NOTE(review): -0.506072 appears both as the exponent scale and as the output scale - confirm against the fit data.
+}
+#endif
+
+ZHWindow ZHWindowComputeFromSphericalGaussian(float sharpness) // Builds a ZHWindow from the three fitted curves evaluated at the given sharpness.
+{
+    ZHWindow zhWindow;
+    zhWindow.data[0] = ZHWindowComputeFromSphericalGaussianC0(sharpness); // Entry 0 <- C0 fit.
+    zhWindow.data[1] = ZHWindowComputeFromSphericalGaussianC1(sharpness); // Entry 1 <- C1 fit.
+    zhWindow.data[2] = ZHWindowComputeFromSphericalGaussianC2(sharpness); // Entry 2 <- C2 fit.
+
+    return zhWindow; // Which fit set was used depends on whether SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW is defined.
+}
+
+SHIncomingIrradiance SHIncomingIrradianceComputeFromSphericalGaussian(float3 direction, float sharpness, float3 radiance)
+{
+    // Chain: fit a ZHWindow from `sharpness`, convert it to an SHWindow using `direction`, then combine that window with `radiance`.
+    ZHWindow zonalFit = ZHWindowComputeFromSphericalGaussian(sharpness);
+    SHWindow orientedFit = SHWindowComputeFromZHWindow(zonalFit, direction);
+    SHIncomingIrradiance lobeIrradiance = SHIncomingIrradianceComputeFromSHWindowAndRadiance(orientedFit, radiance);
+    return lobeIrradiance;
+}
+
+SHIncomingIrradiance ProjectPropagationAxisFromFit(uint probeIndex)
+{
+    // Sums the fitted contribution of every neighbor axis entry belonging to this probe.
+    SHIncomingIrradiance accumulated;
+    ZERO_INITIALIZE(SHIncomingIrradiance, accumulated);
+
+    // The probe's entries in _RadianceCacheAxis are read consecutively, one per axis.
+    uint firstAxisIndex = probeIndex * NEIGHBOR_AXIS_COUNT;
+    for (int axis = 0; axis < NEIGHBOR_AXIS_COUNT; ++axis)
+    {
+        float3 lobeRadiance = _RadianceCacheAxis[firstAxisIndex + (uint)axis].xyz;
+        float3 lobeDirection = _RayAxis[axis].xyz;
+
+        // Every axis is evaluated with the same sharpness: _PropagationSharpness.
+        SHIncomingIrradiance lobeIrradiance = SHIncomingIrradianceComputeFromSphericalGaussian(lobeDirection, _PropagationSharpness, lobeRadiance);
+        SHIncomingIrradianceAccumulateFromSHIncomingIrradiance(accumulated, lobeIrradiance);
+    }
+
+    return accumulated;
+}
+
+SHIncomingIrradiance ProjectPropagationAxis(uint probeIndex)
+{
+#if !defined(SH_FROM_SG_PBR_FIT) && !defined(SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW)
+    return ProjectPropagationAxisFromPeak(probeIndex); // Neither fit keyword is defined: use the peak-sampling path.
+#else
+    return ProjectPropagationAxisFromFit(probeIndex); // At least one fit keyword is defined: use the fit path.
+#endif
+}
 
 
@@ -168,4 +268,3 @@ void CombinePropagationAxis(uint3 id : SV_DispatchThreadID)
         WriteFinalSHOutgoingRadiosityWithProjectedConstantsPacked(writeIndex, bakedOutgoingRadiosityProjectedConstantsPacked, validity);
     }
 }
-
diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeVolumeDynamicGI.cs b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeVolumeDynamicGI.cs
@@ -480,6 +480,30 @@ void DispatchPropagationCombine(CommandBuffer cmd, ProbeVolumeHandle probeVolume
             }
             cmd.SetComputeVectorArrayParam(shader, "_RayAxis", s_NeighborAxis);
 
+            cmd.SetComputeFloatParam(shader, "_PropagationSharpness", giSettings.propagationSharpness.value);
+
+            switch (giSettings.shFromSGMode.value)
+            {
+                case ProbeDynamicGI.SHFromSGMode.SamplePeakAndProject:
+                {
+                    CoreUtils.SetKeyword(shader, "SH_FROM_SG_PBR_FIT", false);
+                    CoreUtils.SetKeyword(shader, "SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW", false);
+                    break;
+                }
+                case ProbeDynamicGI.SHFromSGMode.SHFromSGFit:
+                {
+                    CoreUtils.SetKeyword(shader, "SH_FROM_SG_PBR_FIT", true);
+                    CoreUtils.SetKeyword(shader, "SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW", false);
+                    break;
+                }
+                case ProbeDynamicGI.SHFromSGMode.SHFromSGFitWithCosineWindow:
+                {
+                    CoreUtils.SetKeyword(shader, "SH_FROM_SG_PBR_FIT", false);
+                    CoreUtils.SetKeyword(shader, "SH_FROM_SG_PBR_FIT_WITH_COSINE_WINDOW", true);
+                    break;
+                }
+                default: break;
+            }
 
             int dispatchX = (numProbes + 63) / 64;
             cmd.DispatchCompute(shader, kernel, dispatchX, 1, 1);
diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeVolumeSphericalHarmonicsDeringing.hlsl b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeVolumeSphericalHarmonicsDeringing.hlsl
@@ -11,13 +11,6 @@ float ComputeLuminance(float3 color)
     return dot(color, float3(0.25, 0.5, 0.25));
 }
 
-// Dering the 9x float3 SH coefficients, and write out the corresponding shCoefficientIndex for the current pixel.
-// Needed for isolating individual color channels in deringing code.
-struct SHOutgoingRadiosityScalar
-{
-    float data[SH_COEFFICIENT_COUNT];
-};
-
 SHOutgoingRadiosityScalar SHOutgoingRadiosityReadColorChannel(SHOutgoingRadiosity shOutgoingRadiosity, int colorChannelIndex)
 {
     SHOutgoingRadiosityScalar shOutgoingRadiosityScalar;
@@ -46,168 +39,6 @@ void SHOutgoingRadiosityWriteColorChannel(inout SHOutgoingRadiosity shOutgoingRa
     }
 }
 
-void SHOutgoingRadiosityScalarRotateBand1(float3x3 M, inout float x[3])
-{
-    float3 SH = float3(-x[2], -x[0], x[1]);
-
-    x[0] = dot(SH, -float3(M[0][1], M[1][1], M[2][1]));
-    x[1] = dot(SH, float3(M[0][2], M[1][2], M[2][2]));
-    x[2] = dot(SH, -float3(M[0][0], M[1][0], M[2][0]));
-}
-
-void SHOutgoingRadiosityScalarRotateBand2(float3x3 M, inout float x[5])
-{
-    // Decomposed + factored version of 5x5 matrix multiply of invA * sh from source.
-    const float k0 = 0.9152912328637689;
-    const float k1 = 0.9152912328637689 * 2.0;
-    const float k2 = 1.5853309190424043;
-    float sh0 = x[1] * -0.5 + (x[3] * 0.5 + x[4]); // 2x MADD
-    float sh1 = (x[0] + (k2 / k0) * x[2] + x[3] + x[4]) * 0.5;
-    float sh2 = x[0];
-    float sh3 = -x[3];
-    float sh4 = -x[1];
-
-    const float k = 1.0 / sqrt(2.0);
-    const float kInv = sqrt(2.0);
-    const float k3 = k0 * 2.0 * K3SQRT5DIV4SQRTPI * k * k; // sqrt(3.0) / 2.0
-    const float k4 = k0 * 2.0 * -KALMOSTONETHIRD;
-
-    // Decomposed + factored version of 5x5 matrix multiply of 5 normals projected to 5 SH2 bands.
-    // Column 0
-    {
-        float3 rn0 = float3(M[0][0], M[0][1], M[0][2]) * kInv; // (float3(1, 0, 0) * M) / k;
-        x[0] = (rn0.x * rn0.y) * sh0;
-        x[1] = (-rn0.y * rn0.z) * sh0;
-        x[2] = (rn0.z * rn0.z * k3 + k4) * sh0;
-        x[3] = (-rn0.x * rn0.z) * sh0;
-        x[4] = (rn0.x * rn0.x - rn0.y * rn0.y) * sh0;
-    }
-
-    // Column 1
-    {
-        float3 rn1 = float3(M[2][0], M[2][1], M[2][2]) * kInv; // (float3(0, 0, 1) * M) / k;
-        x[0] += (rn1.x * rn1.y) * sh1;
-        x[1] += (-rn1.y * rn1.z) * sh1;
-        x[2] += (rn1.z * rn1.z * k3 + k4) * sh1;
-        x[3] += (-rn1.x * rn1.z) * sh1;
-        x[4] += (rn1.x * rn1.x - rn1.y * rn1.y) * sh1;
-    }
-
-    // Column 2
-    {
-        float3 rn2 = float3(M[0][0] + M[1][0], M[0][1] + M[1][1], M[0][2] + M[1][2]); // (float3(k, k, 0) * M) / k;
-        x[0] += (rn2.x * rn2.y) * sh2;
-        x[1] += (-rn2.y * rn2.z) * sh2;
-        x[2] += (rn2.z * rn2.z * k3 + k4) * sh2;
-        x[3] += (-rn2.x * rn2.z) * sh2;
-        x[4] += (rn2.x * rn2.x - rn2.y * rn2.y) * sh2;
-    }
-
-    // Column 3
-    {
-        float3 rn3 = float3(M[0][0] + M[2][0], M[0][1] + M[2][1], M[0][2] + M[2][2]); // (float3(k, 0, k) * M) / k;
-        x[0] += (rn3.x * rn3.y) * sh3;
-        x[1] += (-rn3.y * rn3.z) * sh3;
-        x[2] += (rn3.z * rn3.z * k3 + k4) * sh3;
-        x[3] += (-rn3.x * rn3.z) * sh3;
-        x[4] += (rn3.x * rn3.x - rn3.y * rn3.y) * sh3;
-    }
-
-    // Column 4
-    {
-        float3 rn4 = float3(M[1][0] + M[2][0], M[1][1] + M[2][1], M[1][2] + M[2][2]); // (float3(0, k, k) * M) / k;
-        x[0] += (rn4.x * rn4.y) * sh4;
-        x[1] += (-rn4.y * rn4.z) * sh4;
-        x[2] += (rn4.z * rn4.z * k3 + k4) * sh4;
-        x[3] += (-rn4.x * rn4.z) * sh4;
-        x[4] += (rn4.x * rn4.x - rn4.y * rn4.y) * sh4;
-    }
-
-    x[4] *= 0.5;
-}
-
-void SHOutgoingRadiosityScalarRotate(float3x3 M, inout SHOutgoingRadiosityScalar shOutgoingRadiosityScalar)
-{
-    float x1[3];
-    x1[0] = shOutgoingRadiosityScalar.data[1];
-    x1[1] = shOutgoingRadiosityScalar.data[2];
-    x1[2] = shOutgoingRadiosityScalar.data[3];
-    SHOutgoingRadiosityScalarRotateBand1(M, x1);
-    float x2[5];
-    x2[0] = shOutgoingRadiosityScalar.data[4];
-    x2[1] = shOutgoingRadiosityScalar.data[5];
-    x2[2] = shOutgoingRadiosityScalar.data[6];
-    x2[3] = shOutgoingRadiosityScalar.data[7];
-    x2[4] = shOutgoingRadiosityScalar.data[8];
-    SHOutgoingRadiosityScalarRotateBand2(M, x2);
-    shOutgoingRadiosityScalar.data[1] = x1[0];
-    shOutgoingRadiosityScalar.data[2] = x1[1];
-    shOutgoingRadiosityScalar.data[3] = x1[2];
-    shOutgoingRadiosityScalar.data[4] = x2[0];
-    shOutgoingRadiosityScalar.data[5] = x2[1];
-    shOutgoingRadiosityScalar.data[6] = x2[2];
-    shOutgoingRadiosityScalar.data[7] = x2[3];
-    shOutgoingRadiosityScalar.data[8] = x2[4];
-}
-
-// optimal linear direction, related to bent normal, etc.
-float3 SHOutgoingRadiosityScalarGetOptimalLinear(const SHOutgoingRadiosityScalar shOutgoingRadiosityScalar)
-{
-    return float3(-shOutgoingRadiosityScalar.data[3], -shOutgoingRadiosityScalar.data[1], shOutgoingRadiosityScalar.data[2]);
-}
-
-// source: Building an Orthonormal Basis, Revisited
-// http://jcgt.org/published/0006/01/01/
-// Same as reference implementation, except transposed.
-float3x3 ComputeTangentToWorldMatrix(float3 n)
-{
-    float3x3 res;
-    res[2][0] = n.x;
-    res[2][1] = n.y;
-    res[2][2] = n.z;
-
-    float s = (n.z >= 0.0f) ? 1.0f : -1.0f;
-    float a = -1.0f / (s + n.z);
-    float b = n.x * n.y * a;
-
-    res[0][0] = 1.0f + s * n.x * n.x * a;
-    res[0][1] = s * b;
-    res[0][2] = -s * n.x;
-
-    res[1][0] = b;
-    res[1][1] = s + n.y * n.y * a;
-    res[1][2] = -n.y;
-
-    return res;
-}
-
-void FrameFromNormal(float3 normal, out float3 tangent, out float3 binormal)
-{
-#if 0
-    // PPSloan version:
-    if (abs(normal.x) > abs(normal.z))
-    {
-        binormal.x = -normal.y;
-        binormal.y = normal.x;
-        binormal.z = 0.0f;
-    }
-    else
-    {
-        binormal.x = 0.0f;
-        binormal.y = -normal.z;
-        binormal.z = normal.y;
-    }
-
-    binormal = normalize(binormal);
-    tangent = cross(binormal, normal);
-#else
-    float3x3 tangentToWorldMatrix = ComputeTangentToWorldMatrix(normal);
-    float3x3 worldToTangentMatrix = transpose(transpose(tangentToWorldMatrix)); // TODO: Test this transpose - from the conversion from glsl to hlsl
-    binormal = worldToTangentMatrix[1];
-    tangent = worldToTangentMatrix[0];
-#endif
-}
-
 // a * z^2 + b * z + c + ce is the function, we add the window parameters l2, l1
 // l2 * a * z^2 + l1 * b * z + l2 *c + c3, and want to make sure we make the minimum zero (or some positive epsilon)
 float ComputeMinError(const float a, const float b, const float c, inout float zmin)
@@ -569,7 +400,7 @@ void SHZHDeRingFull(float zh[3], inout float4 window, const float q1err, const f
 
 void SHOutgoingRadiosityScalarComputeWindow(const SHOutgoingRadiosityScalar shOutgoingRadiosityScalar, inout float4 window)
 {
-    float3 optLin = SHOutgoingRadiosityScalarGetOptimalLinear(shOutgoingRadiosityScalar);
+    float3 optLin = SHOutgoingRadiosityScalarGetOptimalLinearDirection(shOutgoingRadiosityScalar);
 
     float vecMag = sqrt(dot(optLin, optLin));
 
diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeVolumeSphericalHarmonicsLighting.hlsl b/com.unity.render-pipelines.high-definition/Runtime/Lighting/ProbeVolume/DynamicGI/ProbeVolumeSphericalHarmonicsLighting.hlsl