TAA fixes and improvements

pixelflinger · pixelflinger · commit 87249950421b · 2024-12-17T21:49:00.000-08:00
- use a Lanczos filter for sampling the color buffer, instead of a
  Blackman-Harris window. This improves sharpness quite a bit.

- some cleanups of the shader code

- never use YCoCg when rectification is not enabled.

- fix the calculation of the confidence parameter when upscaling is used.


Upscaling works a lot better now, but it is still a work in progress.
diff --git a/filament/include/filament/Options.h b/filament/include/filament/Options.h
@@ -438,7 +438,7 @@ struct MultiSampleAntiAliasingOptions {
  * @see setTemporalAntiAliasingOptions()
  */
 struct TemporalAntiAliasingOptions {
-    float filterWidth = 1.0f;   //!< reconstruction filter width typically between 0.2 (sharper, aliased) and 1.5 (smoother)
+    float filterWidth = 1.0f;   //!< reconstruction filter width typically between 1 (sharper) and 2 (smoother)
     float feedback = 0.12f;     //!< history feedback, between 0 (maximum temporal AA) and 1 (no temporal AA).
     float lodBias = -1.0f;      //!< texturing lod bias (typically -1 or -2)
     float sharpness = 0.0f;     //!< post-TAA sharpen, especially useful when upscaling is true.
diff --git a/filament/src/PostProcessManager.cpp b/filament/src/PostProcessManager.cpp
@@ -2622,7 +2622,7 @@ void PostProcessManager::TaaJitterCamera(
     current.projection = inoutCameraInfo->projection * inoutCameraInfo->getUserViewMatrix();
     current.frameId = previous.frameId + 1;
 
-    auto jitterPosition = [pattern = taaOptions.jitterPattern](size_t frameIndex){
+    auto jitterPosition = [pattern = taaOptions.jitterPattern](size_t frameIndex) -> float2 {
         using JitterPattern = TemporalAntiAliasingOptions::JitterPattern;
         switch (pattern) {
             case JitterPattern::RGSS_X4:
@@ -2636,6 +2636,7 @@ void PostProcessManager::TaaJitterCamera(
             case JitterPattern::HALTON_23_X32:
                 return sHaltonSamples(frameIndex);
         }
+        return { 0.0f, 0.0f };
     };
 
     // sample position within a pixel [-0.5, 0.5]
@@ -2759,15 +2760,31 @@ FrameGraphId<FrameGraphTexture> PostProcessManager::taa(FrameGraph& fg,
                 }};
 
                 constexpr float2 sampleOffsets[9] = {
-                        { -1.0f, -1.0f }, {  0.0f, -1.0f }, {  1.0f, -1.0f },
-                        { -1.0f,  0.0f }, {  0.0f,  0.0f }, {  1.0f,  0.0f },
-                        { -1.0f,  1.0f }, {  0.0f,  1.0f }, {  1.0f,  1.0f },
+                        { -1.0f, -1.0f }, {  0.0f, -1.0f }, {  1.0f, -1.0f }, { -1.0f,  0.0f },
+                        {  0.0f,  0.0f },
+                        {  1.0f,  0.0f }, { -1.0f,  1.0f }, {  0.0f,  1.0f }, {  1.0f,  1.0f },
                 };
 
                 constexpr float2 subSampleOffsets[4] = {
-                        { -0.25f, 0.25f }, {  0.25f, 0.25f }, { 0.25f, -0.25f }, { -0.25f, -0.25f }
+                        { -0.25f,  0.25f },
+                        {  0.25f,  0.25f },
+                        {  0.25f, -0.25f },
+                        { -0.25f, -0.25f }
                 };
 
+                UTILS_UNUSED // Lanczos kernel, even in x: 1 near 0 (avoids 0/0), 0 for |x| > a
+                auto const lanczos = [](float x, float a) -> float {
+                    if (std::abs(x) <= std::numeric_limits<float>::epsilon()) {
+                        return 1.0f;
+                    }
+                    if (std::abs(x) <= a) { // inside the support: a*sin(pi*x)*sin(pi*x/a)/(pi*x)^2
+                        return (a * std::sin(f::PI * x) * std::sin(f::PI * x / a))
+                               / ((f::PI * f::PI) * (x * x));
+                    }
+                    return 0.0f; // outside the support
+                };
+
+                float const filterWidth = std::clamp(taaOptions.filterWidth, 1.0f, 2.0f);
                 float4 sum = 0.0;
                 float4 weights[9];
 
@@ -2777,11 +2794,9 @@ FrameGraphId<FrameGraphTexture> PostProcessManager::taa(FrameGraph& fg,
                 for (size_t i = 0; i < 9; i++) {
                     float2 const o = sampleOffsets[i];
                     for (size_t j = 0; j < 4; j++) {
-                        float2 const s = taaOptions.upscaling ? subSampleOffsets[j] : float2{ 0 };
-                        float2 const d = (o - current.jitter - s) / taaOptions.filterWidth;
-                        // This is a gaussian fit of a 3.3-wide Blackman-Harris window
-                        // see: "High Quality Temporal Supersampling" by Brian Karis
-                        weights[i][j] = std::exp(-2.29f * (d.x * d.x + d.y * d.y));
+                        float2 const subPixelOffset = taaOptions.upscaling ? subSampleOffsets[j] : float2{ 0 };
+                        float2 const d = (o - (current.jitter - subPixelOffset)) / filterWidth;
+                        weights[i][j] = lanczos(length(d), filterWidth);
                     }
                     sum += weights[i];
                 }
diff --git a/filament/src/PostProcessManager.h b/filament/src/PostProcessManager.h
@@ -428,7 +428,7 @@ class PostProcessManager {
 
     template<size_t SIZE>
     struct JitterSequence {
-        auto operator()(size_t i) const noexcept { return positions[i % SIZE] - 0.5f; }
+        math::float2 operator()(size_t i) const noexcept { return positions[i % SIZE] - 0.5f; }
         const std::array<math::float2, SIZE> positions;
     };
 
diff --git a/filament/src/materials/antiAliasing/taa.mat b/filament/src/materials/antiAliasing/taa.mat
@@ -127,7 +127,8 @@ float lumaYCoCg(const vec3 c) {
 }
 
 float luma(const vec3 c) {
-    return materialConstants_useYCoCg ? lumaYCoCg(c) : lumaRGB(c);
+    return (materialConstants_useYCoCg && materialConstants_boxClipping != BOX_CLIPPING_NONE) ?
+            lumaYCoCg(c) : lumaRGB(c);
 }
 
 vec3 tonemap(const vec3 c) {
@@ -278,10 +279,6 @@ void postProcess(inout PostProcessInputs postProcess) {
         history = textureLod(materialParams_history, uv.zw, 0.0);
     }
 
-    if (materialConstants_useYCoCg) {
-        history.rgb = RGB_YCoCg(history.rgb);
-    }
-
     highp vec2 size = vec2(textureSize(materialParams_color, 0));
     highp vec2 p = (floor(uv.xy * size) + 0.5) / size;
     vec4 filtered = textureLod(materialParams_color, p, 0.0);
@@ -297,47 +294,46 @@ void postProcess(inout PostProcessInputs postProcess) {
         s[6] = textureLodOffset(materialParams_color, p, 0.0, ivec2(-1,  1)).rgb;
         s[7] = textureLodOffset(materialParams_color, p, 0.0, ivec2( 0,  1)).rgb;
         s[8] = textureLodOffset(materialParams_color, p, 0.0, ivec2( 1,  1)).rgb;
-        if (materialConstants_useYCoCg) {
-            for (int i = 0; i < 9; i++) {
-                s[i] = RGB_YCoCg(s[i]);
-            }
-        }
     }
 
-    vec2 subPixelOffset = p - uv.xy;  // +/- [0.25, 0.25]
-    float confidence = materialConstants_upscaling ? 0.0 : 1.0;
+    int j = 0;
+    float confidence = 1.0;
+    if (materialConstants_upscaling) {
+        highp vec2 subPixelOffset = (p - uv.xy) * size;  // +/- [0.25, 0.25]
+
+        // we reduce the contribution of a sample based on the distance
+        // to the high resolution pixel center
+        const float cutoff = 0.5;
+        highp float l = length(materialParams.jitter - subPixelOffset) / cutoff;
+        confidence = saturate(1.0 - l * l);
+
+        if (materialConstants_filterInput) {
+            int jxp = subPixelOffset.y > 0.0 ? 1 : 2;
+            int jxn = subPixelOffset.y > 0.0 ? 0 : 3;
+            j = subPixelOffset.x > 0.0 ? jxp : jxn;
+        }
+    }
 
     if (materialConstants_filterInput) {
         // unjitter/filter input
-        // figure out which set of coeficients to use
-        filtered = vec4(0, 0, 0, filtered.a);
-        if (materialConstants_upscaling) {
-            int jxp = subPixelOffset.y > 0.0 ? 3 : 0;
-            int jxn = subPixelOffset.y > 0.0 ? 2 : 1;
-            int j   = subPixelOffset.x > 0.0 ? jxp : jxn;
-            for (int i = 0; i < 9; i++) {
-                float w = materialParams.filterWeights[i][j];
-                filtered.rgb += s[i] * w;
-                confidence = max(confidence, w);
-            }
-        } else {
-            for (int i = 0; i < 9; i++) {
-                float w = materialParams.filterWeights[i][0];
-                filtered.rgb += s[i] * w;
-            }
-        }
-    } else {
-        if (materialConstants_useYCoCg) {
-            filtered.rgb = RGB_YCoCg(filtered.rgb);
-        }
-        if (materialConstants_upscaling) {
-            confidence = float(materialParams.jitter.x * subPixelOffset.x > 0.0 &&
-                               materialParams.jitter.y * subPixelOffset.y > 0.0);
+        filtered = vec4(vec3(0), filtered.a);
+        for (int i = 0; i < 9; i++) {
+            float w = materialParams.filterWeights[i][j];
+            filtered.rgb += s[i] * w;
         }
+        filtered.rgb = max(filtered.rgb, vec3(0));
     }
 
     // build the history clamping box
     if (materialConstants_boxClipping != BOX_CLIPPING_NONE) {
+        if (materialConstants_useYCoCg) {
+            history.rgb = RGB_YCoCg(history.rgb);
+            filtered.rgb = RGB_YCoCg(filtered.rgb);
+            for (int i = 0; i < 9; i++) {
+                s[i] = RGB_YCoCg(s[i]);
+            }
+        }
+
         vec3 boxmin;
         vec3 boxmax;
         if (materialConstants_boxType == BOX_TYPE_AABB ||
@@ -346,7 +342,7 @@ void postProcess(inout PostProcessInputs postProcess) {
             boxmax = max(s[4], max(max(s[1], s[3]), max(s[5], s[7])));
             vec3 box9min = min(boxmin, min(min(s[0], s[2]), min(s[6], s[8])));
             vec3 box9max = max(boxmax, max(max(s[0], s[2]), max(s[6], s[8])));
-            // round the corners of the 3x3 box
+            // round the corners of the 3x3 box, giving less importance to the corner samples
             boxmin = (boxmin + box9min) * 0.5;
             boxmax = (boxmax + box9max) * 0.5;
         }
@@ -388,9 +384,11 @@ void postProcess(inout PostProcessInputs postProcess) {
     }
 
     // go back to RGB space before tonemapping
-    if (materialConstants_useYCoCg) {
-        filtered.rgb = YCoCg_RGB(filtered.rgb);
-        history.rgb = YCoCg_RGB(history.rgb);
+    if (materialConstants_boxClipping != BOX_CLIPPING_NONE) {
+        if (materialConstants_useYCoCg) {
+            filtered.rgb = YCoCg_RGB(filtered.rgb);
+            history.rgb = YCoCg_RGB(history.rgb);
+        }
     }
 
     // tonemap before mixing