From 0b51ba8a6a378dfc8cb6eb4272db60932b1ac814 Mon Sep 17 00:00:00 2001 From: Stuart Carnie Date: Mon, 19 Jan 2026 07:22:16 +1100 Subject: [PATCH] Renderer: Simplify shader and cluster builder - gl_HelperInvocation + MSAA is the root cause of the rendering artifacts. - Disable the gl_HelperInvocation check using a specialisation constant, which removes two variants from compilation - Remove NO_IMAGE_ATOMICS variant, as the macros are not used in cluster_render.glsl, removing a further two variants. --- .../renderer_rd/cluster_builder_rd.cpp | 81 ++++--------------- .../renderer_rd/cluster_builder_rd.h | 11 --- .../renderer_rd/shaders/cluster_render.glsl | 16 ++-- 3 files changed, 22 insertions(+), 86 deletions(-) diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp index 65be409491c6..c44a4da93b3f 100644 --- a/servers/rendering/renderer_rd/cluster_builder_rd.cpp +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -57,89 +57,40 @@ ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() { Vector variants; variants.push_back(""); variants.push_back("\n#define USE_ATTACHMENT\n"); - variants.push_back("\n#define MOLTENVK_USED\n#define NO_IMAGE_ATOMICS\n"); - variants.push_back("\n#define USE_ATTACHMENT\n#define MOLTENVK_USED\n#define NO_IMAGE_ATOMICS\n"); - variants.push_back("\n#define NO_IMAGE_ATOMICS\n"); - variants.push_back("\n#define MOLTENVK_USED\n#define NO_IMAGE_ATOMICS\n"); ClusterRender::ShaderVariant shader_variant; RenderingDevice *rd = RD::get_singleton(); if (rd->has_feature(RD::SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS)) { fb_format = rd->framebuffer_format_create_empty(); blend_state = RD::PipelineColorBlendState::create_disabled(); -#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)) - if (rd->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) { - shader_variant = ClusterRender::SHADER_NORMAL_MOLTENVK; - } else if (rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) { - shader_variant = ClusterRender::SHADER_NORMAL; - } else { - shader_variant = ClusterRender::SHADER_NORMAL_NO_ATOMICS; - } -#else - if (rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) { - shader_variant = ClusterRender::SHADER_NORMAL; - } else { - shader_variant = ClusterRender::SHADER_NORMAL_NO_ATOMICS; - } -#endif + shader_variant = ClusterRender::SHADER_NORMAL; } else { Vector afs; afs.push_back(RD::AttachmentFormat()); afs.write[0].usage_flags = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; fb_format = rd->framebuffer_format_create(afs); blend_state = RD::PipelineColorBlendState::create_blend(); -#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)) - if (rd->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) { - shader_variant = ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK; - } else if (rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) { - shader_variant = ClusterRender::SHADER_USE_ATTACHMENT; - } else { - shader_variant = ClusterRender::SHADER_USE_ATTACHMENT_NO_ATOMICS; - } -#else - if (rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) { - shader_variant = ClusterRender::SHADER_USE_ATTACHMENT; - } else { - shader_variant = ClusterRender::SHADER_USE_ATTACHMENT_NO_ATOMICS; - } -#endif + shader_variant = ClusterRender::SHADER_USE_ATTACHMENT; } cluster_render.cluster_render_shader.initialize(variants); -#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)) - if (rd->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) { - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL_NO_ATOMICS, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT_NO_ATOMICS, false); - } else if (rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) { - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL_MOLTENVK, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL_NO_ATOMICS, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT_NO_ATOMICS, false); - } else { - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL_MOLTENVK, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK, false); - } -#else - if (rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) { - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL_MOLTENVK, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL_NO_ATOMICS, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT_NO_ATOMICS, false); - } else { - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL_MOLTENVK, false); - cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK, false); - } -#endif cluster_render.shader_version = cluster_render.cluster_render_shader.version_create(); cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, shader_variant); cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rasterization_state, RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); - cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rasterization_state, ms, RD::PipelineDepthStencilState(), blend_state, 0); + + // On Apple platforms, gl_HelperInvocation (simd_is_helper_thread()) is unreliable with MSAA, causing rendering artifacts. + // Disable the helper invocation check for the MSAA pipeline via specialization constant. + Vector specialization_constants; +#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED) + { + RD::PipelineSpecializationConstant sc; + sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL; + sc.constant_id = 0; // sc_use_helper_check + sc.bool_value = false; + specialization_constants.push_back(sc); + } +#endif + cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rasterization_state, ms, RD::PipelineDepthStencilState(), blend_state, 0, 0, specialization_constants); } { Vector versions; diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.h b/servers/rendering/renderer_rd/cluster_builder_rd.h index b6acf2a37abf..46243b935747 100644 --- a/servers/rendering/renderer_rd/cluster_builder_rd.h +++ b/servers/rendering/renderer_rd/cluster_builder_rd.h @@ -76,10 +76,6 @@ class ClusterBuilderSharedDataRD { enum ShaderVariant { SHADER_NORMAL, SHADER_USE_ATTACHMENT, - SHADER_NORMAL_MOLTENVK, - SHADER_USE_ATTACHMENT_MOLTENVK, - SHADER_NORMAL_NO_ATOMICS, - SHADER_USE_ATTACHMENT_NO_ATOMICS, }; enum PipelineVersion { @@ -193,14 +189,7 @@ class ClusterBuilderRD { }; uint32_t cluster_size = 32; -#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED) - // Results in visual artifacts on macOS and iOS/visionOS when using MSAA and subgroups. - // Using subgroups and disabling MSAA is the optimal solution for now and also works - // with MoltenVK. - bool use_msaa = false; -#else bool use_msaa = true; -#endif Divisor divisor = DIVISOR_4; Size2i screen_size; diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl index 91ab74883582..126bd75f7586 100644 --- a/servers/rendering/renderer_rd/shaders/cluster_render.glsl +++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl @@ -69,6 +69,10 @@ void main() { #extension GL_KHR_shader_subgroup_arithmetic : enable #extension GL_KHR_shader_subgroup_vote : enable +// On Apple platforms, gl_HelperInvocation (simd_is_helper_thread()) is unreliable with MSAA, causing rendering artifacts. +// Setting this to false will disable the helper invocation check. +layout(constant_id = 0) const bool sc_use_helper_check = true; + layout(location = 0) in float depth_interp; layout(location = 1) in flat uint element_index; @@ -114,11 +118,7 @@ void main() { uint aux = 0; uint cluster_thread_group_index; -#ifndef MOLTENVK_USED - if (!gl_HelperInvocation) { -#else - { -#endif + if (!sc_use_helper_check || !gl_HelperInvocation) { //https://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf uvec4 mask; @@ -151,11 +151,7 @@ void main() { uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index; uint z_write_bit = 1 << z_bit; -#ifndef MOLTENVK_USED - if (!gl_HelperInvocation) { -#else - { -#endif + if (!sc_use_helper_check || !gl_HelperInvocation) { z_write_bit = subgroupOr(z_write_bit); //merge all Zs if (cluster_thread_group_index == 0) { aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);