diff --git a/main.cpp b/main.cpp index 9e768ef07..6361fc8ab 100644 --- a/main.cpp +++ b/main.cpp @@ -681,6 +681,7 @@ struct CLIArguments bool msl_replace_recursive_inputs = false; bool msl_readwrite_texture_fences = true; bool msl_agx_manual_cube_grad_fixup = false; + bool msl_input_attachment_is_ds_attachment = false; const char *msl_combined_sampler_suffix = nullptr; bool glsl_emit_push_constant_as_ubo = false; bool glsl_emit_ubo_as_plain_uniforms = false; @@ -873,6 +874,10 @@ static void print_help_msl() "\t[--msl-runtime-array-rich-descriptor]:\n\t\tWhen declaring a runtime array of SSBOs, declare an array of {ptr, len} pairs to support OpArrayLength.\n" "\t[--msl-replace-recursive-inputs]:\n\t\tWorks around a Metal 3.1 regression bug, which causes an infinite recursion crash during Metal's analysis of an entry point input structure that itself contains internal recursion.\n" "\t[--msl-texture-buffer-native]:\n\t\tEnable native support for texel buffers. Otherwise, it is emulated as a normal texture.\n" + "\t[--msl-input-attachment-is-ds-attachment]:\n\t\tAdds a simple depth passthrough in fragment shaders when they do not modify the depth value.\n" + "\t\tRequired to force Metal to write to the depth/stencil attachment post fragment execution.\n" + "\t\tOtherwise, Metal may optimize the write to pre fragment execution which goes against the Vulkan spec.\n" + "\t\tOnly required if an input attachment and depth/stencil attachment reference the same resource.\n" "\t[--msl-framebuffer-fetch]:\n\t\tImplement subpass inputs with frame buffer fetch.\n" "\t\tEmits [[color(N)]] inputs in fragment stage.\n" "\t\tRequires an Apple GPU.\n" @@ -1257,6 +1262,7 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.ios_support_base_vertex_instance = true; msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor; msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs; + msl_opts.input_attachment_is_ds_attachment = args.msl_input_attachment_is_ds_attachment; msl_opts.readwrite_texture_fences = args.msl_readwrite_texture_fences; msl_opts.agx_manual_cube_grad_fixup = args.msl_agx_manual_cube_grad_fixup; msl_comp->set_msl_options(msl_opts); @@ -1823,6 +1829,7 @@ static int main_inner(int argc, char *argv[]) [&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; }); cbs.add("--msl-replace-recursive-inputs", [&args](CLIParser &) { args.msl_replace_recursive_inputs = true; }); + cbs.add("--msl-input-attachment-is-ds-attachment", [&args](CLIParser &) { args.msl_input_attachment_is_ds_attachment = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/reference/opt/shaders-msl/frag/force-depth-write-early-tests.input-attachment-is-ds-attachment.frag b/reference/opt/shaders-msl/frag/force-depth-write-early-tests.input-attachment-is-ds-attachment.frag new file mode 100644 index 000000000..016661084 --- /dev/null +++ b/reference/opt/shaders-msl/frag/force-depth-write-early-tests.input-attachment-is-ds-attachment.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(texture2d inputDepth [[texture(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.color = inputDepth.read(uint2(gl_FragCoord.xy)); + return out; +} + diff --git a/reference/opt/shaders-msl/frag/force-depth-write-false.input-attachment-is-ds-attachment.frag b/reference/opt/shaders-msl/frag/force-depth-write-false.input-attachment-is-ds-attachment.frag new file mode 100644 index 000000000..2e1079452 --- /dev/null +++ b/reference/opt/shaders-msl/frag/force-depth-write-false.input-attachment-is-ds-attachment.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color [[color(0)]]; + float gl_FragDepth [[depth(any)]]; +}; + +fragment main0_out main0(texture2d inputDepth [[texture(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.color = inputDepth.read(uint2(gl_FragCoord.xy)); + out.gl_FragDepth = 1.0; + return out; +} + diff --git a/reference/opt/shaders-msl/frag/force-depth-write.input-attachment-is-ds-attachment.frag b/reference/opt/shaders-msl/frag/force-depth-write.input-attachment-is-ds-attachment.frag new file mode 100644 index 000000000..f59750af3 --- /dev/null +++ b/reference/opt/shaders-msl/frag/force-depth-write.input-attachment-is-ds-attachment.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color [[color(0)]]; + float gl_FragDepth [[depth(any)]]; +}; + +fragment main0_out main0(texture2d inputDepth [[texture(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.color = inputDepth.read(uint2(gl_FragCoord.xy)); + out.gl_FragDepth = gl_FragCoord.z; + return out; +} + diff --git a/reference/shaders-msl/frag/force-depth-write-early-tests.input-attachment-is-ds-attachment.frag b/reference/shaders-msl/frag/force-depth-write-early-tests.input-attachment-is-ds-attachment.frag new file mode 100644 index 000000000..016661084 --- /dev/null +++ b/reference/shaders-msl/frag/force-depth-write-early-tests.input-attachment-is-ds-attachment.frag @@ -0,0 +1,17 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color [[color(0)]]; +}; + +[[ early_fragment_tests ]] fragment main0_out main0(texture2d inputDepth [[texture(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.color = inputDepth.read(uint2(gl_FragCoord.xy)); + return out; +} + diff --git a/reference/shaders-msl/frag/force-depth-write-false.input-attachment-is-ds-attachment.frag b/reference/shaders-msl/frag/force-depth-write-false.input-attachment-is-ds-attachment.frag new file mode 100644 index 000000000..2e1079452 --- /dev/null +++ b/reference/shaders-msl/frag/force-depth-write-false.input-attachment-is-ds-attachment.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color [[color(0)]]; + float gl_FragDepth [[depth(any)]]; +}; + +fragment main0_out main0(texture2d inputDepth [[texture(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.color = inputDepth.read(uint2(gl_FragCoord.xy)); + out.gl_FragDepth = 1.0; + return out; +} + diff --git a/reference/shaders-msl/frag/force-depth-write.input-attachment-is-ds-attachment.frag b/reference/shaders-msl/frag/force-depth-write.input-attachment-is-ds-attachment.frag new file mode 100644 index 000000000..f59750af3 --- /dev/null +++ b/reference/shaders-msl/frag/force-depth-write.input-attachment-is-ds-attachment.frag @@ -0,0 +1,19 @@ +#include +#include + +using namespace metal; + +struct main0_out +{ + float4 color [[color(0)]]; + float gl_FragDepth [[depth(any)]]; +}; + +fragment main0_out main0(texture2d inputDepth [[texture(0)]], float4 gl_FragCoord [[position]]) +{ + main0_out out = {}; + out.color = inputDepth.read(uint2(gl_FragCoord.xy)); + out.gl_FragDepth = gl_FragCoord.z; + return out; +} + diff --git a/shaders-msl/frag/force-depth-write-early-tests.input-attachment-is-ds-attachment.frag b/shaders-msl/frag/force-depth-write-early-tests.input-attachment-is-ds-attachment.frag new file mode 100644 index 000000000..a1d9f6322 --- /dev/null +++ b/shaders-msl/frag/force-depth-write-early-tests.input-attachment-is-ds-attachment.frag @@ -0,0 +1,12 @@ +#version 450 + +layout(early_fragment_tests) in; + +layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth; + +layout (location = 0) out vec4 color; + +void main() +{ + color = subpassLoad(inputDepth); +} diff --git a/shaders-msl/frag/force-depth-write-false.input-attachment-is-ds-attachment.frag b/shaders-msl/frag/force-depth-write-false.input-attachment-is-ds-attachment.frag new file mode 100644 index 000000000..27769b79c --- /dev/null +++ b/shaders-msl/frag/force-depth-write-false.input-attachment-is-ds-attachment.frag @@ -0,0 +1,11 @@ +#version 450 + +layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth; + +layout (location = 0) out vec4 color; + +void main() +{ + color = subpassLoad(inputDepth); + gl_FragDepth = 1.0f; +} diff --git a/shaders-msl/frag/force-depth-write.input-attachment-is-ds-attachment.frag b/shaders-msl/frag/force-depth-write.input-attachment-is-ds-attachment.frag new file mode 100644 index 000000000..eb6c4eb20 --- /dev/null +++ b/shaders-msl/frag/force-depth-write.input-attachment-is-ds-attachment.frag @@ -0,0 +1,10 @@ +#version 450 + +layout (input_attachment_index = 0, binding = 0) uniform subpassInput inputDepth; + +layout (location = 0) out vec4 color; + +void main() +{ + color = subpassLoad(inputDepth); +} diff --git a/spirv_msl.cpp b/spirv_msl.cpp index ef9076038..f8220c3c9 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -271,11 +271,14 @@ void CompilerMSL::build_implicit_builtins() active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance)); bool need_local_invocation_index = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId); bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups); + bool force_frag_depth_passthrough = + get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input && + msl_options.enable_frag_depth_builtin && msl_options.input_attachment_is_ds_attachment; if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation || - has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size) + has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough) { bool has_frag_coord = false; bool has_sample_id = false; @@ -292,6 +295,7 @@ void CompilerMSL::build_implicit_builtins() bool has_helper_invocation = false; bool has_local_invocation_index = false; bool has_workgroup_size = false; + bool has_frag_depth = false; uint32_t workgroup_id_type = 0; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { @@ -312,6 +316,13 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassOutput, BuiltInSampleMask, var.self); does_shader_write_sample_mask = true; } + + if (force_frag_depth_passthrough && builtin == BuiltInFragDepth) + { + builtin_frag_depth_id = var.self; + mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var.self); + has_frag_depth = true; + } } if (var.storage != StorageClassInput) @@ -902,6 +913,36 @@ void CompilerMSL::build_implicit_builtins() builtin_workgroup_size_id = var_id; mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); } + + if (!has_frag_depth && force_frag_depth_passthrough) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_FragDepth + SPIRType float_type { OpTypeFloat }; + float_type.basetype = SPIRType::Float; + float_type.width = 32; + float_type.vecsize = 1; + set(type_id, float_type); + + SPIRType float_type_ptr_in = float_type; + float_type_ptr_in.op = spv::OpTypePointer; + float_type_ptr_in.pointer = true; + float_type_ptr_in.pointer_depth++; + float_type_ptr_in.parent_type = type_id; + float_type_ptr_in.storage = StorageClassOutput; + + auto &ptr_in_type = set(type_ptr_id, float_type_ptr_in); + ptr_in_type.self = type_id; + set(var_id, type_ptr_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInFragDepth); + builtin_frag_depth_id = var_id; + mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var_id); + active_output_builtins.set(BuiltInFragDepth); + } } if (needs_swizzle_buffer_def) @@ -1571,6 +1612,8 @@ string CompilerMSL::compile() add_active_interface_variable(builtin_dispatch_base_id); if (builtin_sample_mask_id) add_active_interface_variable(builtin_sample_mask_id); + if (builtin_frag_depth_id) + add_active_interface_variable(builtin_frag_depth_id); // Create structs to hold input, output and uniform variables. // Do output first to ensure out. is declared at top of entry function. @@ -1869,8 +1912,13 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: { uint32_t base_id = ops[0]; if (global_var_ids.find(base_id) != global_var_ids.end()) + { added_arg_ids.insert(base_id); + if (msl_options.input_attachment_is_ds_attachment && base_id == builtin_frag_depth_id) + writes_to_depth = true; + } + uint32_t rvalue_id = ops[1]; if (global_var_ids.find(rvalue_id) != global_var_ids.end()) added_arg_ids.insert(rvalue_id); @@ -14513,16 +14561,33 @@ void CompilerMSL::fix_up_shader_inputs_outputs() } } else if (var.storage == StorageClassOutput && get_execution_model() == ExecutionModelFragment && - is_builtin_variable(var) && active_output_builtins.get(bi_type) && - bi_type == BuiltInSampleMask && has_additional_fixed_sample_mask()) + is_builtin_variable(var) && active_output_builtins.get(bi_type)) { - // If the additional fixed sample mask was set, we need to adjust the sample_mask - // output to reflect that. If the shader outputs the sample_mask itself too, we need - // to AND the two masks to get the final one. - string op_str = does_shader_write_sample_mask ? " &= " : " = "; - entry_func.fixup_hooks_out.push_back([=]() { - statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";"); - }); + switch (bi_type) + { + case BuiltInSampleMask: + if (has_additional_fixed_sample_mask()) + { + // If the additional fixed sample mask was set, we need to adjust the sample_mask + // output to reflect that. If the shader outputs the sample_mask itself too, we need + // to AND the two masks to get the final one. + string op_str = does_shader_write_sample_mask ? " &= " : " = "; + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_sample_mask_id), op_str, additional_fixed_sample_mask_str(), ";"); + }); + } + break; + case BuiltInFragDepth: + if (msl_options.input_attachment_is_ds_attachment && !writes_to_depth) + { + entry_func.fixup_hooks_out.push_back([=]() { + statement(to_expression(builtin_frag_depth_id), " = ", to_expression(builtin_frag_coord_id), ".z;"); + }); + } + break; + default: + break; + } } }); } diff --git a/spirv_msl.hpp b/spirv_msl.hpp index 747690ebe..9a1715808 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -529,6 +529,13 @@ class CompilerMSL : public CompilerGLSL // with side effects. Provided as an option hoping Metal will fix this issue in the future. bool force_fragment_with_side_effects_execution = false; + // If set, adds a depth pass through statement to circumvent the following issue: + // When the same depth/stencil is used as input and depth/stencil attachment, we need to + // force Metal to perform the depth/stencil write after fragment execution. Otherwise, + // Metal will write to the depth attachment before fragment execution. This happens + // if the fragment does not modify the depth value. + bool input_attachment_is_ds_attachment = false; + bool is_ios() const { return platform == iOS; @@ -1094,6 +1101,7 @@ class CompilerMSL : public CompilerGLSL uint32_t builtin_stage_input_size_id = 0; uint32_t builtin_local_invocation_index_id = 0; uint32_t builtin_workgroup_size_id = 0; + uint32_t builtin_frag_depth_id = 0; uint32_t swizzle_buffer_id = 0; uint32_t buffer_size_buffer_id = 0; uint32_t view_mask_buffer_id = 0; @@ -1190,6 +1198,7 @@ class CompilerMSL : public CompilerGLSL bool needs_subgroup_size = false; bool needs_sample_id = false; bool needs_helper_invocation = false; + bool writes_to_depth = false; std::string qual_pos_var_name; std::string stage_in_var_name = "in"; std::string stage_out_var_name = "out"; diff --git a/test_shaders.py b/test_shaders.py index 796e9703d..481a2d152 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -388,6 +388,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): msl_args.append('--msl-runtime-array-rich-descriptor') if '.replace-recursive-inputs.' in shader: msl_args.append('--msl-replace-recursive-inputs') + if '.input-attachment-is-ds-attachment.' in shader: + msl_args.append('--msl-input-attachment-is-ds-attachment') if '.mask-location-0.' in shader: msl_args.append('--mask-stage-output-location') msl_args.append('0')