Skip to content

Commit

Permalink
Merge pull request #2233 from cdavis5e/agx-cube-grad-fixup
Browse files Browse the repository at this point in the history
MSL: Work around broken cube texture gradients on Apple Silicon.
  • Loading branch information
HansKristian-Work authored Nov 28, 2023
2 parents 3717660 + 18976c4 commit 50e90dd
Show file tree
Hide file tree
Showing 13 changed files with 382 additions and 15 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ set(spirv-cross-util-sources
${CMAKE_CURRENT_SOURCE_DIR}/spirv_cross_util.hpp)

set(spirv-cross-abi-major 0)
set(spirv-cross-abi-minor 57)
set(spirv-cross-abi-minor 58)
set(spirv-cross-abi-patch 0)
set(SPIRV_CROSS_VERSION ${spirv-cross-abi-major}.${spirv-cross-abi-minor}.${spirv-cross-abi-patch})

Expand Down
14 changes: 14 additions & 0 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,8 @@ struct CLIArguments
bool msl_sample_dref_lod_array_as_grad = false;
bool msl_runtime_array_rich_descriptor = false;
bool msl_replace_recursive_inputs = false;
bool msl_readwrite_texture_fences = true;
bool msl_agx_manual_cube_grad_fixup = false;
const char *msl_combined_sampler_suffix = nullptr;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
Expand Down Expand Up @@ -958,6 +960,14 @@ static void print_help_msl()
"\t\tSome Metal devices have a bug where the level() argument to\n"
"\t\tdepth2d_array<T>::sample_compare() in a fragment shader is biased by some\n"
"\t\tunknown amount. This prevents the bias from being added.\n"
"\t[--msl-no-readwrite-texture-fences]:\n\t\tDo not insert fences before each read of a\n"
"\t\tread_write texture. MSL does not guarantee coherence between writes and later reads\n"
"\t\tof read_write textures. If you don't rely on this, you can disable this for a\n"
"\t\tpossible performance improvement.\n"
"\t[--msl-agx-manual-cube-grad-fixup]:\n\t\tManually transform cube texture gradients.\n"
"\t\tAll released Apple Silicon GPUs to date ignore one of the three partial derivatives\n"
"\t\tbased on the selected major axis, and expect the remaining derivatives to be\n"
"\t\tpartially transformed. This fixup gives correct results on Apple Silicon.\n"
"\t[--msl-combined-sampler-suffix <suffix>]:\n\t\tUses a custom suffix for combined samplers.\n");
// clang-format on
}
Expand Down Expand Up @@ -1236,6 +1246,8 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.ios_support_base_vertex_instance = true;
msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor;
msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs;
msl_opts.readwrite_texture_fences = args.msl_readwrite_texture_fences;
msl_opts.agx_manual_cube_grad_fixup = args.msl_agx_manual_cube_grad_fixup;
msl_comp->set_msl_options(msl_opts);
for (auto &v : args.msl_discrete_descriptor_sets)
msl_comp->add_discrete_descriptor_set(v);
Expand Down Expand Up @@ -1790,6 +1802,8 @@ static int main_inner(int argc, char *argv[])
cbs.add("--msl-check-discarded-frag-stores", [&args](CLIParser &) { args.msl_check_discarded_frag_stores = true; });
cbs.add("--msl-sample-dref-lod-array-as-grad",
[&args](CLIParser &) { args.msl_sample_dref_lod_array_as_grad = true; });
cbs.add("--msl-no-readwrite-texture-fences", [&args](CLIParser &) { args.msl_readwrite_texture_fences = false; });
cbs.add("--msl-agx-manual-cube-grad-fixup", [&args](CLIParser &) { args.msl_agx_manual_cube_grad_fixup = true; });
cbs.add("--msl-combined-sampler-suffix", [&args](CLIParser &parser) {
args.msl_combined_sampler_suffix = parser.next_string();
});
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Builds the gradientcube argument for a cube-texture sample from the direction P
// and its derivatives dPdx / dPdy, pre-transforming the derivatives relative to the
// major axis of P (the component of P with the largest magnitude).
//   P    - cube sampling direction (callers pass the same vector used as the sample coordinate)
//   dPdx - derivative of P that feeds the first gradientcube argument
//   dPdy - derivative of P that feeds the second gradientcube argument
// Returns a gradientcube whose components are filled from the transformed derivatives.
static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)
{
// Major axis selection
float3 absP = abs(P);
// X wins ties against both Y and Z.
bool xMajor = absP.x >= max(absP.y, absP.z);
// Only consulted when xMajor is false; Y wins ties against Z.
bool yMajor = absP.y >= absP.z;
// Permute components so the major axis always lands in .z
// (X major: yzx, Y major: xzy, Z major: unchanged).
float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);
// The derivatives receive the same permutation as P.
float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);
float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);

// Skip a couple of operations compared to usual projection
// d.xy comes from dPdx and d.zw from dPdy: each minor-axis derivative minus
// (minor / major) times the major-axis derivative.
// NOTE(review): Q.z is the largest-magnitude component of P, so this divides by
// zero only when P is the zero vector.
float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);

// Final swizzle to put the intermediate values into non-ignored components
// X major: X and Z
// Y major: X and Y
// Z major: Y and Z
// The non-X-major swizzle is shared by the Y-major and Z-major cases.
return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);
}

// Fragment shader output: a single color written to color attachment 0.
struct main0_out
{
float4 o_color [[color(0)]];
};

// Fragment shader stage inputs, bound by user location.
struct main0_in
{
// main0 uses .xyz as the cube coordinate and .w (rounded) as the array index.
float4 v_texCoord [[user(locn0)]];
// main0 uses .x as the compare value and .y to derive the gradient magnitude.
float2 v_drefLodBias [[user(locn1)]];
};

// Fragment entry point: performs a depth-compare sample of a cube array texture and
// writes the result to the red channel (green/blue are 0.0, alpha is 1.0).
//   in             - stage inputs (coordinate, array index, compare value, gradient source)
//   u_sampler      - depth cube array texture at texture slot 0
//   u_samplerSmplr - sampler at sampler slot 0
fragment main0_out main0(main0_in in [[stage_in]], depthcube_array<float> u_sampler [[texture(0)]], sampler u_samplerSmplr [[sampler(0)]])
{
main0_out out = {};
// Both derivative arguments passed to spvGradientCube are the same expression:
// exp2(v_drefLodBias.y - 0.5) divided by the texture width.
// NOTE(review): presumably this gradient stands in for an explicit LOD of
// v_drefLodBias.y — confirm against the originating shader.
out.o_color = float4(u_sampler.sample_compare(u_samplerSmplr, in.v_texCoord.xyz, uint(rint(in.v_texCoord.w)), in.v_drefLodBias.x, spvGradientCube(in.v_texCoord.xyz, exp2(in.v_drefLodBias.y - 0.5) / float3(u_sampler.get_width()), exp2(in.v_drefLodBias.y - 0.5) / float3(u_sampler.get_width()))), 0.0, 0.0, 1.0);
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Builds the gradientcube argument for a cube-texture sample from the direction P
// and its derivatives dPdx / dPdy, pre-transforming the derivatives relative to the
// major axis of P (the component of P with the largest magnitude).
//   P    - cube sampling direction (callers pass the same vector used as the sample coordinate)
//   dPdx - derivative of P that feeds the first gradientcube argument
//   dPdy - derivative of P that feeds the second gradientcube argument
// Returns a gradientcube whose components are filled from the transformed derivatives.
static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)
{
// Major axis selection
float3 absP = abs(P);
// X wins ties against both Y and Z.
bool xMajor = absP.x >= max(absP.y, absP.z);
// Only consulted when xMajor is false; Y wins ties against Z.
bool yMajor = absP.y >= absP.z;
// Permute components so the major axis always lands in .z
// (X major: yzx, Y major: xzy, Z major: unchanged).
float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);
// The derivatives receive the same permutation as P.
float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);
float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);

// Skip a couple of operations compared to usual projection
// d.xy comes from dPdx and d.zw from dPdy: each minor-axis derivative minus
// (minor / major) times the major-axis derivative.
// NOTE(review): Q.z is the largest-magnitude component of P, so this divides by
// zero only when P is the zero vector.
float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);

// Final swizzle to put the intermediate values into non-ignored components
// X major: X and Z
// Y major: X and Y
// Z major: Y and Z
// The non-X-major swizzle is shared by the Y-major and Z-major cases.
return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);
}

// Fragment shader output: a single color written to color attachment 0.
struct main0_out
{
float4 FragColor [[color(0)]];
};

// Fragment shader stage input: a flat (non-interpolated) cube coordinate at user location 0.
struct main0_in
{
float3 vTex [[user(locn0), flat]];
};

// Fragment entry point: samples a cube texture at in.vTex with explicit gradients and
// accumulates the result into the output color.
//   in            - stage input carrying the cube coordinate
//   uSampler      - cube texture at texture slot 0
//   uSamplerSmplr - sampler at sampler slot 0
fragment main0_out main0(main0_in in [[stage_in]], texturecube<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
{
// Value-initialized so the += below accumulates onto a defined starting value.
main0_out out = {};
// The constant gradients float3(5.0) and float3(8.0) are routed through
// spvGradientCube rather than passed to gradientcube directly.
out.FragColor += uSampler.sample(uSamplerSmplr, in.vTex, spvGradientCube(in.vTex, float3(5.0), float3(8.0)));
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Builds the gradientcube argument for a cube-texture sample from the direction P
// and its derivatives dPdx / dPdy, pre-transforming the derivatives relative to the
// major axis of P (the component of P with the largest magnitude).
//   P    - cube sampling direction (callers pass the same vector used as the sample coordinate)
//   dPdx - derivative of P that feeds the first gradientcube argument
//   dPdy - derivative of P that feeds the second gradientcube argument
// Returns a gradientcube whose components are filled from the transformed derivatives.
static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)
{
// Major axis selection
float3 absP = abs(P);
// X wins ties against both Y and Z.
bool xMajor = absP.x >= max(absP.y, absP.z);
// Only consulted when xMajor is false; Y wins ties against Z.
bool yMajor = absP.y >= absP.z;
// Permute components so the major axis always lands in .z
// (X major: yzx, Y major: xzy, Z major: unchanged).
float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);
// The derivatives receive the same permutation as P.
float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);
float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);

// Skip a couple of operations compared to usual projection
// d.xy comes from dPdx and d.zw from dPdy: each minor-axis derivative minus
// (minor / major) times the major-axis derivative.
// NOTE(review): Q.z is the largest-magnitude component of P, so this divides by
// zero only when P is the zero vector.
float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);

// Final swizzle to put the intermediate values into non-ignored components
// X major: X and Z
// Y major: X and Y
// Z major: Y and Z
// The non-X-major swizzle is shared by the Y-major and Z-major cases.
return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);
}

// Buffer layout holding a single float4 scale value.
// Not referenced by main0 in this file.
struct buf0
{
float4 u_scale;
};

// Buffer layout holding a single float4 bias value.
// Not referenced by main0 in this file.
struct buf1
{
float4 u_bias;
};

// Fragment shader output: a single color written to color attachment 0.
struct main0_out
{
float4 o_color [[color(0)]];
};

// Fragment shader stage inputs, bound by user location.
struct main0_in
{
// main0 uses .xyz as the cube coordinate and .w (rounded) as the array index.
float4 v_texCoord [[user(locn0)]];
// main0 uses .x as the compare value and .y to derive the gradient magnitude.
float2 v_drefLodBias [[user(locn1)]];
};

// Fragment entry point: performs a depth-compare sample of a cube array texture and
// writes the result to the red channel (green/blue are 0.0, alpha is 1.0).
//   in             - stage inputs (coordinate, array index, compare value, gradient source)
//   u_sampler      - depth cube array texture at texture slot 0
//   u_samplerSmplr - sampler at sampler slot 0
fragment main0_out main0(main0_in in [[stage_in]], depthcube_array<float> u_sampler [[texture(0)]], sampler u_samplerSmplr [[sampler(0)]])
{
main0_out out = {};
// Both derivative arguments passed to spvGradientCube are the same expression:
// exp2(v_drefLodBias.y - 0.5) divided by the texture width.
// NOTE(review): presumably this gradient stands in for an explicit LOD of
// v_drefLodBias.y — confirm against the originating shader.
out.o_color = float4(u_sampler.sample_compare(u_samplerSmplr, in.v_texCoord.xyz, uint(rint(in.v_texCoord.w)), in.v_drefLodBias.x, spvGradientCube(in.v_texCoord.xyz, exp2(in.v_drefLodBias.y - 0.5) / float3(u_sampler.get_width()), exp2(in.v_drefLodBias.y - 0.5) / float3(u_sampler.get_width()))), 0.0, 0.0, 1.0);
return out;
}

44 changes: 44 additions & 0 deletions reference/shaders-msl/frag/sampler-cube-grad.agx-cube-grad.frag
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

// Builds the gradientcube argument for a cube-texture sample from the direction P
// and its derivatives dPdx / dPdy, pre-transforming the derivatives relative to the
// major axis of P (the component of P with the largest magnitude).
//   P    - cube sampling direction (callers pass the same vector used as the sample coordinate)
//   dPdx - derivative of P that feeds the first gradientcube argument
//   dPdy - derivative of P that feeds the second gradientcube argument
// Returns a gradientcube whose components are filled from the transformed derivatives.
static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)
{
// Major axis selection
float3 absP = abs(P);
// X wins ties against both Y and Z.
bool xMajor = absP.x >= max(absP.y, absP.z);
// Only consulted when xMajor is false; Y wins ties against Z.
bool yMajor = absP.y >= absP.z;
// Permute components so the major axis always lands in .z
// (X major: yzx, Y major: xzy, Z major: unchanged).
float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);
// The derivatives receive the same permutation as P.
float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);
float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);

// Skip a couple of operations compared to usual projection
// d.xy comes from dPdx and d.zw from dPdy: each minor-axis derivative minus
// (minor / major) times the major-axis derivative.
// NOTE(review): Q.z is the largest-magnitude component of P, so this divides by
// zero only when P is the zero vector.
float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);

// Final swizzle to put the intermediate values into non-ignored components
// X major: X and Z
// Y major: X and Y
// Z major: Y and Z
// The non-X-major swizzle is shared by the Y-major and Z-major cases.
return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);
}

// Fragment shader output: a single color written to color attachment 0.
struct main0_out
{
float4 FragColor [[color(0)]];
};

// Fragment shader stage input: a flat (non-interpolated) cube coordinate at user location 0.
struct main0_in
{
float3 vTex [[user(locn0), flat]];
};

// Fragment entry point: samples a cube texture at in.vTex with explicit gradients and
// accumulates the result into the output color.
//   in            - stage input carrying the cube coordinate
//   uSampler      - cube texture at texture slot 0
//   uSamplerSmplr - sampler at sampler slot 0
fragment main0_out main0(main0_in in [[stage_in]], texturecube<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
{
// Value-initialized so the += below accumulates onto a defined starting value.
main0_out out = {};
// The constant gradients float3(5.0) and float3(8.0) are routed through
// spvGradientCube rather than passed to gradientcube directly.
out.FragColor += uSampler.sample(uSamplerSmplr, in.vTex, spvGradientCube(in.vTex, float3(5.0), float3(8.0)));
return out;
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
; SPIR-V
; Version: 1.3
; Generator: Khronos Glslang Reference Front End; 11
; Bound: 45
; Schema: 0
OpCapability Shader
OpCapability SampledCubeArray
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %o_color %v_texCoord %v_drefLodBias
OpExecutionMode %main OriginUpperLeft

; Debug Information
OpSource GLSL 450
OpName %main "main" ; id %4
OpName %o_color "o_color" ; id %9
OpName %u_sampler "u_sampler" ; id %13
OpName %v_texCoord "v_texCoord" ; id %16
OpName %v_drefLodBias "v_drefLodBias" ; id %21
OpName %buf0 "buf0" ; id %39
OpMemberName %buf0 0 "u_scale"
OpName %_ "" ; id %41
OpName %buf1 "buf1" ; id %42
OpMemberName %buf1 0 "u_bias"
OpName %__0 "" ; id %44

; Annotations
OpDecorate %o_color RelaxedPrecision
OpDecorate %o_color Location 0
OpDecorate %u_sampler DescriptorSet 0
OpDecorate %u_sampler Binding 0
OpDecorate %v_texCoord Location 0
OpDecorate %v_drefLodBias Location 1
OpMemberDecorate %buf0 0 Offset 0
OpDecorate %buf0 Block
OpDecorate %_ DescriptorSet 0
OpDecorate %_ Binding 1
OpMemberDecorate %buf1 0 Offset 0
OpDecorate %buf1 Block
OpDecorate %__0 DescriptorSet 0
OpDecorate %__0 Binding 2

; Types, variables and constants
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
%o_color = OpVariable %_ptr_Output_v4float Output
%10 = OpTypeImage %float Cube 1 1 0 1 Unknown
%11 = OpTypeSampledImage %10
%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11
%u_sampler = OpVariable %_ptr_UniformConstant_11 UniformConstant
%_ptr_Input_v4float = OpTypePointer Input %v4float
%v_texCoord = OpVariable %_ptr_Input_v4float Input
%v2float = OpTypeVector %float 2
%_ptr_Input_v2float = OpTypePointer Input %v2float
%v_drefLodBias = OpVariable %_ptr_Input_v2float Input
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%_ptr_Input_float = OpTypePointer Input %float
%v3float = OpTypeVector %float 3
%uint_1 = OpConstant %uint 1
%float_0 = OpConstant %float 0
%float_1 = OpConstant %float 1
%buf0 = OpTypeStruct %v4float
%_ptr_Uniform_buf0 = OpTypePointer Uniform %buf0
%_ = OpVariable %_ptr_Uniform_buf0 Uniform
%buf1 = OpTypeStruct %v4float
%_ptr_Uniform_buf1 = OpTypePointer Uniform %buf1
%__0 = OpVariable %_ptr_Uniform_buf1 Uniform

; Function main
; Performs a depth-compare sample of %u_sampler with an explicit LOD and stores
; vec4(result, 0, 0, 1) to %o_color.
%main = OpFunction %void None %3
%5 = OpLabel
; Load the combined image/sampler and the 4-component coordinate.
%14 = OpLoad %11 %u_sampler
%18 = OpLoad %v4float %v_texCoord
; v_drefLodBias component 0 is used as the Dref (compare) operand.
%25 = OpAccessChain %_ptr_Input_float %v_drefLodBias %uint_0
%26 = OpLoad %float %25
; v_drefLodBias component 1 is used as the Lod image operand.
%32 = OpAccessChain %_ptr_Input_float %v_drefLodBias %uint_1
%33 = OpLoad %float %32
%35 = OpImageSampleDrefExplicitLod %float %14 %18 %26 Lod %33
; The scalar compare result becomes the first component; the rest are constants 0, 0, 1.
%38 = OpCompositeConstruct %v4float %35 %float_0 %float_0 %float_1
OpStore %o_color %38
OpReturn
OpFunctionEnd
10 changes: 10 additions & 0 deletions shaders-msl/frag/sampler-cube-grad.agx-cube-grad.frag
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#version 450

layout(location = 0) out vec4 FragColor;
layout(location = 0) flat in vec3 vTex;
layout(binding = 0) uniform samplerCube uSampler;

// Samples the cube map at vTex with constant explicit gradients (5.0 along x,
// 8.0 along y) and accumulates the result into FragColor.
// NOTE(review): FragColor is an `out` variable read by += with no prior write
// in this shader.
void main()
{
FragColor += textureGrad(uSampler, vTex, vec3(5.0), vec3(8.0));
}
12 changes: 12 additions & 0 deletions spirv_cross_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,18 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
case SPVC_COMPILER_OPTION_MSL_SAMPLE_DREF_LOD_ARRAY_AS_GRAD:
options->msl.sample_dref_lod_array_as_grad = value != 0;
break;

case SPVC_COMPILER_OPTION_MSL_READWRITE_TEXTURE_FENCES:
options->msl.readwrite_texture_fences = value != 0;
break;

case SPVC_COMPILER_OPTION_MSL_REPLACE_RECURSIVE_INPUTS:
options->msl.replace_recursive_inputs = value != 0;
break;

case SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP:
options->msl.agx_manual_cube_grad_fixup = value != 0;
break;
#endif

default:
Expand Down
5 changes: 4 additions & 1 deletion spirv_cross_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
#define SPVC_C_API_VERSION_MINOR 57
#define SPVC_C_API_VERSION_MINOR 58
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0

Expand Down Expand Up @@ -725,6 +725,9 @@ typedef enum spvc_compiler_option

SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS_TIER = 84 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_SAMPLE_DREF_LOD_ARRAY_AS_GRAD = 85 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_READWRITE_TEXTURE_FENCES = 86 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_REPLACE_RECURSIVE_INPUTS = 87 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP = 88 | SPVC_COMPILER_OPTION_MSL_BIT,

SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff
} spvc_compiler_option;
Expand Down
Loading

0 comments on commit 50e90dd

Please sign in to comment.