Skip to content

Commit

Permalink
Merge pull request #2214 from KhronosGroup/fix-2206
Browse files (browse the repository at this point in the history)
MSL: Use powr instead of pow.
  • Loading branch information
HansKristian-Work authored Dec 7, 2023
2 parents 9da5f7c + 09ba765 commit e6b013a
Show file tree
Hide file tree
Showing 12 changed files with 128 additions and 123 deletions.
2 changes: 1 addition & 1 deletion reference/opt/shaders-ue4/asm/frag/depth-compare.asm.frag
Original file line number | Diff line number | Diff line change
Expand Up @@ -256,7 +256,7 @@ fragment main0_out main0(constant type_View& View [[buffer(0)]], constant type_G
float4 _260 = SSProfilesTexture.read(uint2(int3(1, int(uint(fma(select(float4(0.0), SceneTexturesStruct_GBufferDTexture.sample(SceneTexturesStruct_GBufferDTextureSampler, _114, level(0.0)), bool4(!(((_240 & 4294967280u) & 16u) != 0u))).x, 255.0, 0.5))), 0).xy), 0);
float _263 = _260.y * 0.5;
float3 _266 = fma(-_236, float3(_263), _148);
float _274 = pow(fast::clamp(dot(-(_152 * float3(rsqrt(dot(_152, _152)))), _236), 0.0, 1.0), 1.0);
float _274 = powr(fast::clamp(dot(-(_152 * float3(rsqrt(dot(_152, _152)))), _236), 0.0, 1.0), 1.0);
float _445;
if (_160)
{
Expand Down
34 changes: 17 additions & 17 deletions reference/opt/shaders-ue4/asm/frag/global-constant-arrays.asm.frag
Original file line number | Diff line number | Diff line change
Expand Up @@ -161,8 +161,8 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
float3 _599;
if (_Globals.OutputDevice >= 3u)
{
float3 _591 = pow(_577, float3(0.0126833133399486541748046875));
_599 = pow(fast::max(float3(0.0), _591 - float3(0.8359375)) / fma(float3(-18.6875), _591, float3(18.8515625)), float3(6.277394771575927734375)) * float3(10000.0);
float3 _591 = powr(_577, float3(0.0126833133399486541748046875));
_599 = powr(fast::max(float3(0.0), _591 - float3(0.8359375)) / fma(float3(-18.6875), _591, float3(18.8515625)), float3(6.277394771575927734375)) * float3(10000.0);
}
else
{
Expand Down Expand Up @@ -212,7 +212,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
float4 _861 = _Globals.ColorGammaMidtones * _Globals.ColorGamma;
float4 _864 = _Globals.ColorGainMidtones * _Globals.ColorGain;
float4 _867 = _Globals.ColorOffsetMidtones + _Globals.ColorOffset;
float3 _905 = fma(fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _808.xyz * float3(_808.w))) * float3(5.5555553436279296875), _811.xyz * float3(_811.w)) * float3(0.180000007152557373046875), float3(1.0) / (_814.xyz * float3(_814.w))), _817.xyz * float3(_817.w), _820.xyz + float3(_820.w)), float3(_852), fma(fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _751.xyz * float3(_751.w))) * float3(5.5555553436279296875), _756.xyz * float3(_756.w)) * float3(0.180000007152557373046875), float3(1.0) / (_761.xyz * float3(_761.w))), _766.xyz * float3(_766.w), _771.xyz + float3(_771.w)), float3(1.0 - _804), fma(pow(pow(fast::max(float3(0.0), mix(_772, _745, _855.xyz * float3(_855.w))) * float3(5.5555553436279296875), _858.xyz * float3(_858.w)) * float3(0.180000007152557373046875), float3(1.0) / (_861.xyz * float3(_861.w))), _864.xyz * float3(_864.w), _867.xyz + float3(_867.w)) * float3(_804 - _852)));
float3 _905 = fma(fma(powr(powr(fast::max(float3(0.0), mix(_772, _745, _808.xyz * float3(_808.w))) * float3(5.5555553436279296875), _811.xyz * float3(_811.w)) * float3(0.180000007152557373046875), float3(1.0) / (_814.xyz * float3(_814.w))), _817.xyz * float3(_817.w), _820.xyz + float3(_820.w)), float3(_852), fma(fma(powr(powr(fast::max(float3(0.0), mix(_772, _745, _751.xyz * float3(_751.w))) * float3(5.5555553436279296875), _756.xyz * float3(_756.w)) * float3(0.180000007152557373046875), float3(1.0) / (_761.xyz * float3(_761.w))), _766.xyz * float3(_766.w), _771.xyz + float3(_771.w)), float3(1.0 - _804), fma(powr(powr(fast::max(float3(0.0), mix(_772, _745, _855.xyz * float3(_855.w))) * float3(5.5555553436279296875), _858.xyz * float3(_858.w)) * float3(0.180000007152557373046875), float3(1.0) / (_861.xyz * float3(_861.w))), _864.xyz * float3(_864.w), _867.xyz + float3(_867.w)) * float3(_804 - _852)));
float3 _906 = _905 * _549;
float3 _914 = float3(_Globals.BlueCorrection);
float3 _916 = mix(_905, _905 * ((_551 * float3x3(float3(0.940437257289886474609375, -0.01830687932670116424560546875, 0.07786960899829864501953125), float3(0.008378696627914905548095703125, 0.82866001129150390625, 0.162961304187774658203125), float3(0.0005471261101774871349334716796875, -0.00088337459601461887359619140625, 1.00033628940582275390625))) * _550), _914) * _551;
Expand Down Expand Up @@ -316,7 +316,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
{
_1189 = fast::max(float3(0.0), mix(_1119, _1119 * ((_551 * float3x3(float3(1.06317996978759765625, 0.02339559979736804962158203125, -0.08657260239124298095703125), float3(-0.010633699595928192138671875, 1.2063200473785400390625, -0.1956900060176849365234375), float3(-0.0005908869788981974124908447265625, 0.00105247995816171169281005859375, 0.999538004398345947265625))) * _550), _914) * _549);
}
float3 _1218 = pow(fast::max(float3(0.0), mix((fma(float3(_Globals.MappingPolynomial.x), _1189 * _1189, float3(_Globals.MappingPolynomial.y) * _1189) + float3(_Globals.MappingPolynomial.z)) * float3(_Globals.ColorScale), _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y));
float3 _1218 = powr(fast::max(float3(0.0), mix((fma(float3(_Globals.MappingPolynomial.x), _1189 * _1189, float3(_Globals.MappingPolynomial.y) * _1189) + float3(_Globals.MappingPolynomial.z)) * float3(_Globals.ColorScale), _Globals.OverlayColor.xyz, float3(_Globals.OverlayColor.w))), float3(_Globals.InverseGamma.y));
float3 _3001;
if (_Globals.OutputDevice == 0u)
{
Expand All @@ -329,7 +329,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
_2973 = _2961 * 12.9200000762939453125;
break;
}
_2973 = fma(pow(_2961, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875);
_2973 = fma(powr(_2961, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875);
break;
}
float _2974 = _1218.y;
Expand All @@ -341,7 +341,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
_2986 = _2974 * 12.9200000762939453125;
break;
}
_2986 = fma(pow(_2974, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875);
_2986 = fma(powr(_2974, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875);
break;
}
float _2987 = _1218.z;
Expand All @@ -353,7 +353,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
_2999 = _2987 * 12.9200000762939453125;
break;
}
_2999 = fma(pow(_2987, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875);
_2999 = fma(powr(_2987, 0.4166666567325592041015625), 1.05499994754791259765625, -0.054999999701976776123046875);
break;
}
_3001 = float3(_2973, _2986, _2999);
Expand All @@ -364,7 +364,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
if (_Globals.OutputDevice == 1u)
{
float3 _2953 = fast::max(float3(6.1035199905745685100555419921875e-05), (_1218 * _547) * _576);
_2960 = fast::min(_2953 * float3(4.5), fma(pow(fast::max(_2953, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)), float3(1.09899997711181640625), float3(-0.098999999463558197021484375)));
_2960 = fast::min(_2953 * float3(4.5), fma(powr(fast::max(_2953, float3(0.017999999225139617919921875)), float3(0.449999988079071044921875)), float3(1.09899997711181640625), float3(-0.098999999463558197021484375)));
}
else
{
Expand Down Expand Up @@ -593,7 +593,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
}
_2475 = _2472;
}
float3 _2479 = (float3(pow(10.0, _2327), pow(10.0, _2401), pow(10.0, _2475)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375));
float3 _2479 = (float3(powr(10.0, _2327), powr(10.0, _2401), powr(10.0, _2475)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375));
float _2685 = _2479.x;
float _2688 = log((_2685 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _2685);
float _2689 = _2688 * 0.4342944622039794921875;
Expand Down Expand Up @@ -702,8 +702,8 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
}
_2936 = _2928;
}
float3 _2942 = pow(((float3(pow(10.0, _2768), pow(10.0, _2852), pow(10.0, _2936)) - float3(3.5073844628641381859779357910156e-05)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125));
_2950 = pow(fma(float3(18.8515625), _2942, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2942, float3(1.0))), float3(78.84375));
float3 _2942 = powr(((float3(powr(10.0, _2768), powr(10.0, _2852), powr(10.0, _2936)) - float3(3.5073844628641381859779357910156e-05)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125));
_2950 = powr(fma(float3(18.8515625), _2942, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2942, float3(1.0))), float3(78.84375));
}
else
{
Expand Down Expand Up @@ -932,7 +932,7 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
}
_1638 = _1635;
}
float3 _1642 = (float3(pow(10.0, _1490), pow(10.0, _1564), pow(10.0, _1638)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375));
float3 _1642 = (float3(powr(10.0, _1490), powr(10.0, _1564), powr(10.0, _1638)) * float3x3(float3(0.695452213287353515625, 0.140678703784942626953125, 0.16386906802654266357421875), float3(0.0447945632040500640869140625, 0.859671115875244140625, 0.095534317195415496826171875), float3(-0.0055258828215301036834716796875, 0.0040252101607620716094970703125, 1.00150072574615478515625))) * float3x3(float3(1.45143926143646240234375, -0.236510753631591796875, -0.214928567409515380859375), float3(-0.07655377686023712158203125, 1.1762297153472900390625, -0.0996759235858917236328125), float3(0.0083161480724811553955078125, -0.0060324496589601039886474609375, 0.99771630764007568359375));
float _1848 = _1642.x;
float _1851 = log((_1848 <= 0.0) ? 9.9999997473787516355514526367188e-05 : _1848);
float _1852 = _1851 * 0.4342944622039794921875;
Expand Down Expand Up @@ -1041,20 +1041,20 @@ fragment main0_out main0(main0_in in [[stage_in]], constant type_Globals& _Globa
}
_2084 = _2081;
}
float3 _2089 = pow((float3(pow(10.0, _1926), pow(10.0, _2005), pow(10.0, _2084)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125));
_2097 = pow(fma(float3(18.8515625), _2089, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2089, float3(1.0))), float3(78.84375));
float3 _2089 = powr((float3(powr(10.0, _1926), powr(10.0, _2005), powr(10.0, _2084)) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125));
_2097 = powr(fma(float3(18.8515625), _2089, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _2089, float3(1.0))), float3(78.84375));
}
else
{
float3 _1260;
if (_Globals.OutputDevice == 7u)
{
float3 _1252 = pow(((_906 * _547) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125));
_1260 = pow(fma(float3(18.8515625), _1252, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _1252, float3(1.0))), float3(78.84375));
float3 _1252 = powr(((_906 * _547) * _576) * float3(9.9999997473787516355514526367188e-05), float3(0.1593017578125));
_1260 = powr(fma(float3(18.8515625), _1252, float3(0.8359375)) * (float3(1.0) / fma(float3(18.6875), _1252, float3(1.0))), float3(78.84375));
}
else
{
_1260 = pow((_1218 * _547) * _576, float3(_Globals.InverseGamma.z));
_1260 = powr((_1218 * _547) * _576, float3(_Globals.InverseGamma.z));
}
_2097 = _1260;
}
Expand Down
Loading

0 comments on commit e6b013a

Please sign in to comment.