|
27 | 27 | // These days I use a variant of the RTNE/RN version that also preserves NaN payload bits,
|
28 | 28 | // which is slightly more ops but matches hardware conversions exactly for every input, including all NaNs.
|
29 | 29 | //
|
30 |
| -// TODO: bring over fast inverses (RTS, RTU, etc) |
| 30 | +// DONE: bring over fast inverses (RTS, RTU, etc) |
| 31 | +// DONE: need translation, rotation, scale |
| 32 | +// TODO: need fast post-translation, post-rotation, post-scale |
31 | 33 | //
|
32 | 34 | // TODO: saturating conversions would be useful to, and prevent overflow
|
33 | 35 | // see the conversion.h code, bit select to clamp values.
|
|
49 | 51 | // c02
|
50 | 52 | // c03
|
51 | 53 | //
|
| 54 | +// col: TRS * TRS * v cameraToWorldTfm * worldToModelTfm * .. |
| 55 | +// row: v * SRT * SRT modelToWorldTfm * worldToCameraTfm * ... |
| 56 | + |
52 | 57 | // TODO: need natvis and lldb formatting of math classes.
|
53 | 58 |
|
54 | 59 | //-----------------
|
55 | 60 |
|
56 | 61 |
|
57 | 62 | namespace SIMD_NAMESPACE {
|
58 | 63 |
|
| 64 | +// Which cmath had this |
| 65 | +inline void sincosf(float angleInRadians, float& s, float& c) { |
| 66 | + s = sinf(angleInRadians); |
| 67 | + c = cosf(angleInRadians); |
| 68 | +} |
| 69 | + |
59 | 70 | // These aren't embedded in function, so may have pre-init ordering issues.
|
60 | 71 | // or could add pre-init order to skip using functions.
|
61 | 72 | // Expose these through function calls as const&
|
@@ -655,9 +666,8 @@ static const float4 kConvertToMatrix = {0.0f,1.0f,2.0f,-2.0f};
|
655 | 666 |
|
656 | 667 | quatf::quatf(float3 axis, float angleInRadians)
|
657 | 668 | {
|
658 |
| - float c = ::cosf(angleInRadians * 0.5f); |
659 |
| - float s = ::sinf(angleInRadians * 0.5f); |
660 |
| - |
| 669 | + float s, c; |
| 670 | + sincosf(angleInRadians * 0.5f, s, c); |
661 | 671 | v = float4m(s * axis, c);
|
662 | 672 | }
|
663 | 673 |
|
@@ -763,13 +773,15 @@ inline void quat_bezier_cp_impl(quatf q0, quatf q1, quatf q2,
|
763 | 773 | quatf& a1, quatf& b1)
|
764 | 774 | {
|
765 | 775 | // TODO: find out of these were quat or vec mul?
|
766 |
| - a1.v = 2.0f * dot(q0.v,q1.v) * q1.v - q0.v; // Double(q0, q1); |
| 776 | + // Double(q0, q1); |
| 777 | + a1.v = 2.0f * dot(q0.v,q1.v) * q1.v - q0.v; |
767 | 778 |
|
768 | 779 | // Bisect(a1, q2);
|
769 | 780 | a1.v = (a1.v + q2.v);
|
770 | 781 | a1.v = normalize(a1.v);
|
771 | 782 |
|
772 |
| - b1.v = 2.0f * dot(a1.v,q1.v) * q1.v - a1.v; // Double(a1, q1); |
| 783 | + // Double(a1, q1); |
| 784 | + b1.v = 2.0f * dot(a1.v,q1.v) * q1.v - a1.v; |
773 | 785 | }
|
774 | 786 |
|
775 | 787 |
|
@@ -808,6 +820,140 @@ quatf quat_bezer_lerp(quatf a, quatf b, quatf c, quatf d, float t)
|
808 | 820 | #endif // SIMD_FLOAT
|
809 | 821 |
|
810 | 822 |
|
| 823 | +#if SIMD_FLOAT |
| 824 | + |
| 825 | +void transpose_affine(float4x4& m) |
| 826 | +{ |
| 827 | + // avoid copy and one shuffle |
| 828 | + float4 tmp3, tmp2, tmp1, tmp0; |
| 829 | + |
| 830 | + // TOOD: use platform shuffles on Neon |
| 831 | + // this is using sse2neon |
| 832 | + tmp0 = _mm_shuffle_ps(m[0], m[1], 0x44); |
| 833 | + tmp2 = _mm_shuffle_ps(m[0], m[1], 0xEE); |
| 834 | + |
| 835 | + tmp1 = _mm_shuffle_ps(m[2], m[3], 0x44); |
| 836 | + tmp3 = _mm_shuffle_ps(m[2], m[3], 0xEE); |
| 837 | + |
| 838 | + m[0] = _mm_shuffle_ps(tmp0, tmp1, 0x88); |
| 839 | + m[1] = _mm_shuffle_ps(tmp0, tmp1, 0xDD); |
| 840 | + m[2] = _mm_shuffle_ps(tmp2, tmp3, 0x88); |
| 841 | + |
| 842 | + // skips m[3] - known 0001 |
| 843 | +} |
| 844 | + |
| 845 | +float4x4 inverse_tr(const float4x4& mtx) |
| 846 | +{ |
| 847 | + float4x4 inverse(mtx); |
| 848 | + inverse[3] = float4_negw(); // will be flipped by matrix mul |
| 849 | + transpose_affine(inverse); // handle rotation (R)inv = (R)T |
| 850 | + |
| 851 | + inverse[3] = inverse * (-mtx[3]); // 1 mul, 3 mads |
| 852 | + |
| 853 | + return inverse; |
| 854 | +} |
| 855 | + |
| 856 | +// invert a row vector matrix |
| 857 | +float4x4 inverse_tru(const float4x4& mtx) |
| 858 | +{ |
| 859 | + bool success = false; |
| 860 | + |
| 861 | + float scaleX = length_squared(mtx[0]); |
| 862 | + |
| 863 | + float4x4 inverse(mtx); |
| 864 | + if (scaleX > 1e-6f) { |
| 865 | + inverse[3] = float4_negw(); |
| 866 | + |
| 867 | + transpose_affine(inverse); |
| 868 | + |
| 869 | + // all rows/columns in orthogonal tfm have same magnitude with uniform scale |
| 870 | + float4 invScaleX = float4m(1.0f / scaleX); // inverse squared |
| 871 | + |
| 872 | + // scale the rotation matrix by scaling inverse |
| 873 | + inverse[0] *= invScaleX; |
| 874 | + inverse[1] *= invScaleX; |
| 875 | + inverse[2] *= invScaleX; |
| 876 | + |
| 877 | + // handle the translation |
| 878 | + inverse[3] = inverse * (-mtx[3]); |
| 879 | + |
| 880 | + success = true; |
| 881 | + } |
| 882 | + |
| 883 | + return inverse; |
| 884 | +} |
| 885 | + |
| 886 | +float4x4 inverse_trs(const float4x4& mtx) |
| 887 | +{ |
| 888 | + bool success = false; |
| 889 | + |
| 890 | + float4x4 inverse(mtx); |
| 891 | + |
| 892 | + // TODO: fix handling of failure |
| 893 | + // compute the scaling terms (4 dots) |
| 894 | + // float3 scale = calcScaleSquaredRowTfm(m); |
| 895 | + // if (all(scale > float3(1e-6f)) { |
| 896 | + inverse[3] = float4_negw(); // neccessary for simple inverse to hold |
| 897 | + |
| 898 | + transpose_affine(inverse); |
| 899 | + |
| 900 | + // this is cheaper than 3 dot's above, just mul/add |
| 901 | + float4 invScale = recip(inverse[0]*inverse[0] + |
| 902 | + inverse[1]*inverse[1] + |
| 903 | + inverse[2]*inverse[2]); |
| 904 | + |
| 905 | + // scale the rotation matrix by scaling inverse |
| 906 | + inverse[0] *= invScale; |
| 907 | + inverse[1] *= invScale; |
| 908 | + inverse[2] *= invScale; |
| 909 | + inverse[3] = inverse * (-mtx[3]); |
| 910 | + |
| 911 | + success = true; |
| 912 | + //} |
| 913 | + |
| 914 | + return inverse; |
| 915 | +} |
| 916 | + |
| 917 | +// TODO: make ctor to avoid returning large matrix |
| 918 | +float4x4 float4x4m(char axis, float angleInRadians) |
| 919 | +{ |
| 920 | + float sinTheta, cosTheta; |
| 921 | + sincosf(angleInRadians, sinTheta, cosTheta); |
| 922 | + |
| 923 | + float4x4 m; |
| 924 | + m[3] = float4_posw(); |
| 925 | + |
| 926 | + switch(axis) { |
| 927 | + case 'x': |
| 928 | + { |
| 929 | + m[0] = float4_posx(); |
| 930 | + m[1] = float4m(0.0f, cosTheta, sinTheta, 0.0f); |
| 931 | + m[2] = float4m(0.0f, -sinTheta, cosTheta, 0.0f); |
| 932 | + break; |
| 933 | + } |
| 934 | + |
| 935 | + case 'y': |
| 936 | + { |
| 937 | + m[0] = float4m(cosTheta, 0.0f, -sinTheta, 0.0f); |
| 938 | + m[1] = float4_posy(); |
| 939 | + m[2] = float4m(sinTheta, 0.0f, cosTheta, 0.0f); |
| 940 | + break; |
| 941 | + } |
| 942 | + |
| 943 | + case 'z': |
| 944 | + { |
| 945 | + m[0] = float4m( cosTheta, sinTheta, 0.0f, 0.0f); |
| 946 | + m[1] = float4m(-sinTheta, cosTheta, 0.0f, 0.0f); |
| 947 | + m[2] = float4_posz(); |
| 948 | + break; |
| 949 | + } |
| 950 | + } |
| 951 | + return m; |
| 952 | +} |
| 953 | + |
| 954 | + |
| 955 | +#endif // SIMD_FLOAT |
| 956 | + |
811 | 957 | } // namespace SIMD_NAMESPACE
|
812 | 958 |
|
813 | 959 | #endif
|
|
0 commit comments