diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index dec6446cd7653b..065c0f4fd08aaf 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -16068,18 +16068,21 @@ public static Vector512 Invoke(Vector512 x) public static Vector128 Invoke(Vector128 x) { - Vector128 uxMasked = Vector128.Abs(x).AsUInt32(); - if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) + Vector128 uxMasked = Vector128.Abs(x); + if (Vector128.GreaterThanAny(uxMasked.AsUInt32(), Vector128.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector128 r = uxMasked.AsSingle(); Vector128 almHuge = Vector128.Create(AlmHuge); - Vector128 dn = ((r + Vector128.Create(float.Pi / 2)) * Vector128.Create(1 / float.Pi)) + almHuge; + Vector128 dn = MultiplyAddEstimateOperator.Invoke(uxMasked + Vector128.Create(float.Pi / 2), Vector128.Create(1 / float.Pi), almHuge); Vector128 odd = dn.AsUInt32() << 31; dn = dn - almHuge - Vector128.Create(0.5f); - Vector128 f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2)); + + Vector128 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(-float.Pi), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(Pi_Tail1), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(Pi_Tail2), f); // POLY_EVAL_ODD_9 Vector128 f2 = f * f; @@ -16095,18 +16098,21 @@ public static Vector128 Invoke(Vector128 x) public static Vector256 Invoke(Vector256 x) { - Vector256 uxMasked = Vector256.Abs(x).AsUInt32(); - if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue))) + Vector256 uxMasked = Vector256.Abs(x); + if (Vector256.GreaterThanAny(uxMasked.AsUInt32(), Vector256.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector256 r = uxMasked.AsSingle(); Vector256 almHuge = Vector256.Create(AlmHuge); - Vector256 dn = ((r + Vector256.Create(float.Pi / 2)) * Vector256.Create(1 / float.Pi)) + almHuge; + Vector256 dn = MultiplyAddEstimateOperator.Invoke(uxMasked + Vector256.Create(float.Pi / 2), Vector256.Create(1 / float.Pi), almHuge); Vector256 odd = dn.AsUInt32() << 31; dn = dn - almHuge - Vector256.Create(0.5f); - Vector256 f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2)); + + Vector256 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(-float.Pi), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(Pi_Tail1), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(Pi_Tail2), f); // POLY_EVAL_ODD_9 Vector256 f2 = f * f; @@ -16122,18 +16128,21 @@ public static Vector256 Invoke(Vector256 x) public static Vector512 Invoke(Vector512 x) { - Vector512 uxMasked = Vector512.Abs(x).AsUInt32(); - if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue))) + Vector512 uxMasked = Vector512.Abs(x); + if (Vector512.GreaterThanAny(uxMasked.AsUInt32(), Vector512.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector512 r = uxMasked.AsSingle(); Vector512 almHuge = Vector512.Create(AlmHuge); - Vector512 dn = ((r + Vector512.Create(float.Pi / 2)) * Vector512.Create(1 / float.Pi)) + almHuge; + Vector512 dn = MultiplyAddEstimateOperator.Invoke(uxMasked + Vector512.Create(float.Pi / 2), Vector512.Create(1 / float.Pi), almHuge); Vector512 odd = dn.AsUInt32() << 31; dn = dn - almHuge - Vector512.Create(0.5f); - Vector512 f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2)); + + Vector512 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(-float.Pi), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(Pi_Tail1), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(Pi_Tail2), f); // POLY_EVAL_ODD_9 Vector512 f2 = f * f; @@ -16171,18 +16180,21 @@ public static Vector512 Invoke(Vector512 x) public static Vector128 Invoke(Vector128 x) { - Vector128 uxMasked = Vector128.Abs(x).AsUInt64(); - if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) + Vector128 uxMasked = Vector128.Abs(x); + if (Vector128.GreaterThanAny(uxMasked.AsUInt64(), Vector128.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector128 r = uxMasked.AsDouble(); Vector128 almHuge = Vector128.Create(AlmHuge); - Vector128 dn = (r * Vector128.Create(1 / double.Pi)) + Vector128.Create(double.Pi / 2) + almHuge; + Vector128 dn = (uxMasked * Vector128.Create(1 / double.Pi)) + Vector128.Create(double.Pi / 2) + almHuge; Vector128 odd = dn.AsUInt64() << 63; dn = dn - almHuge - Vector128.Create(0.5); - Vector128 f = r + (dn * Vector128.Create(-double.Pi)) + (dn * Vector128.Create(Pi_Tail2)) + (dn * Vector128.Create(Pi_Tail3)); + + Vector128 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(-double.Pi), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(Pi_Tail2), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(Pi_Tail3), f); // POLY_EVAL_ODD_17 Vector128 f2 = f * f; @@ -16203,18 +16215,21 @@ public static Vector128 Invoke(Vector128 x) public static Vector256 Invoke(Vector256 x) { - Vector256 uxMasked = Vector256.Abs(x).AsUInt64(); - if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue))) + Vector256 uxMasked = Vector256.Abs(x); + if (Vector256.GreaterThanAny(uxMasked.AsUInt64(), Vector256.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector256 r = uxMasked.AsDouble(); Vector256 almHuge = Vector256.Create(AlmHuge); - Vector256 dn = (r * Vector256.Create(1 / double.Pi)) + Vector256.Create(double.Pi / 2) + almHuge; + Vector256 dn = (uxMasked * Vector256.Create(1 / double.Pi)) + Vector256.Create(double.Pi / 2) + almHuge; Vector256 odd = dn.AsUInt64() << 63; dn = dn - almHuge - Vector256.Create(0.5); - Vector256 f = r + (dn * Vector256.Create(-double.Pi)) + (dn * Vector256.Create(Pi_Tail2)) + (dn * Vector256.Create(Pi_Tail3)); + + Vector256 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(-double.Pi), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(Pi_Tail2), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(Pi_Tail3), f); // POLY_EVAL_ODD_17 Vector256 f2 = f * f; @@ -16235,18 +16250,21 @@ public static Vector256 Invoke(Vector256 x) public static Vector512 Invoke(Vector512 x) { - Vector512 uxMasked = Vector512.Abs(x).AsUInt64(); - if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue))) + Vector512 uxMasked = Vector512.Abs(x); + if (Vector512.GreaterThanAny(uxMasked.AsUInt64(), Vector512.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector512 r = uxMasked.AsDouble(); Vector512 almHuge = Vector512.Create(AlmHuge); - Vector512 dn = (r * Vector512.Create(1 / double.Pi)) + Vector512.Create(double.Pi / 2) + almHuge; + Vector512 dn = (uxMasked * Vector512.Create(1 / double.Pi)) + Vector512.Create(double.Pi / 2) + almHuge; Vector512 odd = dn.AsUInt64() << 63; dn = dn - almHuge - Vector512.Create(0.5); - Vector512 f = r + (dn * Vector512.Create(-double.Pi)) + (dn * Vector512.Create(Pi_Tail2)) + (dn * Vector512.Create(Pi_Tail3)); + + Vector512 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(-double.Pi), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(Pi_Tail2), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(Pi_Tail3), f); // POLY_EVAL_ODD_17 Vector512 f2 = f * f; @@ -16526,20 +16544,21 @@ public static Vector512 Invoke(Vector512 x) public static Vector128 Invoke(Vector128 x) { - Vector128 sign = x.AsUInt32() & Vector128.Create(~SignMask); - Vector128 uxMasked = Vector128.Abs(x).AsUInt32(); - - if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) + Vector128 uxMasked = Vector128.Abs(x); + if (Vector128.GreaterThanAny(uxMasked.AsUInt32(), Vector128.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector128 r = uxMasked.AsSingle(); Vector128 almHuge = Vector128.Create(AlmHuge); - Vector128 dn = (r * Vector128.Create(1 / float.Pi)) + almHuge; + Vector128 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector128.Create(1 / float.Pi), almHuge); Vector128 odd = dn.AsUInt32() << 31; dn -= almHuge; - Vector128 f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2)); + + Vector128 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(-float.Pi), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(Pi_Tail1), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(Pi_Tail2), f); // POLY_EVAL_ODD_9 Vector128 f2 = f * f; @@ -16550,25 +16569,26 @@ public static Vector128 Invoke(Vector128 x) Vector128 a3 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); Vector128 poly = f * a3; - return (poly.AsUInt32() ^ sign ^ odd).AsSingle(); + return (poly.AsUInt32() ^ (x.AsUInt32() & Vector128.Create(~SignMask)) ^ odd).AsSingle(); } public static Vector256 Invoke(Vector256 x) { - Vector256 sign = x.AsUInt32() & Vector256.Create(~SignMask); - Vector256 uxMasked = Vector256.Abs(x).AsUInt32(); - - if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue))) + Vector256 uxMasked = Vector256.Abs(x); + if (Vector256.GreaterThanAny(uxMasked.AsUInt32(), Vector256.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector256 r = uxMasked.AsSingle(); Vector256 almHuge = Vector256.Create(AlmHuge); - Vector256 dn = (r * Vector256.Create(1 / float.Pi)) + almHuge; + Vector256 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector256.Create(1 / float.Pi), almHuge); Vector256 odd = dn.AsUInt32() << 31; dn -= almHuge; - Vector256 f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2)); + + Vector256 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(-float.Pi), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(Pi_Tail1), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(Pi_Tail2), f); // POLY_EVAL_ODD_9 Vector256 f2 = f * f; @@ -16579,25 +16599,26 @@ public static Vector256 Invoke(Vector256 x) Vector256 a3 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); Vector256 poly = f * a3; - return (poly.AsUInt32() ^ sign ^ odd).AsSingle(); + return (poly.AsUInt32() ^ (x.AsUInt32() & Vector256.Create(~SignMask)) ^ odd).AsSingle(); } public static Vector512 Invoke(Vector512 x) { - Vector512 sign = x.AsUInt32() & Vector512.Create(~SignMask); - Vector512 uxMasked = Vector512.Abs(x).AsUInt32(); - - if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue))) + Vector512 uxMasked = Vector512.Abs(x); + if (Vector512.GreaterThanAny(uxMasked.AsUInt32(), Vector512.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector512 r = uxMasked.AsSingle(); Vector512 almHuge = Vector512.Create(AlmHuge); - Vector512 dn = (r * Vector512.Create(1 / float.Pi)) + almHuge; + Vector512 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector512.Create(1 / float.Pi), almHuge); Vector512 odd = dn.AsUInt32() << 31; dn -= almHuge; - Vector512 f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2)); + + Vector512 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(-float.Pi), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(Pi_Tail1), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(Pi_Tail2), f); // POLY_EVAL_ODD_9 Vector512 f2 = f * f; @@ -16608,7 +16629,7 @@ public static Vector512 Invoke(Vector512 x) Vector512 a3 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); Vector512 poly = f * a3; - return (poly.AsUInt32() ^ sign ^ odd).AsSingle(); + return (poly.AsUInt32() ^ (x.AsUInt32() & Vector512.Create(~SignMask)) ^ odd).AsSingle(); } } @@ -16635,20 +16656,17 @@ public static Vector512 Invoke(Vector512 x) public static Vector128 Invoke(Vector128 x) { - Vector128 sign = x.AsUInt64() & Vector128.Create(~SignMask); - Vector128 uxMasked = Vector128.Abs(x).AsUInt64(); - - if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue))) + Vector128 uxMasked = Vector128.Abs(x); + if (Vector128.GreaterThanAny(uxMasked.AsUInt64(), Vector128.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector128 r = uxMasked.AsDouble(); Vector128 almHuge = Vector128.Create(AlmHuge); - Vector128 dn = (r * Vector128.Create(1 / double.Pi)) + almHuge; + Vector128 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector128.Create(1 / double.Pi), almHuge); Vector128 odd = dn.AsUInt64() << 63; dn -= almHuge; - Vector128 f = r - (dn * Vector128.Create(double.Pi)) - (dn * Vector128.Create(Pi_Tail1)) - (dn * Vector128.Create(Pi_Tail2)); + Vector128 f = uxMasked - (dn * Vector128.Create(double.Pi)) - (dn * Vector128.Create(Pi_Tail1)) - (dn * Vector128.Create(Pi_Tail2)); // POLY_EVAL_ODD_17 Vector128 f2 = f * f; @@ -16664,25 +16682,22 @@ public static Vector128 Invoke(Vector128 x) Vector128 b2 = MultiplyAddEstimateOperator.Invoke(f10, a3, f14 * a4); Vector128 poly = MultiplyAddEstimateOperator.Invoke(f, b1 + b2, f); - return (poly.AsUInt64() ^ sign ^ odd).AsDouble(); + return (poly.AsUInt64() ^ (x.AsUInt64() & Vector128.Create(~SignMask)) ^ odd).AsDouble(); } public static Vector256 Invoke(Vector256 x) { - Vector256 sign = x.AsUInt64() & Vector256.Create(~SignMask); - Vector256 uxMasked = Vector256.Abs(x).AsUInt64(); - - if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue))) + Vector256 uxMasked = Vector256.Abs(x); + if (Vector256.GreaterThanAny(uxMasked.AsUInt64(), Vector256.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector256 r = uxMasked.AsDouble(); Vector256 almHuge = Vector256.Create(AlmHuge); - Vector256 dn = (r * Vector256.Create(1 / double.Pi)) + almHuge; + Vector256 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector256.Create(1 / double.Pi), almHuge); Vector256 odd = dn.AsUInt64() << 63; dn -= almHuge; - Vector256 f = r - (dn * Vector256.Create(double.Pi)) - (dn * Vector256.Create(Pi_Tail1)) - (dn * Vector256.Create(Pi_Tail2)); + Vector256 f = uxMasked - (dn * Vector256.Create(double.Pi)) - (dn * Vector256.Create(Pi_Tail1)) - (dn * Vector256.Create(Pi_Tail2)); // POLY_EVAL_ODD_17 Vector256 f2 = f * f; @@ -16698,25 +16713,22 @@ public static Vector256 Invoke(Vector256 x) Vector256 b2 = MultiplyAddEstimateOperator.Invoke(f10, a3, f14 * a4); Vector256 poly = MultiplyAddEstimateOperator.Invoke(f, b1 + b2, f); - return (poly.AsUInt64() ^ sign ^ odd).AsDouble(); + return (poly.AsUInt64() ^ (x.AsUInt64() & Vector256.Create(~SignMask)) ^ odd).AsDouble(); } public static Vector512 Invoke(Vector512 x) { - Vector512 sign = x.AsUInt64() & Vector512.Create(~SignMask); - Vector512 uxMasked = Vector512.Abs(x).AsUInt64(); - - if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue))) + Vector512 uxMasked = Vector512.Abs(x); + if (Vector512.GreaterThanAny(uxMasked.AsUInt64(), Vector512.Create(MaxVectorizedValue))) { return ApplyScalar(x); } - Vector512 r = uxMasked.AsDouble(); Vector512 almHuge = Vector512.Create(AlmHuge); - Vector512 dn = (r * Vector512.Create(1 / double.Pi)) + almHuge; + Vector512 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector512.Create(1 / double.Pi), almHuge); Vector512 odd = dn.AsUInt64() << 63; dn -= almHuge; - Vector512 f = r - (dn * Vector512.Create(double.Pi)) - (dn * Vector512.Create(Pi_Tail1)) - (dn * Vector512.Create(Pi_Tail2)); + Vector512 f = uxMasked - (dn * Vector512.Create(double.Pi)) - (dn * Vector512.Create(Pi_Tail1)) - (dn * Vector512.Create(Pi_Tail2)); // POLY_EVAL_ODD_17 Vector512 f2 = f * f; @@ -16732,7 +16744,7 @@ public static Vector512 Invoke(Vector512 x) Vector512 b2 = MultiplyAddEstimateOperator.Invoke(f10, a3, f14 * a4); Vector512 poly = MultiplyAddEstimateOperator.Invoke(f, b1 + b2, f); - return (poly.AsUInt64() ^ sign ^ odd).AsDouble(); + return (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask)) ^ odd).AsDouble(); } } @@ -16910,22 +16922,357 @@ public static Vector512 Invoke(Vector512 t) internal readonly struct TanOperator : IUnaryOperator where T : ITrigonometricFunctions { - public static bool Vectorizable => false; // TODO: Vectorize + // This code is based on `vrs4_tan` and `vrd2_tan` from amd/aocl-libm-ose + // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. + // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Implementation notes from amd/aocl-libm-ose: + // -------------------------------------------- + // A given x is reduced into the form: + // |x| = (N * π/2) + F + // Where N is an integer obtained using: + // N = round(x * 2/π) + // And F is a fraction part lying in the interval + // [-π/4, +π/4]; + // obtained as F = |x| - (N * π/2) + // Thus tan(x) is given by + // tan(x) = tan((N * π/2) + F) = tan(F) + // when N is even, = -cot(F) = -1/tan(F) + // when N is odd, tan(F) is approximated using a polynomial + // obtained from Remez approximation from Sollya. + + public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); + public static T Invoke(T x) => T.Tan(x); - public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); - public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); - public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); + + public static Vector128 Invoke(Vector128 x) + { + if (typeof(T) == typeof(float)) + { + return TanOperatorSingle.Invoke(x.AsSingle()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + return TanOperatorDouble.Invoke(x.AsDouble()).As(); + } + } + + public static Vector256 Invoke(Vector256 x) + { + if (typeof(T) == typeof(float)) + { + return TanOperatorSingle.Invoke(x.AsSingle()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + return TanOperatorDouble.Invoke(x.AsDouble()).As(); + } + } + + public static Vector512 Invoke(Vector512 x) + { + if (typeof(T) == typeof(float)) + { + return TanOperatorSingle.Invoke(x.AsSingle()).As(); + } + else + { + Debug.Assert(typeof(T) == typeof(double)); + return TanOperatorDouble.Invoke(x.AsDouble()).As(); + } + } + } + + /// float.Tan(x) + internal readonly struct TanOperatorSingle : IUnaryOperator + { + internal const uint SignMask = 0x7FFFFFFFu; + internal const uint MaxVectorizedValue = 0x49800000u; + private const float AlmHuge = 1.2582912e7f; + private const float Pi_Tail2 = 4.371139e-8f; + private const float Pi_Tail3 = 1.7151245e-15f; + private const float C1 = 0.33333358f; + private const float C2 = 0.13332522f; + private const float C3 = 0.05407107f; + private const float C4 = 0.021237267f; + private const float C5 = 0.010932301f; + private const float C6 = -1.5722344e-5f; + private const float C7 = 0.0044221194f; + + public static bool Vectorizable => true; + + public static float Invoke(float x) => float.Tan(x); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 uxMasked = Vector128.Abs(x); + if (Vector128.GreaterThanAny(uxMasked.AsUInt32(), Vector128.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector128 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector128.Create(2 / float.Pi), Vector128.Create(AlmHuge)); + Vector128 odd = dn.AsUInt32() << 31; + dn -= Vector128.Create(AlmHuge); + + Vector128 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(-float.Pi / 2), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(Pi_Tail2), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector128.Create(Pi_Tail3), f); + + // POLY_EVAL_ODD_15 + Vector128 f2 = f * f; + Vector128 f4 = f2 * f2; + Vector128 f8 = f4 * f4; + Vector128 f12 = f8 * f4; + Vector128 a1 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1)); + Vector128 a2 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C4), f2, Vector128.Create(C3)); + Vector128 a3 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C6), f2, Vector128.Create(C5)); + Vector128 b1 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); + Vector128 b2 = MultiplyAddEstimateOperator.Invoke(f8, a3, f12 * Vector128.Create(C7)); + Vector128 poly = MultiplyAddEstimateOperator.Invoke(f * f2, b1 + b2, f); + + Vector128 result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector128.Create(~SignMask))).AsSingle(); + return Vector128.ConditionalSelect(Vector128.Equals(odd, Vector128.Zero).AsSingle(), + result, + Vector128.Create(-1f) / result); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 uxMasked = Vector256.Abs(x); + if (Vector256.GreaterThanAny(uxMasked.AsUInt32(), Vector256.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector256 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector256.Create(2 / float.Pi), Vector256.Create(AlmHuge)); + Vector256 odd = dn.AsUInt32() << 31; + dn -= Vector256.Create(AlmHuge); + + Vector256 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(-float.Pi / 2), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(Pi_Tail2), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector256.Create(Pi_Tail3), f); + + // POLY_EVAL_ODD_15 + Vector256 f2 = f * f; + Vector256 f4 = f2 * f2; + Vector256 f8 = f4 * f4; + Vector256 f12 = f8 * f4; + Vector256 a1 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1)); + Vector256 a2 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C4), f2, Vector256.Create(C3)); + Vector256 a3 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C6), f2, Vector256.Create(C5)); + Vector256 b1 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); + Vector256 b2 = MultiplyAddEstimateOperator.Invoke(f8, a3, f12 * Vector256.Create(C7)); + Vector256 poly = MultiplyAddEstimateOperator.Invoke(f * f2, b1 + b2, f); + + Vector256 result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector256.Create(~SignMask))).AsSingle(); + return Vector256.ConditionalSelect(Vector256.Equals(odd, Vector256.Zero).AsSingle(), + result, + Vector256.Create(-1f) / result); + } + + public static Vector512 Invoke(Vector512 x) + { + Vector512 uxMasked = Vector512.Abs(x); + if (Vector512.GreaterThanAny(uxMasked.AsUInt32(), Vector512.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector512 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector512.Create(2 / float.Pi), Vector512.Create(AlmHuge)); + Vector512 odd = dn.AsUInt32() << 31; + dn -= Vector512.Create(AlmHuge); + + Vector512 f = uxMasked; + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(-float.Pi / 2), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(Pi_Tail2), f); + f = MultiplyAddEstimateOperator.Invoke(dn, Vector512.Create(Pi_Tail3), f); + + // POLY_EVAL_ODD_15 + Vector512 f2 = f * f; + Vector512 f4 = f2 * f2; + Vector512 f8 = f4 * f4; + Vector512 f12 = f8 * f4; + Vector512 a1 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1)); + Vector512 a2 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C4), f2, Vector512.Create(C3)); + Vector512 a3 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C6), f2, Vector512.Create(C5)); + Vector512 b1 = MultiplyAddEstimateOperator.Invoke(a2, f4, a1); + Vector512 b2 = MultiplyAddEstimateOperator.Invoke(f8, a3, f12 * Vector512.Create(C7)); + Vector512 poly = MultiplyAddEstimateOperator.Invoke(f * f2, b1 + b2, f); + + Vector512 result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector512.Create(~SignMask))).AsSingle(); + return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512.Zero).AsSingle(), + result, + Vector512.Create(-1f) / result); + } + } + + /// double.Tan(x) + internal readonly struct TanOperatorDouble : IUnaryOperator + { + internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul; + internal const ulong MaxVectorizedValue = 0x4160000000000000ul; + private const double AlmHuge = 6.755399441055744e15; + private const double HalfPi2 = 6.123233995736766E-17; + private const double HalfPi3 = -1.4973849048591698E-33; + private const double C1 = 0.33333333333332493; + private const double C3 = 0.133333333334343; + private const double C5 = 0.0539682539203796; + private const double C7 = 0.02186948972198256; + private const double C9 = 0.008863217894198291; + private const double C11 = 0.003592298593761111; + private const double C13 = 0.0014547086183165365; + private const double C15 = 5.952456856028558E-4; + private const double C17 = 2.2190741289936845E-4; + private const double C19 = 1.3739809957985104E-4; + private const double C21 = -2.7500197359895707E-5; + private const double C23 = 9.038741690184683E-5; + private const double C25 = -4.534076545538694E-5; + private const double C27 = 2.0966522562190197E-5; + + public static bool Vectorizable => true; + + public static double Invoke(double x) => double.Tan(x); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 uxMasked = Vector128.Abs(x); + if (Vector128.GreaterThanAny(uxMasked.AsUInt64(), Vector128.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector128 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector128.Create(2 / double.Pi), Vector128.Create(AlmHuge)); + Vector128 odd = dn.AsUInt64() << 63; + dn -= Vector128.Create(AlmHuge); + Vector128 f = uxMasked.AsDouble() - (dn * (double.Pi / 2)) - (dn * HalfPi2) - (dn * HalfPi3); + + // POLY_EVAL_ODD_29 + Vector128 g = f * f; + Vector128 g2 = g * g; + Vector128 g3 = g * g2; + Vector128 g5 = g3 * g2; + Vector128 g7 = g5 * g2; + Vector128 g9 = g7 * g2; + Vector128 g11 = g9 * g2; + Vector128 g13 = g11 * g2; + Vector128 a1 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C3), g, Vector128.Create(C1)); + Vector128 a2 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C7), g, Vector128.Create(C5)); + Vector128 a3 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C11), g, Vector128.Create(C9)); + Vector128 a4 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C15), g, Vector128.Create(C13)); + Vector128 a5 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C19), g, Vector128.Create(C17)); + Vector128 a6 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C23), g, Vector128.Create(C21)); + Vector128 a7 = MultiplyAddEstimateOperator.Invoke(Vector128.Create(C27), g, Vector128.Create(C25)); + Vector128 b1 = MultiplyAddEstimateOperator.Invoke(g, a1, g3 * a2); + Vector128 b2 = MultiplyAddEstimateOperator.Invoke(g5, a3, g7 * a4); + Vector128 b3 = MultiplyAddEstimateOperator.Invoke(g9, a5, g11 * a6); + Vector128 q = MultiplyAddEstimateOperator.Invoke(g13, a7, b1 + b2 + b3); + Vector128 poly = MultiplyAddEstimateOperator.Invoke(f, q, f); + + Vector128 result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector128.Create(~SignMask))).AsDouble(); + return Vector128.ConditionalSelect(Vector128.Equals(odd, Vector128.Zero).AsDouble(), + result, + Vector128.Create(-1.0) / result); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 uxMasked = Vector256.Abs(x); + if (Vector256.GreaterThanAny(uxMasked.AsUInt64(), Vector256.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector256 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector256.Create(2 / double.Pi), Vector256.Create(AlmHuge)); + Vector256 odd = dn.AsUInt64() << 63; + dn -= Vector256.Create(AlmHuge); + Vector256 f = uxMasked.AsDouble() - (dn * (double.Pi / 2)) - (dn * HalfPi2) - (dn * HalfPi3); + + // POLY_EVAL_ODD_29 + Vector256 g = f * f; + Vector256 g2 = g * g; + Vector256 g3 = g * g2; + Vector256 g5 = g3 * g2; + Vector256 g7 = g5 * g2; + Vector256 g9 = g7 * g2; + Vector256 g11 = g9 * g2; + Vector256 g13 = g11 * g2; + Vector256 a1 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C3), g, Vector256.Create(C1)); + Vector256 a2 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C7), g, Vector256.Create(C5)); + Vector256 a3 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C11), g, Vector256.Create(C9)); + Vector256 a4 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C15), g, Vector256.Create(C13)); + Vector256 a5 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C19), g, Vector256.Create(C17)); + Vector256 a6 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C23), g, Vector256.Create(C21)); + Vector256 a7 = MultiplyAddEstimateOperator.Invoke(Vector256.Create(C27), g, Vector256.Create(C25)); + Vector256 b1 = MultiplyAddEstimateOperator.Invoke(g, a1, g3 * a2); + Vector256 b2 = MultiplyAddEstimateOperator.Invoke(g5, a3, g7 * a4); + Vector256 b3 = MultiplyAddEstimateOperator.Invoke(g9, a5, g11 * a6); + Vector256 q = MultiplyAddEstimateOperator.Invoke(g13, a7, b1 + b2 + b3); + Vector256 poly = MultiplyAddEstimateOperator.Invoke(f, q, f); + + Vector256 result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector256.Create(~SignMask))).AsDouble(); + return Vector256.ConditionalSelect(Vector256.Equals(odd, Vector256.Zero).AsDouble(), + result, + Vector256.Create(-1.0) / result); + } + + public static Vector512 Invoke(Vector512 x) + { + Vector512 uxMasked = Vector512.Abs(x); + if (Vector512.GreaterThanAny(uxMasked.AsUInt64(), Vector512.Create(MaxVectorizedValue))) + { + return ApplyScalar(x); + } + + Vector512 dn = MultiplyAddEstimateOperator.Invoke(uxMasked, Vector512.Create(2 / double.Pi), Vector512.Create(AlmHuge)); + Vector512 odd = dn.AsUInt64() << 63; + dn -= Vector512.Create(AlmHuge); + Vector512 f = uxMasked.AsDouble() - (dn * (double.Pi / 2)) - (dn * HalfPi2) - (dn * HalfPi3); + + // POLY_EVAL_ODD_29 + Vector512 g = f * f; + Vector512 g2 = g * g; + Vector512 g3 = g * g2; + Vector512 g5 = g3 * g2; + Vector512 g7 = g5 * g2; + Vector512 g9 = g7 * g2; + Vector512 g11 = g9 * g2; + Vector512 g13 = g11 * g2; + Vector512 a1 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C3), g, Vector512.Create(C1)); + Vector512 a2 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C7), g, Vector512.Create(C5)); + Vector512 a3 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C11), g, Vector512.Create(C9)); + Vector512 a4 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C15), g, Vector512.Create(C13)); + Vector512 a5 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C19), g, Vector512.Create(C17)); + Vector512 a6 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C23), g, Vector512.Create(C21)); + Vector512 a7 = MultiplyAddEstimateOperator.Invoke(Vector512.Create(C27), g, Vector512.Create(C25)); + Vector512 b1 = MultiplyAddEstimateOperator.Invoke(g, a1, g3 * a2); + Vector512 b2 = MultiplyAddEstimateOperator.Invoke(g5, a3, g7 * a4); + Vector512 b3 = MultiplyAddEstimateOperator.Invoke(g9, a5, g11 * a6); + Vector512 q = MultiplyAddEstimateOperator.Invoke(g13, a7, b1 + b2 + b3); + Vector512 poly = MultiplyAddEstimateOperator.Invoke(f, q, f); + + Vector512 result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask))).AsDouble(); + return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512.Zero).AsDouble(), + result, + Vector512.Create(-1.0) / result); + } } /// T.TanPi(x) internal readonly struct TanPiOperator : IUnaryOperator where T : ITrigonometricFunctions { - public static bool Vectorizable => TanOperator.Vectorizable; + public static bool Vectorizable => false; public static T Invoke(T x) => T.TanPi(x); - public static Vector128 Invoke(Vector128 x) => TanOperator.Invoke(x * Vector128.Create(T.Pi)); - public static Vector256 Invoke(Vector256 x) => TanOperator.Invoke(x * Vector256.Create(T.Pi)); - public static Vector512 Invoke(Vector512 x) => TanOperator.Invoke(x * Vector512.Create(T.Pi)); + public static Vector128 Invoke(Vector128 x) => throw new NotSupportedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotSupportedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotSupportedException(); } /// T.Tanh(x)