diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs
index dec6446cd7653b..065c0f4fd08aaf 100644
--- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs
+++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs
@@ -16068,18 +16068,21 @@ public static Vector512<T> Invoke(Vector512<T> x)
 
             public static Vector128<float> Invoke(Vector128<float> x)
             {
-                Vector128<uint> uxMasked = Vector128.Abs(x).AsUInt32();
-                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
+                Vector128<float> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt32(), Vector128.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<CosOperatorSingle>(x);
                 }
 
-                Vector128<float> r = uxMasked.AsSingle();
                 Vector128<float> almHuge = Vector128.Create(AlmHuge);
-                Vector128<float> dn = ((r + Vector128.Create(float.Pi / 2)) * Vector128.Create(1 / float.Pi)) + almHuge;
+                Vector128<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked + Vector128.Create(float.Pi / 2), Vector128.Create(1 / float.Pi), almHuge);
                 Vector128<uint> odd = dn.AsUInt32() << 31;
                 dn = dn - almHuge - Vector128.Create(0.5f);
-                Vector128<float> f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2));
+
+                Vector128<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
 
                 // POLY_EVAL_ODD_9
                 Vector128<float> f2 = f * f;
@@ -16095,18 +16098,21 @@ public static Vector128<float> Invoke(Vector128<float> x)
 
             public static Vector256<float> Invoke(Vector256<float> x)
             {
-                Vector256<uint> uxMasked = Vector256.Abs(x).AsUInt32();
-                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
+                Vector256<float> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt32(), Vector256.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<CosOperatorSingle>(x);
                 }
 
-                Vector256<float> r = uxMasked.AsSingle();
                 Vector256<float> almHuge = Vector256.Create(AlmHuge);
-                Vector256<float> dn = ((r + Vector256.Create(float.Pi / 2)) * Vector256.Create(1 / float.Pi)) + almHuge;
+                Vector256<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked + Vector256.Create(float.Pi / 2), Vector256.Create(1 / float.Pi), almHuge);
                 Vector256<uint> odd = dn.AsUInt32() << 31;
                 dn = dn - almHuge - Vector256.Create(0.5f);
-                Vector256<float> f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2));
+
+                Vector256<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
 
                 // POLY_EVAL_ODD_9
                 Vector256<float> f2 = f * f;
@@ -16122,18 +16128,21 @@ public static Vector256<float> Invoke(Vector256<float> x)
 
             public static Vector512<float> Invoke(Vector512<float> x)
             {
-                Vector512<uint> uxMasked = Vector512.Abs(x).AsUInt32();
-                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
+                Vector512<float> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt32(), Vector512.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<CosOperatorSingle>(x);
                 }
 
-                Vector512<float> r = uxMasked.AsSingle();
                 Vector512<float> almHuge = Vector512.Create(AlmHuge);
-                Vector512<float> dn = ((r + Vector512.Create(float.Pi / 2)) * Vector512.Create(1 / float.Pi)) + almHuge;
+                Vector512<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked + Vector512.Create(float.Pi / 2), Vector512.Create(1 / float.Pi), almHuge);
                 Vector512<uint> odd = dn.AsUInt32() << 31;
                 dn = dn - almHuge - Vector512.Create(0.5f);
-                Vector512<float> f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2));
+
+                Vector512<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail2), f);
 
                 // POLY_EVAL_ODD_9
                 Vector512<float> f2 = f * f;
@@ -16171,18 +16180,21 @@ public static Vector512<float> Invoke(Vector512<float> x)
 
             public static Vector128<double> Invoke(Vector128<double> x)
             {
-                Vector128<ulong> uxMasked = Vector128.Abs(x).AsUInt64();
-                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
+                Vector128<double> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt64(), Vector128.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<CosOperatorDouble>(x);
                 }
 
-                Vector128<double> r = uxMasked.AsDouble();
                 Vector128<double> almHuge = Vector128.Create(AlmHuge);
-                Vector128<double> dn = (r * Vector128.Create(1 / double.Pi)) + Vector128.Create(double.Pi / 2) + almHuge;
+                Vector128<double> dn = (uxMasked * Vector128.Create(1 / double.Pi)) + Vector128.Create(double.Pi / 2) + almHuge;
                 Vector128<ulong> odd = dn.AsUInt64() << 63;
                 dn = dn - almHuge - Vector128.Create(0.5);
-                Vector128<double> f = r + (dn * Vector128.Create(-double.Pi)) + (dn * Vector128.Create(Pi_Tail2)) + (dn * Vector128.Create(Pi_Tail3));
+
+                Vector128<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(Pi_Tail3), f);
 
                 // POLY_EVAL_ODD_17
                 Vector128<double> f2 = f * f;
@@ -16203,18 +16215,21 @@ public static Vector128<double> Invoke(Vector128<double> x)
 
             public static Vector256<double> Invoke(Vector256<double> x)
             {
-                Vector256<ulong> uxMasked = Vector256.Abs(x).AsUInt64();
-                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
+                Vector256<double> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt64(), Vector256.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<CosOperatorDouble>(x);
                 }
 
-                Vector256<double> r = uxMasked.AsDouble();
                 Vector256<double> almHuge = Vector256.Create(AlmHuge);
-                Vector256<double> dn = (r * Vector256.Create(1 / double.Pi)) + Vector256.Create(double.Pi / 2) + almHuge;
+                Vector256<double> dn = (uxMasked * Vector256.Create(1 / double.Pi)) + Vector256.Create(double.Pi / 2) + almHuge;
                 Vector256<ulong> odd = dn.AsUInt64() << 63;
                 dn = dn - almHuge - Vector256.Create(0.5);
-                Vector256<double> f = r + (dn * Vector256.Create(-double.Pi)) + (dn * Vector256.Create(Pi_Tail2)) + (dn * Vector256.Create(Pi_Tail3));
+
+                Vector256<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(Pi_Tail3), f);
 
                 // POLY_EVAL_ODD_17
                 Vector256<double> f2 = f * f;
@@ -16235,18 +16250,21 @@ public static Vector256<double> Invoke(Vector256<double> x)
 
             public static Vector512<double> Invoke(Vector512<double> x)
             {
-                Vector512<ulong> uxMasked = Vector512.Abs(x).AsUInt64();
-                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
+                Vector512<double> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt64(), Vector512.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<CosOperatorDouble>(x);
                 }
 
-                Vector512<double> r = uxMasked.AsDouble();
                 Vector512<double> almHuge = Vector512.Create(AlmHuge);
-                Vector512<double> dn = (r * Vector512.Create(1 / double.Pi)) + Vector512.Create(double.Pi / 2) + almHuge;
+                Vector512<double> dn = (uxMasked * Vector512.Create(1 / double.Pi)) + Vector512.Create(double.Pi / 2) + almHuge;
                 Vector512<ulong> odd = dn.AsUInt64() << 63;
                 dn = dn - almHuge - Vector512.Create(0.5);
-                Vector512<double> f = r + (dn * Vector512.Create(-double.Pi)) + (dn * Vector512.Create(Pi_Tail2)) + (dn * Vector512.Create(Pi_Tail3));
+
+                Vector512<double> f = uxMasked;
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(Pi_Tail3), f);
 
                 // POLY_EVAL_ODD_17
                 Vector512<double> f2 = f * f;
@@ -16526,20 +16544,21 @@ public static Vector512<T> Invoke(Vector512<T> x)
 
             public static Vector128<float> Invoke(Vector128<float> x)
             {
-                Vector128<uint> sign = x.AsUInt32() & Vector128.Create(~SignMask);
-                Vector128<uint> uxMasked = Vector128.Abs(x).AsUInt32();
-
-                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
+                Vector128<float> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt32(), Vector128.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<SinOperatorSingle>(x);
                 }
 
-                Vector128<float> r = uxMasked.AsSingle();
                 Vector128<float> almHuge = Vector128.Create(AlmHuge);
-                Vector128<float> dn = (r * Vector128.Create(1 / float.Pi)) + almHuge;
+                Vector128<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector128.Create(1 / float.Pi), almHuge);
                 Vector128<uint> odd = dn.AsUInt32() << 31;
                 dn -= almHuge;
-                Vector128<float> f = r + (dn * Vector128.Create(-float.Pi)) + (dn * Vector128.Create(Pi_Tail1)) + (dn * Vector128.Create(Pi_Tail2));
+
+                Vector128<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
 
                 // POLY_EVAL_ODD_9
                 Vector128<float> f2 = f * f;
@@ -16550,25 +16569,26 @@ public static Vector128<float> Invoke(Vector128<float> x)
                 Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
                 Vector128<float> poly = f * a3;
 
-                return (poly.AsUInt32() ^ sign ^ odd).AsSingle();
+                return (poly.AsUInt32() ^ (x.AsUInt32() & Vector128.Create(~SignMask)) ^ odd).AsSingle();
             }
 
             public static Vector256<float> Invoke(Vector256<float> x)
             {
-                Vector256<uint> sign = x.AsUInt32() & Vector256.Create(~SignMask);
-                Vector256<uint> uxMasked = Vector256.Abs(x).AsUInt32();
-
-                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
+                Vector256<float> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt32(), Vector256.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<SinOperatorSingle>(x);
                 }
 
-                Vector256<float> r = uxMasked.AsSingle();
                 Vector256<float> almHuge = Vector256.Create(AlmHuge);
-                Vector256<float> dn = (r * Vector256.Create(1 / float.Pi)) + almHuge;
+                Vector256<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector256.Create(1 / float.Pi), almHuge);
                 Vector256<uint> odd = dn.AsUInt32() << 31;
                 dn -= almHuge;
-                Vector256<float> f = r + (dn * Vector256.Create(-float.Pi)) + (dn * Vector256.Create(Pi_Tail1)) + (dn * Vector256.Create(Pi_Tail2));
+
+                Vector256<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
 
                 // POLY_EVAL_ODD_9
                 Vector256<float> f2 = f * f;
@@ -16579,25 +16599,26 @@ public static Vector256<float> Invoke(Vector256<float> x)
                 Vector256<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
                 Vector256<float> poly = f * a3;
 
-                return (poly.AsUInt32() ^ sign ^ odd).AsSingle();
+                return (poly.AsUInt32() ^ (x.AsUInt32() & Vector256.Create(~SignMask)) ^ odd).AsSingle();
             }
 
             public static Vector512<float> Invoke(Vector512<float> x)
             {
-                Vector512<uint> sign = x.AsUInt32() & Vector512.Create(~SignMask);
-                Vector512<uint> uxMasked = Vector512.Abs(x).AsUInt32();
-
-                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
+                Vector512<float> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt32(), Vector512.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<SinOperatorSingle>(x);
                 }
 
-                Vector512<float> r = uxMasked.AsSingle();
                 Vector512<float> almHuge = Vector512.Create(AlmHuge);
-                Vector512<float> dn = (r * Vector512.Create(1 / float.Pi)) + almHuge;
+                Vector512<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector512.Create(1 / float.Pi), almHuge);
                 Vector512<uint> odd = dn.AsUInt32() << 31;
                 dn -= almHuge;
-                Vector512<float> f = r + (dn * Vector512.Create(-float.Pi)) + (dn * Vector512.Create(Pi_Tail1)) + (dn * Vector512.Create(Pi_Tail2));
+
+                Vector512<float> f = uxMasked;
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(-float.Pi), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail1), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail2), f);
 
                 // POLY_EVAL_ODD_9
                 Vector512<float> f2 = f * f;
@@ -16608,7 +16629,7 @@ public static Vector512<float> Invoke(Vector512<float> x)
                 Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
                 Vector512<float> poly = f * a3;
 
-                return (poly.AsUInt32() ^ sign ^ odd).AsSingle();
+                return (poly.AsUInt32() ^ (x.AsUInt32() & Vector512.Create(~SignMask)) ^ odd).AsSingle();
             }
         }
 
@@ -16635,20 +16656,17 @@ public static Vector512<float> Invoke(Vector512<float> x)
 
             public static Vector128<double> Invoke(Vector128<double> x)
             {
-                Vector128<ulong> sign = x.AsUInt64() & Vector128.Create(~SignMask);
-                Vector128<ulong> uxMasked = Vector128.Abs(x).AsUInt64();
-
-                if (Vector128.GreaterThanAny(uxMasked, Vector128.Create(MaxVectorizedValue)))
+                Vector128<double> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt64(), Vector128.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<SinOperatorDouble>(x);
                 }
 
-                Vector128<double> r = uxMasked.AsDouble();
                 Vector128<double> almHuge = Vector128.Create(AlmHuge);
-                Vector128<double> dn = (r * Vector128.Create(1 / double.Pi)) + almHuge;
+                Vector128<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector128.Create(1 / double.Pi), almHuge);
                 Vector128<ulong> odd = dn.AsUInt64() << 63;
                 dn -= almHuge;
-                Vector128<double> f = r - (dn * Vector128.Create(double.Pi)) - (dn * Vector128.Create(Pi_Tail1)) - (dn * Vector128.Create(Pi_Tail2));
+                Vector128<double> f = uxMasked - (dn * Vector128.Create(double.Pi)) - (dn * Vector128.Create(Pi_Tail1)) - (dn * Vector128.Create(Pi_Tail2));
 
                 // POLY_EVAL_ODD_17
                 Vector128<double> f2 = f * f;
@@ -16664,25 +16682,22 @@ public static Vector128<double> Invoke(Vector128<double> x)
                 Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
                 Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
 
-                return (poly.AsUInt64() ^ sign ^ odd).AsDouble();
+                return (poly.AsUInt64() ^ (x.AsUInt64() & Vector128.Create(~SignMask)) ^ odd).AsDouble();
             }
 
             public static Vector256<double> Invoke(Vector256<double> x)
             {
-                Vector256<ulong> sign = x.AsUInt64() & Vector256.Create(~SignMask);
-                Vector256<ulong> uxMasked = Vector256.Abs(x).AsUInt64();
-
-                if (Vector256.GreaterThanAny(uxMasked, Vector256.Create(MaxVectorizedValue)))
+                Vector256<double> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt64(), Vector256.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<SinOperatorDouble>(x);
                 }
 
-                Vector256<double> r = uxMasked.AsDouble();
                 Vector256<double> almHuge = Vector256.Create(AlmHuge);
-                Vector256<double> dn = (r * Vector256.Create(1 / double.Pi)) + almHuge;
+                Vector256<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector256.Create(1 / double.Pi), almHuge);
                 Vector256<ulong> odd = dn.AsUInt64() << 63;
                 dn -= almHuge;
-                Vector256<double> f = r - (dn * Vector256.Create(double.Pi)) - (dn * Vector256.Create(Pi_Tail1)) - (dn * Vector256.Create(Pi_Tail2));
+                Vector256<double> f = uxMasked - (dn * Vector256.Create(double.Pi)) - (dn * Vector256.Create(Pi_Tail1)) - (dn * Vector256.Create(Pi_Tail2));
 
                 // POLY_EVAL_ODD_17
                 Vector256<double> f2 = f * f;
@@ -16698,25 +16713,22 @@ public static Vector256<double> Invoke(Vector256<double> x)
                 Vector256<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
                 Vector256<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
 
-                return (poly.AsUInt64() ^ sign ^ odd).AsDouble();
+                return (poly.AsUInt64() ^ (x.AsUInt64() & Vector256.Create(~SignMask)) ^ odd).AsDouble();
             }
 
             public static Vector512<double> Invoke(Vector512<double> x)
             {
-                Vector512<ulong> sign = x.AsUInt64() & Vector512.Create(~SignMask);
-                Vector512<ulong> uxMasked = Vector512.Abs(x).AsUInt64();
-
-                if (Vector512.GreaterThanAny(uxMasked, Vector512.Create(MaxVectorizedValue)))
+                Vector512<double> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt64(), Vector512.Create(MaxVectorizedValue)))
                 {
                     return ApplyScalar<SinOperatorDouble>(x);
                 }
 
-                Vector512<double> r = uxMasked.AsDouble();
                 Vector512<double> almHuge = Vector512.Create(AlmHuge);
-                Vector512<double> dn = (r * Vector512.Create(1 / double.Pi)) + almHuge;
+                Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(1 / double.Pi), almHuge);
                 Vector512<ulong> odd = dn.AsUInt64() << 63;
                 dn -= almHuge;
-                Vector512<double> f = r - (dn * Vector512.Create(double.Pi)) - (dn * Vector512.Create(Pi_Tail1)) - (dn * Vector512.Create(Pi_Tail2));
+                Vector512<double> f = uxMasked - (dn * Vector512.Create(double.Pi)) - (dn * Vector512.Create(Pi_Tail1)) - (dn * Vector512.Create(Pi_Tail2));
 
                 // POLY_EVAL_ODD_17
                 Vector512<double> f2 = f * f;
@@ -16732,7 +16744,7 @@ public static Vector512<double> Invoke(Vector512<double> x)
                 Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(f10, a3, f14 * a4);
                 Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, b1 + b2, f);
 
-                return (poly.AsUInt64() ^ sign ^ odd).AsDouble();
+                return (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask)) ^ odd).AsDouble();
             }
         }
 
@@ -16910,22 +16922,357 @@ public static Vector512<T> Invoke(Vector512<T> t)
         internal readonly struct TanOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => false; // TODO: Vectorize
+            // This code is based on `vrs4_tan` and `vrd2_tan` from amd/aocl-libm-ose
+            // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved.
+            //
+            // Licensed under the BSD 3-Clause "New" or "Revised" License
+            // See THIRD-PARTY-NOTICES.TXT for the full license text
+
+            // Implementation notes from amd/aocl-libm-ose:
+            // --------------------------------------------
+            // A given x is reduced into the form:
+            //          |x| = (N * π/2) + F
+            // Where N is an integer obtained using:
+            //         N = round(|x| * 2/π)
+            // And F is the fractional part, lying in the interval
+            //         [-π/4, +π/4]
+            // and obtained as F = |x| - (N * π/2)
+            // Thus tan(|x|) is given by
+            //         tan((N * π/2) + F) =  tan(F)             when N is even
+            //                            = -cot(F) = -1/tan(F) when N is odd
+            // where tan(F) is approximated using a polynomial obtained from
+            // a Remez approximation from Sollya; the sign of x is restored at the end.
+
+            public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); // the vector overloads below only handle float and double
+
             public static T Invoke(T x) => T.Tan(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException();
-            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException();
-            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException();
+
+            public static Vector128<T> Invoke(Vector128<T> x)
+            {
+                if (typeof(T) == typeof(float)) // dispatch on T: reinterpret the lanes and use the single-precision kernel
+                {
+                    return TanOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // the only other T admitted by Vectorizable
+                    return TanOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector256<T> Invoke(Vector256<T> x)
+            {
+                if (typeof(T) == typeof(float)) // same dispatch as the Vector128 overload
+                {
+                    return TanOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // the only other T admitted by Vectorizable
+                    return TanOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+
+            public static Vector512<T> Invoke(Vector512<T> x)
+            {
+                if (typeof(T) == typeof(float)) // same dispatch as the Vector128 overload
+                {
+                    return TanOperatorSingle.Invoke(x.AsSingle()).As<float, T>();
+                }
+                else
+                {
+                    Debug.Assert(typeof(T) == typeof(double)); // the only other T admitted by Vectorizable
+                    return TanOperatorDouble.Invoke(x.AsDouble()).As<double, T>();
+                }
+            }
+        }
+
+        /// <summary>float.Tan(x), with Vector128/256/512 overloads that share one range reduction and one odd polynomial.</summary>
+        internal readonly struct TanOperatorSingle : IUnaryOperator<float, float>
+        {
+            internal const uint SignMask = 0x7FFFFFFFu; // every bit except the sign bit; ~SignMask isolates the sign bit
+            internal const uint MaxVectorizedValue = 0x49800000u; // bit pattern of 1048576f (2^20); |x| bit patterns above it (including infinity and NaN) take the scalar path
+            private const float AlmHuge = 1.2582912e7f; // 1.5 * 2^23; added to and then subtracted from dn in the range reduction
+            private const float Pi_Tail2 = 4.371139e-8f; // first correction term added after dn * (-float.Pi / 2)
+            private const float Pi_Tail3 = 1.7151245e-15f; // second, smaller correction term
+            private const float C1 = 0.33333358f; // C1..C7: poly = f + C1*f^3 + C2*f^5 + ... + C7*f^15
+            private const float C2 = 0.13332522f;
+            private const float C3 = 0.05407107f;
+            private const float C4 = 0.021237267f;
+            private const float C5 = 0.010932301f;
+            private const float C6 = -1.5722344e-5f;
+            private const float C7 = 0.0044221194f;
+
+            public static bool Vectorizable => true;
+
+            public static float Invoke(float x) => float.Tan(x);
+
+            public static Vector128<float> Invoke(Vector128<float> x)
+            {
+                Vector128<float> uxMasked = Vector128.Abs(x); // work on |x|; the sign of x is reapplied to the result
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt32(), Vector128.Create(MaxVectorizedValue))) // unsigned compare of the raw bits
+                {
+                    return ApplyScalar<TanOperatorSingle>(x); // any lane out of range: hand the whole vector to ApplyScalar
+                }
+
+                Vector128<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector128.Create(2 / float.Pi), Vector128.Create(AlmHuge)); // |x| * 2/π + AlmHuge
+                Vector128<uint> odd = dn.AsUInt32() << 31; // lowest bit of the biased dn moved to the top bit: nonzero when N is odd
+                dn -= Vector128.Create(AlmHuge); // remove the bias, leaving N as a float
+
+                Vector128<float> f = uxMasked; // f = |x| - N * π/2, accumulated in three steps
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(-float.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector128.Create(Pi_Tail3), f);
+
+                // POLY_EVAL_ODD_15: poly = f + f^3 * (C1 + C2*f^2 + C3*f^4 + C4*f^6 + C5*f^8 + C6*f^10 + C7*f^12)
+                Vector128<float> f2 = f * f;
+                Vector128<float> f4 = f2 * f2;
+                Vector128<float> f8 = f4 * f4;
+                Vector128<float> f12 = f8 * f4;
+                Vector128<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C2), f2, Vector128.Create(C1));
+                Vector128<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C4), f2, Vector128.Create(C3));
+                Vector128<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(Vector128.Create(C6), f2, Vector128.Create(C5));
+                Vector128<float> b1 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector128<float> b2 = MultiplyAddEstimateOperator<float>.Invoke(f8, a3, f12 * Vector128.Create(C7));
+                Vector128<float> poly = MultiplyAddEstimateOperator<float>.Invoke(f * f2, b1 + b2, f);
+
+                Vector128<float> result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector128.Create(~SignMask))).AsSingle(); // flip poly's sign where x is negative
+                return Vector128.ConditionalSelect(Vector128.Equals(odd, Vector128<uint>.Zero).AsSingle(), // even N keeps result; odd N takes -1 / result
+                    result,
+                    Vector128.Create(-1f) / result);
+            }
+
+            public static Vector256<float> Invoke(Vector256<float> x)
+            {
+                Vector256<float> uxMasked = Vector256.Abs(x); // work on |x|; the sign of x is reapplied to the result
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt32(), Vector256.Create(MaxVectorizedValue))) // unsigned compare of the raw bits
+                {
+                    return ApplyScalar<TanOperatorSingle>(x); // any lane out of range: hand the whole vector to ApplyScalar
+                }
+
+                Vector256<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector256.Create(2 / float.Pi), Vector256.Create(AlmHuge)); // |x| * 2/π + AlmHuge
+                Vector256<uint> odd = dn.AsUInt32() << 31; // lowest bit of the biased dn moved to the top bit: nonzero when N is odd
+                dn -= Vector256.Create(AlmHuge); // remove the bias, leaving N as a float
+
+                Vector256<float> f = uxMasked; // f = |x| - N * π/2, accumulated in three steps
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(-float.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector256.Create(Pi_Tail3), f);
+
+                // POLY_EVAL_ODD_15: poly = f + f^3 * (C1 + C2*f^2 + C3*f^4 + C4*f^6 + C5*f^8 + C6*f^10 + C7*f^12)
+                Vector256<float> f2 = f * f;
+                Vector256<float> f4 = f2 * f2;
+                Vector256<float> f8 = f4 * f4;
+                Vector256<float> f12 = f8 * f4;
+                Vector256<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C2), f2, Vector256.Create(C1));
+                Vector256<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C4), f2, Vector256.Create(C3));
+                Vector256<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(Vector256.Create(C6), f2, Vector256.Create(C5));
+                Vector256<float> b1 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector256<float> b2 = MultiplyAddEstimateOperator<float>.Invoke(f8, a3, f12 * Vector256.Create(C7));
+                Vector256<float> poly = MultiplyAddEstimateOperator<float>.Invoke(f * f2, b1 + b2, f);
+
+                Vector256<float> result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector256.Create(~SignMask))).AsSingle(); // flip poly's sign where x is negative
+                return Vector256.ConditionalSelect(Vector256.Equals(odd, Vector256<uint>.Zero).AsSingle(), // even N keeps result; odd N takes -1 / result
+                    result,
+                    Vector256.Create(-1f) / result);
+            }
+
+            public static Vector512<float> Invoke(Vector512<float> x)
+            {
+                Vector512<float> uxMasked = Vector512.Abs(x); // work on |x|; the sign of x is reapplied to the result
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt32(), Vector512.Create(MaxVectorizedValue))) // unsigned compare of the raw bits
+                {
+                    return ApplyScalar<TanOperatorSingle>(x); // any lane out of range: hand the whole vector to ApplyScalar
+                }
+
+                Vector512<float> dn = MultiplyAddEstimateOperator<float>.Invoke(uxMasked, Vector512.Create(2 / float.Pi), Vector512.Create(AlmHuge)); // |x| * 2/π + AlmHuge
+                Vector512<uint> odd = dn.AsUInt32() << 31; // lowest bit of the biased dn moved to the top bit: nonzero when N is odd
+                dn -= Vector512.Create(AlmHuge); // remove the bias, leaving N as a float
+
+                Vector512<float> f = uxMasked; // f = |x| - N * π/2, accumulated in three steps
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(-float.Pi / 2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail2), f);
+                f = MultiplyAddEstimateOperator<float>.Invoke(dn, Vector512.Create(Pi_Tail3), f);
+
+                // POLY_EVAL_ODD_15: poly = f + f^3 * (C1 + C2*f^2 + C3*f^4 + C4*f^6 + C5*f^8 + C6*f^10 + C7*f^12)
+                Vector512<float> f2 = f * f;
+                Vector512<float> f4 = f2 * f2;
+                Vector512<float> f8 = f4 * f4;
+                Vector512<float> f12 = f8 * f4;
+                Vector512<float> a1 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C2), f2, Vector512.Create(C1));
+                Vector512<float> a2 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C4), f2, Vector512.Create(C3));
+                Vector512<float> a3 = MultiplyAddEstimateOperator<float>.Invoke(Vector512.Create(C6), f2, Vector512.Create(C5));
+                Vector512<float> b1 = MultiplyAddEstimateOperator<float>.Invoke(a2, f4, a1);
+                Vector512<float> b2 = MultiplyAddEstimateOperator<float>.Invoke(f8, a3, f12 * Vector512.Create(C7));
+                Vector512<float> poly = MultiplyAddEstimateOperator<float>.Invoke(f * f2, b1 + b2, f);
+
+                Vector512<float> result = (poly.AsUInt32() ^ (x.AsUInt32() & Vector512.Create(~SignMask))).AsSingle(); // flip poly's sign where x is negative
+                return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512<uint>.Zero).AsSingle(), // even N keeps result; odd N takes -1 / result
+                    result,
+                    Vector512.Create(-1f) / result);
+            }
+        }
+
+        /// <summary>double.Tan(x), vectorized by reducing |x| modulo π/2 and evaluating an odd polynomial in the reduced argument.</summary>
+        internal readonly struct TanOperatorDouble : IUnaryOperator<double, double>
+        {
+            internal const ulong SignMask = 0x7FFFFFFFFFFFFFFFul; // every bit except the sign bit; ~SignMask selects the sign of x
+            internal const ulong MaxVectorizedValue = 0x4160000000000000ul; // bits of 2^23 (8388608.0); larger |x| bit patterns use the scalar path
+            private const double AlmHuge = 6.755399441055744e15; // 1.5 * 2^52; adding it leaves the rounded quotient in the low mantissa bits
+            private const double HalfPi2 = 6.123233995736766E-17; // reduction terms applied after double.Pi / 2, in decreasing magnitude
+            private const double HalfPi3 = -1.4973849048591698E-33;
+            private const double C1 = 0.33333333333332493; // Ck is the coefficient of f^(k + 2) in POLY_EVAL_ODD_29
+            private const double C3 = 0.133333333334343;
+            private const double C5 = 0.0539682539203796;
+            private const double C7 = 0.02186948972198256;
+            private const double C9 = 0.008863217894198291;
+            private const double C11 = 0.003592298593761111;
+            private const double C13 = 0.0014547086183165365;
+            private const double C15 = 5.952456856028558E-4;
+            private const double C17 = 2.2190741289936845E-4;
+            private const double C19 = 1.3739809957985104E-4;
+            private const double C21 = -2.7500197359895707E-5;
+            private const double C23 = 9.038741690184683E-5;
+            private const double C25 = -4.534076545538694E-5;
+            private const double C27 = 2.0966522562190197E-5;
+
+            public static bool Vectorizable => true;
+
+            public static double Invoke(double x) => double.Tan(x);
+
+            public static Vector128<double> Invoke(Vector128<double> x)
+            {
+                Vector128<double> uxMasked = Vector128.Abs(x);
+                if (Vector128.GreaterThanAny(uxMasked.AsUInt64(), Vector128.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<TanOperatorDouble>(x);
+                }
+
+                Vector128<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector128.Create(2 / double.Pi), Vector128.Create(AlmHuge));
+                Vector128<ulong> odd = dn.AsUInt64() << 63; // bit 0 of the biased sum (quotient parity) moved to the sign position
+                dn -= Vector128.Create(AlmHuge); // dn is now the integer-valued quotient of |x| by π/2
+                // f = |x| - dn * π/2, one π/2 term per multiply-add as in TanOperatorSingle; when fused, dn * term is not rounded before the subtraction.
+                Vector128<double> f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-double.Pi / 2), uxMasked);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-HalfPi2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector128.Create(-HalfPi3), f);
+
+                // POLY_EVAL_ODD_29: poly = f + C1*f^3 + C3*f^5 + ... + C27*f^29, with g = f^2
+                Vector128<double> g = f * f;
+                Vector128<double> g2 = g * g;
+                Vector128<double> g3 = g * g2;
+                Vector128<double> g5 = g3 * g2;
+                Vector128<double> g7 = g5 * g2;
+                Vector128<double> g9 = g7 * g2;
+                Vector128<double> g11 = g9 * g2;
+                Vector128<double> g13 = g11 * g2;
+                Vector128<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C3), g, Vector128.Create(C1));
+                Vector128<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C7), g, Vector128.Create(C5));
+                Vector128<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C11), g, Vector128.Create(C9));
+                Vector128<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C15), g, Vector128.Create(C13));
+                Vector128<double> a5 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C19), g, Vector128.Create(C17));
+                Vector128<double> a6 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C23), g, Vector128.Create(C21));
+                Vector128<double> a7 = MultiplyAddEstimateOperator<double>.Invoke(Vector128.Create(C27), g, Vector128.Create(C25));
+                Vector128<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(g, a1, g3 * a2);
+                Vector128<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(g5, a3, g7 * a4);
+                Vector128<double> b3 = MultiplyAddEstimateOperator<double>.Invoke(g9, a5, g11 * a6);
+                Vector128<double> q = MultiplyAddEstimateOperator<double>.Invoke(g13, a7, b1 + b2 + b3);
+                Vector128<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, q, f);
+                Vector128<double> result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector128.Create(~SignMask))).AsDouble(); // flip sign where x is negative; odd lanes then return -1 / result
+                return Vector128.ConditionalSelect(Vector128.Equals(odd, Vector128<ulong>.Zero).AsDouble(), result, Vector128.Create(-1.0) / result);
+            }
+
+            public static Vector256<double> Invoke(Vector256<double> x)
+            {
+                Vector256<double> uxMasked = Vector256.Abs(x);
+                if (Vector256.GreaterThanAny(uxMasked.AsUInt64(), Vector256.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<TanOperatorDouble>(x);
+                }
+
+                Vector256<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector256.Create(2 / double.Pi), Vector256.Create(AlmHuge));
+                Vector256<ulong> odd = dn.AsUInt64() << 63; // bit 0 of the biased sum (quotient parity) moved to the sign position
+                dn -= Vector256.Create(AlmHuge); // dn is now the integer-valued quotient of |x| by π/2
+                // f = |x| - dn * π/2, one π/2 term per multiply-add as in TanOperatorSingle; when fused, dn * term is not rounded before the subtraction.
+                Vector256<double> f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-double.Pi / 2), uxMasked);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-HalfPi2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector256.Create(-HalfPi3), f);
+
+                // POLY_EVAL_ODD_29: poly = f + C1*f^3 + C3*f^5 + ... + C27*f^29, with g = f^2
+                Vector256<double> g = f * f;
+                Vector256<double> g2 = g * g;
+                Vector256<double> g3 = g * g2;
+                Vector256<double> g5 = g3 * g2;
+                Vector256<double> g7 = g5 * g2;
+                Vector256<double> g9 = g7 * g2;
+                Vector256<double> g11 = g9 * g2;
+                Vector256<double> g13 = g11 * g2;
+                Vector256<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C3), g, Vector256.Create(C1));
+                Vector256<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C7), g, Vector256.Create(C5));
+                Vector256<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C11), g, Vector256.Create(C9));
+                Vector256<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C15), g, Vector256.Create(C13));
+                Vector256<double> a5 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C19), g, Vector256.Create(C17));
+                Vector256<double> a6 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C23), g, Vector256.Create(C21));
+                Vector256<double> a7 = MultiplyAddEstimateOperator<double>.Invoke(Vector256.Create(C27), g, Vector256.Create(C25));
+                Vector256<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(g, a1, g3 * a2);
+                Vector256<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(g5, a3, g7 * a4);
+                Vector256<double> b3 = MultiplyAddEstimateOperator<double>.Invoke(g9, a5, g11 * a6);
+                Vector256<double> q = MultiplyAddEstimateOperator<double>.Invoke(g13, a7, b1 + b2 + b3);
+                Vector256<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, q, f);
+                Vector256<double> result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector256.Create(~SignMask))).AsDouble(); // flip sign where x is negative; odd lanes then return -1 / result
+                return Vector256.ConditionalSelect(Vector256.Equals(odd, Vector256<ulong>.Zero).AsDouble(), result, Vector256.Create(-1.0) / result);
+            }
+
+            public static Vector512<double> Invoke(Vector512<double> x)
+            {
+                Vector512<double> uxMasked = Vector512.Abs(x);
+                if (Vector512.GreaterThanAny(uxMasked.AsUInt64(), Vector512.Create(MaxVectorizedValue)))
+                {
+                    return ApplyScalar<TanOperatorDouble>(x);
+                }
+
+                Vector512<double> dn = MultiplyAddEstimateOperator<double>.Invoke(uxMasked, Vector512.Create(2 / double.Pi), Vector512.Create(AlmHuge));
+                Vector512<ulong> odd = dn.AsUInt64() << 63; // bit 0 of the biased sum (quotient parity) moved to the sign position
+                dn -= Vector512.Create(AlmHuge); // dn is now the integer-valued quotient of |x| by π/2
+                // f = |x| - dn * π/2, one π/2 term per multiply-add as in TanOperatorSingle; when fused, dn * term is not rounded before the subtraction.
+                Vector512<double> f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-double.Pi / 2), uxMasked);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-HalfPi2), f);
+                f = MultiplyAddEstimateOperator<double>.Invoke(dn, Vector512.Create(-HalfPi3), f);
+
+                // POLY_EVAL_ODD_29: poly = f + C1*f^3 + C3*f^5 + ... + C27*f^29, with g = f^2
+                Vector512<double> g = f * f;
+                Vector512<double> g2 = g * g;
+                Vector512<double> g3 = g * g2;
+                Vector512<double> g5 = g3 * g2;
+                Vector512<double> g7 = g5 * g2;
+                Vector512<double> g9 = g7 * g2;
+                Vector512<double> g11 = g9 * g2;
+                Vector512<double> g13 = g11 * g2;
+                Vector512<double> a1 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C3), g, Vector512.Create(C1));
+                Vector512<double> a2 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C7), g, Vector512.Create(C5));
+                Vector512<double> a3 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C11), g, Vector512.Create(C9));
+                Vector512<double> a4 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C15), g, Vector512.Create(C13));
+                Vector512<double> a5 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C19), g, Vector512.Create(C17));
+                Vector512<double> a6 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C23), g, Vector512.Create(C21));
+                Vector512<double> a7 = MultiplyAddEstimateOperator<double>.Invoke(Vector512.Create(C27), g, Vector512.Create(C25));
+                Vector512<double> b1 = MultiplyAddEstimateOperator<double>.Invoke(g, a1, g3 * a2);
+                Vector512<double> b2 = MultiplyAddEstimateOperator<double>.Invoke(g5, a3, g7 * a4);
+                Vector512<double> b3 = MultiplyAddEstimateOperator<double>.Invoke(g9, a5, g11 * a6);
+                Vector512<double> q = MultiplyAddEstimateOperator<double>.Invoke(g13, a7, b1 + b2 + b3);
+                Vector512<double> poly = MultiplyAddEstimateOperator<double>.Invoke(f, q, f);
+                Vector512<double> result = (poly.AsUInt64() ^ (x.AsUInt64() & Vector512.Create(~SignMask))).AsDouble(); // flip sign where x is negative; odd lanes then return -1 / result
+                return Vector512.ConditionalSelect(Vector512.Equals(odd, Vector512<ulong>.Zero).AsDouble(), result, Vector512.Create(-1.0) / result);
+            }
         }
 
         /// <summary>T.TanPi(x)</summary>
         internal readonly struct TanPiOperator<T> : IUnaryOperator<T, T>
             where T : ITrigonometricFunctions<T>
         {
-            public static bool Vectorizable => TanOperator<T>.Vectorizable;
+            public static bool Vectorizable => false; // no vectorized path: the Vector128/256/512 overloads below throw
             public static T Invoke(T x) => T.TanPi(x);
-            public static Vector128<T> Invoke(Vector128<T> x) => TanOperator<T>.Invoke(x * Vector128.Create(T.Pi));
-            public static Vector256<T> Invoke(Vector256<T> x) => TanOperator<T>.Invoke(x * Vector256.Create(T.Pi));
-            public static Vector512<T> Invoke(Vector512<T> x) => TanOperator<T>.Invoke(x * Vector512.Create(T.Pi));
+            public static Vector128<T> Invoke(Vector128<T> x) => throw new NotSupportedException(); // unsupported; Vectorizable is false
+            public static Vector256<T> Invoke(Vector256<T> x) => throw new NotSupportedException(); // unsupported; Vectorizable is false
+            public static Vector512<T> Invoke(Vector512<T> x) => throw new NotSupportedException(); // unsupported; Vectorizable is false
         }
 
         /// <summary>T.Tanh(x)</summary>