From 6f7651bf9d96f7194f21671f703930216768b967 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 3 Oct 2023 11:35:16 -0700 Subject: [PATCH 1/2] Adding a vectorized implementation of TensorPrimitives.Log --- .../Numerics/Tensors/TensorPrimitives.cs | 16 +- .../Tensors/TensorPrimitives.netcore.cs | 220 ++++++++++++++++++ .../Tensors/TensorPrimitives.netstandard.cs | 13 ++ .../tests/TensorPrimitivesTests.cs | 35 +++ 4 files changed, 270 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs index aaafb74ec2a8c..b1b96e55bb44f 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.cs @@ -563,20 +563,8 @@ public static unsafe int IndexOfMinMagnitude(ReadOnlySpan x) /// operating systems or architectures. /// /// - public static void Log(ReadOnlySpan x, Span destination) - { - if (x.Length > destination.Length) - { - ThrowHelper.ThrowArgument_DestinationTooShort(); - } - - ValidateInputOutputSpanNonOverlapping(x, destination); - - for (int i = 0; i < x.Length; i++) - { - destination[i] = MathF.Log(x[i]); - } - } + public static void Log(ReadOnlySpan x, Span destination) => + InvokeSpanIntoSpan(x, destination); /// Computes the element-wise base 2 logarithm of single-precision floating-point numbers in the specified tensor. /// The tensor, represented as a span. diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs index 18244a42cd296..dad2e65c72d0c 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs @@ -2579,6 +2579,226 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) #endif } + private readonly struct LogOperator : IUnaryOperator + { + // This code is based on `vrs4_logf` from amd/aocl-libm-ose + // Copyright (C) 2018-2019 Advanced Micro Devices, Inc. All rights reserved. + // + // Licensed under the BSD 3-Clause "New" or "Revised" License + // See THIRD-PARTY-NOTICES.TXT for the full license text + + // Spec: + // logf(x) + // = logf(x) if x ∈ F and x > 0 + // = x if x = qNaN + // = 0 if x = 1 + // = -inf if x = (-0, 0} + // = NaN otherwise + // + // Assumptions/Expectations + // - ULP is derived to be << 4 (always) + // - Some FPU Exceptions may not be available + // - Performance is at least 3x + // + // Implementation Notes: + // 1. Range Reduction: + // x = 2^n*(1+f) .... (1) + // where n is exponent and is an integer + // (1+f) is mantissa ∈ [1,2). i.e., 1 ≤ 1+f < 2 .... (2) + // + // From (1), taking log on both sides + // log(x) = log(2^n * (1+f)) + // = log(2^n) + log(1+f) + // = n*log(2) + log(1+f) .... (3) + // + // let z = 1 + f + // log(z) = log(k) + log(z) - log(k) + // log(z) = log(kz) - log(k) + // + // From (2), range of z is [1, 2) + // by simply dividing range by 'k', z is in [1/k, 2/k) .... (4) + // Best choice of k is the one which gives equal and opposite values + // at extrema +- -+ + // 1 | 2 | + // --- - 1 = - |--- - 1 | + // k | k | .... (5) + // +- -+ + // + // Solving for k, k = 3/2, + // From (4), using 'k' value, range is therefore [-0.3333, 0.3333] + // + // 2. Polynomial Approximation: + // More information refer to tools/sollya/vrs4_logf.sollya + // + // 7th Deg - Error abs: 0x1.04c4ac98p-22 rel: 0x1.2216e6f8p-19 + // 6th Deg - Error abs: 0x1.179e97d8p-19 rel: 0x1.db676c1p-17 + + private const uint V_MIN = 0x00800000; + private const uint V_MAX = 0x7F800000; + private const uint V_MASK = 0x007FFFFF; + private const uint V_OFF = 0x3F2AAAAB; + + private const float V_LN2 = 0.6931472f; + + private const float C0 = 0.0f; + private const float C1 = 1.0f; + private const float C2 = -0.5000001f; + private const float C3 = 0.33332965f; + private const float C4 = -0.24999046f; + private const float C5 = 0.20018855f; + private const float C6 = -0.16700386f; + private const float C7 = 0.13902695f; + private const float C8 = -0.1197452f; + private const float C9 = 0.14401625f; + private const float C10 = -0.13657966f; + + public static float Invoke(float x) => MathF.Log(x); + + public static Vector128 Invoke(Vector128 x) + { + Vector128 specialResult = x; + + // x is subnormal or infinity or NaN + Vector128 specialMask = Vector128.GreaterThanOrEqual(x.AsUInt32() - Vector128.Create(V_MIN), Vector128.Create(V_MAX - V_MIN)); + + if (specialMask != Vector128.Zero) + { + // float.IsZero(x) ? float.NegativeInfinity : x + Vector128 zeroMask = Vector128.Equals(x, Vector128.Zero); + + specialResult = Vector128.ConditionalSelect( + zeroMask, + Vector128.Create(float.NegativeInfinity), + specialResult + ); + + // (x < 0) ? float.NaN : x + Vector128 lessThanZeroMask = Vector128.LessThan(x, Vector128.Zero); + + specialResult = Vector128.ConditionalSelect( + lessThanZeroMask, + Vector128.Create(float.NaN), + specialResult + ); + + // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x) + Vector128 temp = zeroMask + | lessThanZeroMask + | ~Vector128.Equals(x, x) + | Vector128.Equals(x, Vector128.Create(float.PositiveInfinity)); + + // subnormal + Vector128 subnormalMask = Vector128.AndNot(specialMask.AsSingle(), temp); + + x = Vector128.ConditionalSelect( + subnormalMask, + ((x * 8388608.0f).AsUInt32() - Vector128.Create(23u << 23)).AsSingle(), + x + ); + + specialMask = temp.AsUInt32(); + } + + Vector128 vx = x.AsUInt32() - Vector128.Create(V_OFF); + Vector128 n = Vector128.ConvertToSingle(Vector128.ShiftRightArithmetic(vx.AsInt32(), 23)); + + vx = (vx & Vector128.Create(V_MASK)) + Vector128.Create(V_OFF); + + Vector128 r = vx.AsSingle() - Vector128.Create(1.0f); + + Vector128 r2 = r * r; + Vector128 r4 = r2 * r2; + Vector128 r8 = r4 * r4; + + Vector128 q = (Vector128.Create(C10) * r2 + (Vector128.Create(C9) * r + Vector128.Create(C8))) + * r8 + (((Vector128.Create(C7) * r + Vector128.Create(C6)) + * r2 + (Vector128.Create(C5) * r + Vector128.Create(C4))) + * r4 + ((Vector128.Create(C3) * r + Vector128.Create(C2)) + * r2 + (Vector128.Create(C1) * r + Vector128.Create(C0)))); + + return Vector128.ConditionalSelect( + specialMask.AsSingle(), + specialResult, + n * Vector128.Create(V_LN2) + q + ); + } + + public static Vector256 Invoke(Vector256 x) + { + Vector256 specialResult = x; + + // x is subnormal or infinity or NaN + Vector256 specialMask = Vector256.GreaterThanOrEqual(x.AsUInt32() - Vector256.Create(V_MIN), Vector256.Create(V_MAX - V_MIN)); + + if (specialMask != Vector256.Zero) + { + // float.IsZero(x) ? float.NegativeInfinity : x + Vector256 zeroMask = Vector256.Equals(x, Vector256.Zero); + + specialResult = Vector256.ConditionalSelect( + zeroMask, + Vector256.Create(float.NegativeInfinity), + specialResult + ); + + // (x < 0) ? float.NaN : x + Vector256 lessThanZeroMask = Vector256.LessThan(x, Vector256.Zero); + + specialResult = Vector256.ConditionalSelect( + lessThanZeroMask, + Vector256.Create(float.NaN), + specialResult + ); + + // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x) + Vector256 temp = zeroMask + | lessThanZeroMask + | ~Vector256.Equals(x, x) + | Vector256.Equals(x, Vector256.Create(float.PositiveInfinity)); + + // subnormal + Vector256 subnormalMask = Vector256.AndNot(specialMask.AsSingle(), temp); + + x = Vector256.ConditionalSelect( + subnormalMask, + ((x * 8388608.0f).AsUInt32() - Vector256.Create(23u << 23)).AsSingle(), + x + ); + + specialMask = temp.AsUInt32(); + } + + Vector256 vx = x.AsUInt32() - Vector256.Create(V_OFF); + Vector256 n = Vector256.ConvertToSingle(Vector256.ShiftRightArithmetic(vx.AsInt32(), 23)); + + vx = (vx & Vector256.Create(V_MASK)) + Vector256.Create(V_OFF); + + Vector256 r = vx.AsSingle() - Vector256.Create(1.0f); + + Vector256 r2 = r * r; + Vector256 r4 = r2 * r2; + Vector256 r8 = r4 * r4; + + Vector256 q = (Vector256.Create(C10) * r2 + (Vector256.Create(C9) * r + Vector256.Create(C8))) + * r8 + (((Vector256.Create(C7) * r + Vector256.Create(C6)) + * r2 + (Vector256.Create(C5) * r + Vector256.Create(C4))) + * r4 + ((Vector256.Create(C3) * r + Vector256.Create(C2)) + * r2 + (Vector256.Create(C1) * r + Vector256.Create(C0)))); + + return Vector256.ConditionalSelect( + specialMask.AsSingle(), + specialResult, + n * Vector256.Create(V_LN2) + q + ); + } + +#if NET8_0_OR_GREATER + public static Vector512 Invoke(Vector512 x) + { + } +#endif + } + private readonly struct Log2Operator : IUnaryOperator { // This code is based on `vrs4_log2f` from amd/aocl-libm-ose diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs index 3d2b89da452bc..b9c1e128aaede 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netstandard.cs @@ -923,6 +923,19 @@ public Vector Invoke(Vector x, Vector y) public Vector Invoke(Vector x) => Vector.Abs(x); } + private readonly struct LogOperator : IUnaryOperator + { + public bool CanVectorize => false; + + public float Invoke(float x) => MathF.Log(x); + + public Vector Invoke(Vector x) + { + // Vectorizing requires shift right support, which is .NET 7 or later + throw new NotImplementedException(); + } + } + private readonly struct Log2Operator : IUnaryOperator { public bool CanVectorize => false; diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs index 137f2fd9070de..b0e36dab02acf 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitivesTests.cs @@ -1006,6 +1006,41 @@ public static void Log_InPlace(int tensorLength) } } + [Theory] + [MemberData(nameof(TensorLengths))] + public static void Log_SpecialValues(int tensorLength) + { + using BoundedMemory x = CreateAndFillTensor(tensorLength); + using BoundedMemory destination = CreateTensor(tensorLength); + + // NaN + x[s_random.Next(x.Length)] = float.NaN; + + // +Infinity + x[s_random.Next(x.Length)] = float.PositiveInfinity; + + // -Infinity + x[s_random.Next(x.Length)] = float.NegativeInfinity; + + // +Zero + x[s_random.Next(x.Length)] = +0.0f; + + // -Zero + x[s_random.Next(x.Length)] = -0.0f; + + // +Epsilon + x[s_random.Next(x.Length)] = +float.Epsilon; + + // -Epsilon + x[s_random.Next(x.Length)] = -float.Epsilon; + + TensorPrimitives.Log(x, destination); + for (int i = 0; i < tensorLength; i++) + { + Assert.Equal(MathF.Log(x[i]), destination[i], Tolerance); + } + } + [Theory] [MemberData(nameof(TensorLengths))] public static void Log_ThrowsForTooShortDestination(int tensorLength) From c8f52ecbfbe6b840bbb36d48606dc006292107a4 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 3 Oct 2023 11:59:44 -0700 Subject: [PATCH 2/2] Make sure to hit Ctrl+S --- .../Tensors/TensorPrimitives.netcore.cs | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs index dad2e65c72d0c..7e13b54148e4e 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/TensorPrimitives.netcore.cs @@ -2795,6 +2795,71 @@ public static Vector256 Invoke(Vector256 x) #if NET8_0_OR_GREATER public static Vector512 Invoke(Vector512 x) { + Vector512 specialResult = x; + + // x is subnormal or infinity or NaN + Vector512 specialMask = Vector512.GreaterThanOrEqual(x.AsUInt32() - Vector512.Create(V_MIN), Vector512.Create(V_MAX - V_MIN)); + + if (specialMask != Vector512.Zero) + { + // float.IsZero(x) ? float.NegativeInfinity : x + Vector512 zeroMask = Vector512.Equals(x, Vector512.Zero); + + specialResult = Vector512.ConditionalSelect( + zeroMask, + Vector512.Create(float.NegativeInfinity), + specialResult + ); + + // (x < 0) ? float.NaN : x + Vector512 lessThanZeroMask = Vector512.LessThan(x, Vector512.Zero); + + specialResult = Vector512.ConditionalSelect( + lessThanZeroMask, + Vector512.Create(float.NaN), + specialResult + ); + + // float.IsZero(x) | (x < 0) | float.IsNaN(x) | float.IsPositiveInfinity(x) + Vector512 temp = zeroMask + | lessThanZeroMask + | ~Vector512.Equals(x, x) + | Vector512.Equals(x, Vector512.Create(float.PositiveInfinity)); + + // subnormal + Vector512 subnormalMask = Vector512.AndNot(specialMask.AsSingle(), temp); + + x = Vector512.ConditionalSelect( + subnormalMask, + ((x * 8388608.0f).AsUInt32() - Vector512.Create(23u << 23)).AsSingle(), + x + ); + + specialMask = temp.AsUInt32(); + } + + Vector512 vx = x.AsUInt32() - Vector512.Create(V_OFF); + Vector512 n = Vector512.ConvertToSingle(Vector512.ShiftRightArithmetic(vx.AsInt32(), 23)); + + vx = (vx & Vector512.Create(V_MASK)) + Vector512.Create(V_OFF); + + Vector512 r = vx.AsSingle() - Vector512.Create(1.0f); + + Vector512 r2 = r * r; + Vector512 r4 = r2 * r2; + Vector512 r8 = r4 * r4; + + Vector512 q = (Vector512.Create(C10) * r2 + (Vector512.Create(C9) * r + Vector512.Create(C8))) + * r8 + (((Vector512.Create(C7) * r + Vector512.Create(C6)) + * r2 + (Vector512.Create(C5) * r + Vector512.Create(C4))) + * r4 + ((Vector512.Create(C3) * r + Vector512.Create(C2)) + * r2 + (Vector512.Create(C1) * r + Vector512.Create(C0)))); + + return Vector512.ConditionalSelect( + specialMask.AsSingle(), + specialResult, + n * Vector512.Create(V_LN2) + q + ); } #endif }