From fdb03ca8776f5197fb046067b994b605bb6e715c Mon Sep 17 00:00:00 2001
From: SwapnilGaikwad
Date: Wed, 10 Jan 2024 06:30:37 +0000
Subject: [PATCH] Use multi-reg load/store for EncodeToUtf8 (#95513)

* Use multi-reg load/store for EncodeToUtf8

* Use the fixed version of multi-reg store

* Update variable naming
---
 .../src/System/Buffers/Text/Base64Encoder.cs  | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs
index 08ca62b533f51..9c28f57038a30 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Encoder.cs
@@ -85,6 +85,15 @@ public static unsafe OperationStatus EncodeToUtf8(ReadOnlySpan<byte> bytes, Span
                             goto DoneExit;
                     }
 
+                    end = srcMax - 48;
+                    if (AdvSimd.Arm64.IsSupported && (end >= src))
+                    {
+                        AdvSimdEncode(ref src, ref dest, end, maxSrcLength, destLength, srcBytes, destBytes);
+
+                        if (src == srcEnd)
+                            goto DoneExit;
+                    }
+
                     end = srcMax - 16;
                     if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian && (end >= src))
                     {
@@ -480,6 +489,78 @@ private static unsafe void Avx2Encode(ref byte* srcBytes, ref byte* destBytes, b
             destBytes = dest;
         }
 
+        /// <summary>
+        /// Encodes the source in blocks of 48 bytes, writing 64 Base64 characters per block, using
+        /// AdvSimd.Arm64 multi-register load/store and table-lookup instructions.
+        /// </summary>
+        /// <param name="srcBytes">On entry, the address to start reading from; on exit, the address just past the last block consumed.</param>
+        /// <param name="destBytes">On entry, the address to start writing to; on exit, the address just past the last block written.</param>
+        /// <param name="srcEnd">Loop bound: blocks are encoded while the read position is at or below this address (the caller passes srcMax - 48).</param>
+        /// <param name="sourceLength">Forwarded to <c>AssertRead</c> together with <paramref name="srcStart"/>.</param>
+        /// <param name="destLength">Forwarded to <c>AssertWrite</c> together with <paramref name="destStart"/>.</param>
+        /// <param name="srcStart">Forwarded to <c>AssertRead</c>; not otherwise used.</param>
+        /// <param name="destStart">Forwarded to <c>AssertWrite</c>; not otherwise used.</param>
+        /// <remarks>
+        /// The loop is a do/while, so the first block is always processed: the caller must check that
+        /// <paramref name="srcBytes"/> is not past <paramref name="srcEnd"/> before calling.
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        [CompExactlyDependsOn(typeof(AdvSimd.Arm64))]
+        private static unsafe void AdvSimdEncode(ref byte* srcBytes, ref byte* destBytes, byte* srcEnd, int sourceLength, int destLength, byte* srcStart, byte* destStart)
+        {
+            // C# implementation of https://github.com/aklomp/base64/blob/3a5add8652076612a8407627a42c768736a4263f/lib/arch/neon64/enc_loop.c
+
+            // The 64-character Base64 alphabet, split across four 16-byte lookup tables.
+            Vector128<byte> tblEnc1 = Vector128.Create("ABCDEFGHIJKLMNOP"u8);
+            Vector128<byte> tblEnc2 = Vector128.Create("QRSTUVWXYZabcdef"u8);
+            Vector128<byte> tblEnc3 = Vector128.Create("ghijklmnopqrstuv"u8);
+            Vector128<byte> tblEnc4 = Vector128.Create("wxyz0123456789+/"u8);
+
+            // Keeps the low six bits of every byte. Loop-invariant, so it is built once here.
+            Vector128<byte> sixBitMask = AdvSimd.DuplicateToVector128((byte)0x3F);
+
+            byte* src = srcBytes;
+            byte* dest = destBytes;
+
+            // If we have Neon support, pick off 48 bytes at a time for as long as we can.
+            do
+            {
+                // Load 48 bytes and deinterleave:
+                AssertRead<Vector128<byte>>(src, srcStart, sourceLength);
+                (Vector128<byte> str1, Vector128<byte> str2, Vector128<byte> str3) = AdvSimd.Arm64.LoadVector128x3AndUnzip(src);
+
+                // Divide bits of three input bytes over four output bytes:
+                Vector128<byte> res1 = AdvSimd.ShiftRightLogical(str1, 2);
+                Vector128<byte> res2 = AdvSimd.ShiftRightLogical(str2, 4);
+                Vector128<byte> res3 = AdvSimd.ShiftRightLogical(str3, 6);
+                res2 = AdvSimd.ShiftLeftAndInsert(res2, str1, 4);
+                res3 = AdvSimd.ShiftLeftAndInsert(res3, str2, 2);
+
+                // Clear top two bits:
+                res2 &= sixBitMask;
+                res3 &= sixBitMask;
+                Vector128<byte> res4 = str3 & sixBitMask;
+
+                // The bits have now been shifted to the right locations;
+                // translate their values 0..63 to the Base64 alphabet.
+                // Use a 64-byte table lookup:
+                res1 = AdvSimd.Arm64.VectorTableLookup((tblEnc1, tblEnc2, tblEnc3, tblEnc4), res1);
+                res2 = AdvSimd.Arm64.VectorTableLookup((tblEnc1, tblEnc2, tblEnc3, tblEnc4), res2);
+                res3 = AdvSimd.Arm64.VectorTableLookup((tblEnc1, tblEnc2, tblEnc3, tblEnc4), res3);
+                res4 = AdvSimd.Arm64.VectorTableLookup((tblEnc1, tblEnc2, tblEnc3, tblEnc4), res4);
+
+                // Interleave and store result:
+                AssertWrite<Vector128<byte>>(dest, destStart, destLength);
+                AdvSimd.Arm64.StoreVector128x4AndZip(dest, (res1, res2, res3, res4));
+
+                src += 48;
+                dest += 64;
+            } while (src <= srcEnd);
+
+            srcBytes = src;
+            destBytes = dest;
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CompExactlyDependsOn(typeof(Ssse3))]
         [CompExactlyDependsOn(typeof(AdvSimd.Arm64))]