Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply arm64 intrinsics to System.Text.Encodings.Web #38707

Merged
merged 32 commits into from
Jul 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
79fb08d
use arm64 intrinsics in FindFirstCharacterToEncodeUtf8
eiriktsarpalis Jul 2, 2020
bc69c5e
remove whitespace
eiriktsarpalis Jul 2, 2020
67c3eef
shim intrinsics apis for netstandard & netcoreapp targets
eiriktsarpalis Jul 3, 2020
2055a00
use shimmed APIs in DefaultJavascripEncoder.cs
eiriktsarpalis Jul 3, 2020
f0f6ff6
implement MoveMask over AdvSimd
eiriktsarpalis Jul 3, 2020
d05afc6
Fix build and remove x86 shims
eiriktsarpalis Jul 3, 2020
3aeef10
remove stub goto labels
eiriktsarpalis Jul 6, 2020
74b7448
implement optimizations for FindFirstCharacterToEncodeUtf8
eiriktsarpalis Jul 6, 2020
a65af06
optimize FindFirstCharacterToEncode
eiriktsarpalis Jul 6, 2020
3994520
fix bug
eiriktsarpalis Jul 6, 2020
1a1fd15
implement optimizations for FindFirstCharacterToEncodeUtf8
eiriktsarpalis Jul 7, 2020
6634260
remove goto labels
eiriktsarpalis Jul 7, 2020
8f9b52f
optimize FindFirstCharacterToEncode
eiriktsarpalis Jul 7, 2020
119be4c
Optimiize FindFirstCharacterToEncodeUtf8
eiriktsarpalis Jul 7, 2020
ec4a2b0
bug fix
eiriktsarpalis Jul 7, 2020
f88ec4b
add missing shim methods
eiriktsarpalis Jul 7, 2020
6258a97
fix bug
eiriktsarpalis Jul 7, 2020
3c76a7e
minor cleanups
eiriktsarpalis Jul 7, 2020
c3e66f3
cleanup and add comments
eiriktsarpalis Jul 7, 2020
2a24500
address feedback
eiriktsarpalis Jul 9, 2020
ca4180c
address feedback and add checks for endianness
eiriktsarpalis Jul 10, 2020
874a030
address feedback
eiriktsarpalis Jul 10, 2020
46467bc
Use shims from CoreLib
eiriktsarpalis Jul 10, 2020
0a0d8f6
Remove AdvSimdHelper.MoveMask and factor non-ascii byte locator logic…
eiriktsarpalis Jul 11, 2020
70349d8
address feedback
eiriktsarpalis Jul 14, 2020
448376d
reinstate goto labels
eiriktsarpalis Jul 14, 2020
100ba88
revert removed goto labels
eiriktsarpalis Jul 14, 2020
3954447
revert more changes
eiriktsarpalis Jul 14, 2020
e4fc529
address feedback
eiriktsarpalis Jul 14, 2020
70d02ae
address feedback
eiriktsarpalis Jul 14, 2020
6f0f2d1
further optimize GetIndexOfFirstNonAsciiByte
eiriktsarpalis Jul 14, 2020
0ff134f
add TODO comment
eiriktsarpalis Jul 14, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
<ExcludeCurrentNetCoreAppFromPackage>true</ExcludeCurrentNetCoreAppFromPackage>
<Nullable>enable</Nullable>
</PropertyGroup>
<PropertyGroup>
<!-- CS3019: CLS attributes on internal types. Some shared source files are internal in this project. -->
<NoWarn Condition="'$(TargetFramework)' == 'netcoreapp3.0'">$(NoWarn);CS3019</NoWarn>
</PropertyGroup>
<ItemGroup>
<Compile Include="System\Text\Encodings\Web\DefaultJavaScriptEncoder.cs" />
<Compile Include="System\Text\Encodings\Web\DefaultJavaScriptEncoderBasicLatin.cs" />
Expand All @@ -24,17 +28,19 @@
<Compile Include="System\Text\Unicode\UnicodeRanges.generated.cs" />
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)' == '$(NetCoreAppCurrent)' or '$(TargetFramework)' == 'netcoreapp3.0'">
<Compile Include="System\Text\Encodings\Web\BitHelper.cs" />
<Compile Include="System\Text\Encodings\Web\AdvSimdHelper.cs" />
<Compile Include="System\Text\Encodings\Web\Sse2Helper.cs" />
<Compile Include="System\Text\Encodings\Web\Ssse3Helper.cs" />
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)' == 'netcoreapp3.0'">
<Compile Include="$(CoreLibSharedDir)\System\Runtime\CompilerServices\IntrinsicAttribute.cs" Link="System\Runtime\CompilerServices\IntrinsicAttribute.cs" />
<Compile Include="$(CoreLibSharedDir)\System\Runtime\Intrinsics\Arm\AdvSimd.PlatformNotSupported.cs" Link="System\Runtime\Intrinsics\Arm\AdvSimd.PlatformNotSupported.cs" />
<Compile Include="$(CoreLibSharedDir)\System\Runtime\Intrinsics\Arm\ArmBase.PlatformNotSupported.cs" Link="System\Runtime\Intrinsics\Arm\ArmBase.PlatformNotSupported.cs" />
</ItemGroup>
<ItemGroup>
<Compile Include="$(CoreLibSharedDir)System\Text\UnicodeDebug.cs"
Link="System\Text\UnicodeDebug.cs" />
<Compile Include="$(CoreLibSharedDir)System\Text\UnicodeUtility.cs"
Link="System\Text\UnicodeUtility.cs" />
<Compile Include="$(CommonPath)System\HexConverter.cs"
Link="Common\System\HexConverter.cs" />
<Compile Include="$(CoreLibSharedDir)System\Text\UnicodeDebug.cs" Link="System\Text\UnicodeDebug.cs" />
<Compile Include="$(CoreLibSharedDir)System\Text\UnicodeUtility.cs" Link="System\Text\UnicodeUtility.cs" />
<Compile Include="$(CommonPath)System\HexConverter.cs" Link="Common\System\HexConverter.cs" />
</ItemGroup>
<ItemGroup>
<Reference Include="System.Memory" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

namespace System.Text.Encodings.Web
{
internal static class AdvSimdHelper
eiriktsarpalis marked this conversation as resolved.
Show resolved Hide resolved
{
/// <summary>
/// Builds a per-lane mask of the UTF-16 code units flagged for escaping by the relaxed JavaScript encoder rules.
/// </summary>
/// <param name="sourceValue">Eight UTF-16 code units viewed as signed 16-bit lanes.</param>
/// <returns>The bitwise OR of the individual comparison results; a lane is non-zero when any rule matched.</returns>
/// <exception cref="PlatformNotSupportedException">AdvSimd.Arm64 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(Vector128<short> sourceValue)
{
    if (!AdvSimd.Arm64.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // Signed "less than space" matches the control characters and also every code unit
    // in 32768..65535, because those wrap around to negative values as Int16.
    Vector128<short> belowSpace = AdvSimd.CompareLessThan(sourceValue, s_spaceMaskInt16);

    // The two ASCII characters that always need escaping: '"' and '\'.
    Vector128<short> isQuotationMark = AdvSimd.CompareEqual(sourceValue, s_quotationMarkMaskInt16);
    Vector128<short> isReverseSolidus = AdvSimd.CompareEqual(sourceValue, s_reverseSolidusMaskInt16);

    // Everything after '~': the remaining ASCII control character 0x7F and, when the input
    // is not restricted to ASCII, any positive code unit above the ASCII range.
    Vector128<short> aboveTilde = AdvSimd.CompareGreaterThan(sourceValue, s_tildeMaskInt16);

    return AdvSimd.Or(
        AdvSimd.Or(belowSpace, isQuotationMark),
        AdvSimd.Or(isReverseSolidus, aboveTilde));
}

/// <summary>
/// Builds a per-lane mask of the UTF-8 bytes flagged for escaping by the relaxed JavaScript encoder rules.
/// </summary>
/// <param name="sourceValue">Sixteen bytes viewed as signed 8-bit lanes.</param>
/// <returns>The bitwise OR of the individual comparison results; a lane is non-zero when any rule matched.</returns>
/// <exception cref="PlatformNotSupportedException">AdvSimd.Arm64 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(Vector128<sbyte> sourceValue)
{
    if (!AdvSimd.Arm64.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // Signed "less than space" matches the control characters below 0x20 and every byte
    // in 128..255, because those wrap around to negative values as SByte.
    Vector128<sbyte> belowSpace = AdvSimd.CompareLessThan(sourceValue, s_spaceMaskSByte);

    // The two ASCII characters that always need escaping: '"' and '\'.
    Vector128<sbyte> isQuotationMark = AdvSimd.CompareEqual(sourceValue, s_quotationMarkMaskSByte);
    Vector128<sbyte> isReverseSolidus = AdvSimd.CompareEqual(sourceValue, s_reverseSolidusMaskSByte);

    // With signed bytes the only value greater than '~' is 0x7F, the remaining control character.
    Vector128<sbyte> aboveTilde = AdvSimd.CompareGreaterThan(sourceValue, s_tildeMaskSByte);

    return AdvSimd.Or(
        AdvSimd.Or(belowSpace, isQuotationMark),
        AdvSimd.Or(isReverseSolidus, aboveTilde));
}

/// <summary>
/// Builds a per-lane mask of the bytes flagged for escaping by the default (Basic Latin) JavaScript encoder:
/// everything flagged by the relaxed encoder mask plus '&amp;', '\'', '+', '&lt;', '&gt;' and '`'.
/// </summary>
/// <param name="sourceValue">Sixteen bytes viewed as signed 8-bit lanes.</param>
/// <returns>The bitwise OR of the relaxed mask and the additional equality comparisons.</returns>
/// <exception cref="PlatformNotSupportedException">AdvSimd.Arm64 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> CreateEscapingMask_DefaultJavaScriptEncoderBasicLatin(Vector128<sbyte> sourceValue)
{
    if (!AdvSimd.Arm64.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // Start from the relaxed rules; the default encoder is a strict superset of them.
    Vector128<sbyte> relaxedMask = CreateEscapingMask_UnsafeRelaxedJavaScriptEncoder(sourceValue);

    // Additional characters escaped only by the default encoder.
    Vector128<sbyte> isAmpersand = AdvSimd.CompareEqual(sourceValue, s_ampersandMaskSByte);
    Vector128<sbyte> isApostrophe = AdvSimd.CompareEqual(sourceValue, s_apostropheMaskSByte);
    Vector128<sbyte> isPlusSign = AdvSimd.CompareEqual(sourceValue, s_plusSignMaskSByte);
    Vector128<sbyte> isLessThanSign = AdvSimd.CompareEqual(sourceValue, s_lessThanSignMaskSByte);
    Vector128<sbyte> isGreaterThanSign = AdvSimd.CompareEqual(sourceValue, s_greaterThanSignMaskSByte);
    Vector128<sbyte> isGraveAccent = AdvSimd.CompareEqual(sourceValue, s_graveAccentMaskSByte);

    Vector128<sbyte> extraMask = AdvSimd.Or(
        AdvSimd.Or(AdvSimd.Or(isAmpersand, isApostrophe), isPlusSign),
        AdvSimd.Or(AdvSimd.Or(isLessThanSign, isGreaterThanSign), isGraveAccent));

    return AdvSimd.Or(relaxedMask, extraMask);
}

/// <summary>
/// Builds a per-lane mask of the UTF-16 code units that fall outside the ASCII range.
/// </summary>
/// <param name="sourceValue">Eight UTF-16 code units viewed as signed 16-bit lanes.</param>
/// <returns>The bitwise OR of the two range checks; a lane is non-zero when the code unit is not ASCII.</returns>
/// <exception cref="PlatformNotSupportedException">AdvSimd.Arm64 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<short> CreateAsciiMask(Vector128<short> sourceValue)
{
    if (!AdvSimd.Arm64.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // Code units in 32768..65535 wrap around to negative values as Int16,
    // so a signed "less than zero" check catches all of them.
    Vector128<short> isNegative = AdvSimd.CompareLessThan(sourceValue, s_nullMaskInt16);

    // Positive code units above 0x7F are outside the ASCII range as well.
    Vector128<short> isAboveAscii = AdvSimd.CompareGreaterThan(sourceValue, s_maxAsciiCharacterMaskInt16);

    return AdvSimd.Or(isNegative, isAboveAscii);
}

/// <summary>
/// Determines whether any of the sixteen bytes in <paramref name="value"/> has its most significant bit set,
/// i.e. is outside the ASCII range.
/// </summary>
/// <param name="value">Sixteen bytes viewed as signed 8-bit lanes.</param>
/// <returns><see langword="true"/> if at least one lane is negative; otherwise <see langword="false"/>.</returns>
/// <exception cref="PlatformNotSupportedException">AdvSimd.Arm64 is not supported.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool ContainsNonAsciiByte(Vector128<sbyte> value)
{
    if (!AdvSimd.Arm64.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    // The high bit of each of the eight bytes packed into a 64-bit scalar.
    const ulong HighBitOfEveryByte = 0x8080808080808080;

    // The signed minimum of each adjacent pair is negative when either member is negative.
    // Passing the same vector twice leaves the eight pair results in the lower 64 bits.
    Vector128<sbyte> pairwiseMinimum = AdvSimd.Arm64.MinPairwise(value, value);
    ulong lowerHalf = pairwiseMinimum.AsUInt64().ToScalar();

    return (lowerHalf & HighBitOfEveryByte) != 0;
}

/// <summary>
/// Returns the index of the first byte in <paramref name="value"/> whose most significant bit is set.
/// </summary>
/// <param name="value">The sixteen bytes to inspect.</param>
/// <returns>
/// The zero-based lane index (0-15) of the first non-ASCII byte, or 16 when every byte is ASCII
/// (the trailing zero count of an all-zero 64-bit mask is 64, and 64 >> 2 == 16).
/// </returns>
/// <exception cref="PlatformNotSupportedException">
/// AdvSimd.Arm64 is not supported, or the platform is not little-endian.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int GetIndexOfFirstNonAsciiByte(Vector128<byte> value)
{
if (!AdvSimd.Arm64.IsSupported || !BitConverter.IsLittleEndian)
{
throw new PlatformNotSupportedException();
}

// extractedBits[i] = (value[i] >> 7) & (1 << (4 * (i % 2)));
// The arithmetic shift turns each byte into 0xFF (high bit set) or 0x00; the AND with s_bitmask
// then keeps bit 0 for even lanes and bit 4 for odd lanes.
Vector128<byte> mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte();
Vector128<byte> extractedBits = AdvSimd.And(mostSignificantBitIsSet, s_bitmask);

// collapse mask to lower bits
// Adding adjacent lanes merges each even/odd pair into one byte (bit 0 | bit 4), so the lower
// 64 bits end up holding one nibble per original byte: lane i maps to bit 4 * i.
extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits);
eiriktsarpalis marked this conversation as resolved.
Show resolved Hide resolved
ulong mask = extractedBits.AsUInt64().ToScalar();

// calculate the index
// Each lane occupies four bits of the mask, hence the division by four.
int index = BitOperations.TrailingZeroCount(mask) >> 2;
Debug.Assert((mask != 0) ? index < 16 : index >= 16);
return index;
}

// Broadcast constants used by the Int16 (UTF-16 code unit) comparisons above.
// Each vector holds the same value in all eight lanes.
private static readonly Vector128<short> s_nullMaskInt16 = Vector128<short>.Zero;
private static readonly Vector128<short> s_spaceMaskInt16 = Vector128.Create((short)' ');
private static readonly Vector128<short> s_quotationMarkMaskInt16 = Vector128.Create((short)'"');
private static readonly Vector128<short> s_reverseSolidusMaskInt16 = Vector128.Create((short)'\\');
private static readonly Vector128<short> s_tildeMaskInt16 = Vector128.Create((short)'~');
private static readonly Vector128<short> s_maxAsciiCharacterMaskInt16 = Vector128.Create((short)0x7F); // Delete control character

// Broadcast constants used by the SByte (UTF-8 byte) comparisons above.
// Each vector holds the same value in all sixteen lanes.
private static readonly Vector128<sbyte> s_spaceMaskSByte = Vector128.Create((sbyte)' ');
private static readonly Vector128<sbyte> s_quotationMarkMaskSByte = Vector128.Create((sbyte)'"');
private static readonly Vector128<sbyte> s_ampersandMaskSByte = Vector128.Create((sbyte)'&');
private static readonly Vector128<sbyte> s_apostropheMaskSByte = Vector128.Create((sbyte)'\'');
private static readonly Vector128<sbyte> s_plusSignMaskSByte = Vector128.Create((sbyte)'+');
private static readonly Vector128<sbyte> s_lessThanSignMaskSByte = Vector128.Create((sbyte)'<');
private static readonly Vector128<sbyte> s_greaterThanSignMaskSByte = Vector128.Create((sbyte)'>');
private static readonly Vector128<sbyte> s_reverseSolidusMaskSByte = Vector128.Create((sbyte)'\\');
private static readonly Vector128<sbyte> s_graveAccentMaskSByte = Vector128.Create((sbyte)'`');
private static readonly Vector128<sbyte> s_tildeMaskSByte = Vector128.Create((sbyte)'~');

// Per-byte bit selector used by GetIndexOfFirstNonAsciiByte.
// The UInt16 constant is chosen by endianness so that, viewed as bytes in memory order,
// the lanes alternate 0x01 (even lanes) and 0x10 (odd lanes) in both cases.
private static readonly Vector128<byte> s_bitmask = BitConverter.IsLittleEndian ?
Vector128.Create((ushort)0x1001).AsByte() :
Vector128.Create((ushort)0x0110).AsByte();
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#if NETCOREAPP
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Runtime.Intrinsics.Arm;
#endif

namespace System.Text.Encodings.Web
Expand Down Expand Up @@ -87,21 +88,33 @@ public override unsafe int FindFirstCharacterToEncodeUtf8(ReadOnlySpan<byte> utf
int idx = 0;

#if NETCOREAPP
if (Sse2.IsSupported)
if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
{
sbyte* startingAddress = (sbyte*)ptr;
while (utf8Text.Length - 16 >= idx)
{
Debug.Assert(startingAddress >= ptr && startingAddress <= (ptr + utf8Text.Length - 16));

// Load the next 16 bytes.
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);
bool containsNonAsciiBytes;

// Check for ASCII text. Any byte that's not in the ASCII range will already be negative when
// casted to signed byte.
int index = Sse2.MoveMask(sourceValue);
// Load the next 16 bytes, and check for ASCII text.
// Any byte that's not in the ASCII range will already be negative when casted to signed byte.
if (Sse2.IsSupported)
{
Vector128<sbyte> sourceValue = Sse2.LoadVector128(startingAddress);
containsNonAsciiBytes = Sse2Helper.ContainsNonAsciiByte(sourceValue);
}
else if (AdvSimd.Arm64.IsSupported)
{
Vector128<sbyte> sourceValue = AdvSimd.LoadVector128(startingAddress);
containsNonAsciiBytes = AdvSimdHelper.ContainsNonAsciiByte(sourceValue);
}
else
{
throw new PlatformNotSupportedException();
}

if (index != 0)
if (containsNonAsciiBytes)
{
// At least one of the following 16 bytes is non-ASCII.

Expand Down
Loading