Skip to content

Commit

Permalink
port SpanHelpers.IndexOfAny(ref byte, byte, byte, byte, int) to Vecto…
Browse files Browse the repository at this point in the history
…r128/256 (#73481)
  • Loading branch information
adamsitnik authored Aug 11, 2022
1 parent cbf2e32 commit 402aa85
Showing 1 changed file with 21 additions and 90 deletions.
111 changes: 21 additions & 90 deletions src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1100,13 +1100,13 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations
nuint lengthToExamine = (nuint)(uint)length;

if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)
if (Vector128.IsHardwareAccelerated)
{
// Avx2 branch also operates on Sse2 sizes, so check is combined.
nint vectorDiff = (nint)length - Vector128<byte>.Count;
if (vectorDiff >= 0)
{
// >= Sse2 intrinsics are supported, and length is enough to use them so use that path.
// >= Vector128 is accelerated, and length is enough to use them so use that path.
// We jump forward to the intrinsics at the end of the method so a naive branch predict
// will choose the non-intrinsic path so short lengths which don't gain anything aren't
// overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths
Expand Down Expand Up @@ -1215,10 +1215,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
// the end and forwards, which may overlap on an earlier compare.

// We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported.
if (Sse2.IsSupported)
if (Vector128.IsHardwareAccelerated)
{
int matches;
if (Avx2.IsSupported)
uint matches;
if (Vector256.IsHardwareAccelerated)
{
Vector256<byte> search;
// Guard as we may only have a valid size for Vector128; when we will move to the Sse2
Expand All @@ -1235,15 +1235,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
// First time this checks again against 0, however we will move into final compare if it fails.
while (lengthToExamine > offset)
{
search = LoadVector256(ref searchSpace, offset);
search = Vector256.LoadUnsafe(ref searchSpace, offset);
// Bitwise Or to combine the flagged matches for the second value to our match flags
matches = Avx2.MoveMask(
Avx2.Or(
Avx2.Or(
Avx2.CompareEqual(values0, search),
Avx2.CompareEqual(values1, search)),
Avx2.CompareEqual(values2, search)));
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) | Vector256.Equals(values2, search)).ExtractMostSignificantBits();
// Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags,
// So the bit position in 'matches' corresponds to the element offset.
if (matches == 0)
{
Expand All @@ -1256,15 +1251,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
}

// Move to Vector length from end for final compare
search = LoadVector256(ref searchSpace, lengthToExamine);
search = Vector256.LoadUnsafe(ref searchSpace, lengthToExamine);
offset = lengthToExamine;
// Same as method as above
matches = Avx2.MoveMask(
Avx2.Or(
Avx2.Or(
Avx2.CompareEqual(values0, search),
Avx2.CompareEqual(values1, search)),
Avx2.CompareEqual(values2, search)));
matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search) | Vector256.Equals(values2, search)).ExtractMostSignificantBits();
if (matches == 0)
{
// None matched
Expand All @@ -1278,105 +1268,46 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, byt
// Initial size check was done on method entry.
Debug.Assert(length >= Vector128<byte>.Count);
{
Vector128<byte> search;
Vector128<byte> search, compareResult;
Vector128<byte> values0 = Vector128.Create(value0);
Vector128<byte> values1 = Vector128.Create(value1);
Vector128<byte> values2 = Vector128.Create(value2);
// First time this checks against 0 and we will move into final compare if it fails.
while (lengthToExamine > offset)
{
search = LoadVector128(ref searchSpace, offset);
search = Vector128.LoadUnsafe(ref searchSpace, offset);

matches = Sse2.MoveMask(
Sse2.Or(
Sse2.Or(
Sse2.CompareEqual(values0, search),
Sse2.CompareEqual(values1, search)),
Sse2.CompareEqual(values2, search)));
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
// So the bit position in 'matches' corresponds to the element offset.
if (matches == 0)
compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) | Vector128.Equals(values2, search);
if (compareResult == Vector128<byte>.Zero)
{
// None matched
offset += (nuint)Vector128<byte>.Count;
continue;
}

// Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags,
// So the bit position in 'matches' corresponds to the element offset.
matches = compareResult.ExtractMostSignificantBits();
goto IntrinsicsMatch;
}
// Move to Vector length from end for final compare
search = LoadVector128(ref searchSpace, lengthToExamine);
search = Vector128.LoadUnsafe(ref searchSpace, lengthToExamine);
offset = lengthToExamine;
// Same as method as above
matches = Sse2.MoveMask(
Sse2.Or(
Sse2.Or(
Sse2.CompareEqual(values0, search),
Sse2.CompareEqual(values1, search)),
Sse2.CompareEqual(values2, search)));
if (matches == 0)
compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search) | Vector128.Equals(values2, search);
if (compareResult == Vector128<byte>.Zero)
{
// None matched
goto NotFound;
}
matches = compareResult.ExtractMostSignificantBits();
}

IntrinsicsMatch:
// Find bitflag offset of first difference and add to current offset
offset += (nuint)BitOperations.TrailingZeroCount(matches);
goto Found;
}
else if (AdvSimd.Arm64.IsSupported)
{
Vector128<byte> search;
Vector128<byte> matches;
Vector128<byte> values0 = Vector128.Create(value0);
Vector128<byte> values1 = Vector128.Create(value1);
Vector128<byte> values2 = Vector128.Create(value2);
// First time this checks against 0 and we will move into final compare if it fails.
while (lengthToExamine > offset)
{
search = LoadVector128(ref searchSpace, offset);

matches = AdvSimd.Or(
AdvSimd.Or(
AdvSimd.CompareEqual(values0, search),
AdvSimd.CompareEqual(values1, search)),
AdvSimd.CompareEqual(values2, search));

if (matches == Vector128<byte>.Zero)
{
offset += (nuint)Vector128<byte>.Count;
continue;
}

// Find bitflag offset of first match and add to current offset
offset += FindFirstMatchedLane(matches);

goto Found;
}

// Move to Vector length from end for final compare
search = LoadVector128(ref searchSpace, lengthToExamine);
offset = lengthToExamine;
// Same as method as above
matches = AdvSimd.Or(
AdvSimd.Or(
AdvSimd.CompareEqual(values0, search),
AdvSimd.CompareEqual(values1, search)),
AdvSimd.CompareEqual(values2, search));

if (matches == Vector128<byte>.Zero)
{
// None matched
goto NotFound;
}

// Find bitflag offset of first match and add to current offset
offset += FindFirstMatchedLane(matches);

goto Found;
}
else if (Vector.IsHardwareAccelerated)
{
Vector<byte> values0 = new Vector<byte>(value0);
Expand Down

0 comments on commit 402aa85

Please sign in to comment.