Skip to content

Commit

Permalink
Add a SearchValues ProbabilisticMap implementation that uses an ASCII…
Browse files Browse the repository at this point in the history
… fast path (#89155)

* Add a SearchValues ProbabilisticMap implementation that uses an ASCII fast path

* Add comments and asserts around IOptimizations selection

* Unused using
  • Loading branch information
MihaZupan authored Jul 19, 2023
1 parent d1adf81 commit fb2ae67
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Index.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Reflection\Emit\ILGenerator.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\BitVector256.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticWithAsciiCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleByteSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2ByteSearchValues.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ internal static unsafe void ComputeBitmap<T>(ReadOnlySpan<T> values, out Vector2

if (value > 127)
{
// The values were modified concurrent with the call to SearchValues.Create
ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion();
continue;
}

lookupLocal.Set(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace System.Buffers
{
Expand All @@ -14,16 +13,6 @@ internal sealed class ProbabilisticCharSearchValues : SearchValues<char>

public ProbabilisticCharSearchValues(scoped ReadOnlySpan<char> values)
{
if (Vector128.IsHardwareAccelerated && values.Length < 8)
{
// ProbabilisticMap does a Span.Contains check to confirm potential matches.
// If we have fewer than 8 values, pad them with existing ones to make the verification faster.
Span<char> newValues = stackalloc char[8];
newValues.Fill(values[0]);
values.CopyTo(newValues);
values = newValues;
}

_values = new string(values);
_map = new ProbabilisticMap(_values);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Wasm;
using System.Runtime.Intrinsics.X86;

namespace System.Buffers
{
/// <summary>
/// <see cref="SearchValues{T}"/> implementation for a set of chars that contains at least one ASCII value.
/// Searches first run a vectorized pass driven by an ASCII bitmap and only fall back to the
/// <see cref="ProbabilisticMap"/> (or a simple char-by-char loop for the "Except" variants)
/// once a non-ASCII char is encountered in the input.
/// </summary>
/// <typeparam name="TOptimizations">
/// The <see cref="IndexOfAnyAsciiSearcher.IOptimizations"/> flavor matching <c>_asciiBitmap</c>;
/// it specifies whether that bitmap contains the value 0.
/// </typeparam>
internal sealed class ProbabilisticWithAsciiCharSearchValues<TOptimizations> : SearchValues<char>
    where TOptimizations : struct, IndexOfAnyAsciiSearcher.IOptimizations
{
    // The bitmaps and the map are handed to the search helpers by 'ref', so they are not 'readonly'.
    private Vector256<byte> _asciiBitmap;
    private Vector256<byte> _inverseAsciiBitmap;
    private ProbabilisticMap _map;
    private readonly string _values;

    /// <summary>Builds the ASCII bitmap, its complement, and the probabilistic map for <paramref name="values"/>.</summary>
    /// <param name="values">The set of chars to search for. Must contain at least one char in the range [0, 127].</param>
    public ProbabilisticWithAsciiCharSearchValues(scoped ReadOnlySpan<char> values)
    {
        Debug.Assert(IndexOfAnyAsciiSearcher.IsVectorizationSupported);
        Debug.Assert(values.ContainsAnyInRange((char)0, (char)127));

        // Keep our own copy of the values; the map is built from (and verified against) that copy.
        string valuesCopy = new string(values);
        _values = valuesCopy;
        _map = new ProbabilisticMap(valuesCopy);

        // The complement of the bitmap is precomputed once so the searches below can use it directly.
        IndexOfAnyAsciiSearcher.ComputeBitmap(values, out _asciiBitmap, out _);
        _inverseAsciiBitmap = ~_asciiBitmap;
    }

    /// <summary>Returns a fresh array holding the values this instance was created with.</summary>
    internal override char[] GetValues()
    {
        return _values.ToCharArray();
    }

    /// <summary>Determines whether <paramref name="value"/> is one of the values in the set.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal override bool ContainsCore(char value)
    {
        return ProbabilisticMap.Contains(ref Unsafe.As<ProbabilisticMap, uint>(ref _map), _values, value);
    }

    /// <summary>
    /// Tells whether the offset produced by a vectorized ASCII pass is already the final answer:
    /// either it lies outside of <paramref name="span"/>, or the char it points at is ASCII.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static bool IsFinalResult(ReadOnlySpan<char> span, int offset)
    {
        return (uint)offset >= (uint)span.Length || char.IsAscii(span[offset]);
    }

    /// <summary>Returns the index of the first char in <paramref name="span"/> that is in the set, or a negative value if there is none.</summary>
    internal override int IndexOfAny(ReadOnlySpan<char> span)
    {
        int skipped = 0;

        if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
        {
            // The vectorized pass looks for the first ASCII char that is in the set, or the first non-ASCII char.
            // That is expressed as a negated search ('Negate') over the inverted bitmap.
            //
            // Whenever the bitmap handed to the searcher contains a 0, X86 and WASM must use
            // 'Ssse3AndWasmHandleZeroInNeedle'; everything else uses 'Default'.
            // 'TOptimizations' describes '_asciiBitmap', and the inverted bitmap contains a 0 exactly when
            // '_asciiBitmap' does not. So 'Ssse3AndWasmHandleZeroInNeedle' is needed here iff we are on
            // X86/WASM and 'TOptimizations' is 'Default'.
            Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));

            ref short searchSpace = ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span));

            if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default))
            {
                Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");

                skipped = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
                    ref searchSpace,
                    span.Length,
                    ref _inverseAsciiBitmap);
            }
            else
            {
                Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
                    "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");

                skipped = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
                    ref searchSpace,
                    span.Length,
                    ref _inverseAsciiBitmap);
            }

            // Either the pass did not stop inside the span, or it stopped on an ASCII char: that is the result.
            if (IsFinalResult(span, skipped))
            {
                return skipped;
            }

            // The pass stopped on a non-ASCII char. Continue from there with the ProbabilisticMap.
            span = span[skipped..];
        }

        int index = ProbabilisticMap.IndexOfAny(
            ref Unsafe.As<ProbabilisticMap, uint>(ref _map),
            ref MemoryMarshal.GetReference(span),
            span.Length,
            _values);

        // A match is relative to the sliced span; shift it by the number of chars skipped by the ASCII pass.
        return index < 0 ? index : index + skipped;
    }

    /// <summary>Returns the index of the first char in <paramref name="span"/> that is not in the set, or a negative value if there is none.</summary>
    internal override int IndexOfAnyExcept(ReadOnlySpan<char> span)
    {
        int skipped = 0;

        if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
        {
            // A regular IndexOfAnyExcept over the ASCII bitmap. The search also stops on any non-ASCII char.
            skipped = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
                ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
                span.Length,
                ref _asciiBitmap);

            // Either the pass did not stop inside the span, or it stopped on an ASCII char: that is the result.
            if (IsFinalResult(span, skipped))
            {
                return skipped;
            }

            // The pass stopped on a non-ASCII char. Continue from there with a simple char-by-char search.
            span = span[skipped..];
        }

        int index = ProbabilisticMap.IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
            ref MemoryMarshal.GetReference(span),
            span.Length,
            _values);

        // A match is relative to the sliced span; shift it by the number of chars skipped by the ASCII pass.
        return index < 0 ? index : index + skipped;
    }

    /// <summary>Returns the index of the last char in <paramref name="span"/> that is in the set, or a negative value if there is none.</summary>
    internal override int LastIndexOfAny(ReadOnlySpan<char> span)
    {
        if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
        {
            // The vectorized pass looks for the last ASCII char that is in the set, or the last non-ASCII char.
            // That is expressed as a negated search ('Negate') over the inverted bitmap.
            //
            // Whenever the bitmap handed to the searcher contains a 0, X86 and WASM must use
            // 'Ssse3AndWasmHandleZeroInNeedle'; everything else uses 'Default'.
            // 'TOptimizations' describes '_asciiBitmap', and the inverted bitmap contains a 0 exactly when
            // '_asciiBitmap' does not. So 'Ssse3AndWasmHandleZeroInNeedle' is needed here iff we are on
            // X86/WASM and 'TOptimizations' is 'Default'.
            Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));

            ref short searchSpace = ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span));
            int stoppedAt;

            if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default))
            {
                Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");

                stoppedAt = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
                    ref searchSpace,
                    span.Length,
                    ref _inverseAsciiBitmap);
            }
            else
            {
                Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
                    "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");

                stoppedAt = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
                    ref searchSpace,
                    span.Length,
                    ref _inverseAsciiBitmap);
            }

            // Either the pass did not stop inside the span, or it stopped on an ASCII char: that is the result.
            if (IsFinalResult(span, stoppedAt))
            {
                return stoppedAt;
            }

            // The pass stopped on a non-ASCII char. Keep everything up to and including it for the ProbabilisticMap.
            // The slice still starts at index 0, so the fallback's result needs no adjustment.
            span = span[..(stoppedAt + 1)];
        }

        return ProbabilisticMap.LastIndexOfAny(
            ref Unsafe.As<ProbabilisticMap, uint>(ref _map),
            ref MemoryMarshal.GetReference(span),
            span.Length,
            _values);
    }

    /// <summary>Returns the index of the last char in <paramref name="span"/> that is not in the set, or a negative value if there is none.</summary>
    internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span)
    {
        if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
        {
            // A regular LastIndexOfAnyExcept over the ASCII bitmap. The search also stops on any non-ASCII char.
            int stoppedAt = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
                ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
                span.Length,
                ref _asciiBitmap);

            // Either the pass did not stop inside the span, or it stopped on an ASCII char: that is the result.
            if (IsFinalResult(span, stoppedAt))
            {
                return stoppedAt;
            }

            // The pass stopped on a non-ASCII char. Keep everything up to and including it for the char-by-char search.
            // The slice still starts at index 0, so the fallback's result needs no adjustment.
            span = span[..(stoppedAt + 1)];
        }

        return ProbabilisticMap.LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
            ref MemoryMarshal.GetReference(span),
            span.Length,
            _values);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,29 @@ ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(values)),
return new Latin1CharSearchValues(values);
}

return new ProbabilisticCharSearchValues(values);
scoped ReadOnlySpan<char> probabilisticValues = values;

if (Vector128.IsHardwareAccelerated && values.Length < 8)
{
// ProbabilisticMap does a Span.Contains check to confirm potential matches.
// If we have fewer than 8 values, pad them with existing ones to make the verification faster.
Span<char> newValues = stackalloc char[8];
newValues.Fill(values[0]);
values.CopyTo(newValues);
probabilisticValues = newValues;
}

if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && minInclusive < 128)
{
// If we have both ASCII and non-ASCII characters, use an implementation that
// does an optimistic ASCII fast-path and then falls back to the ProbabilisticMap.

return (Ssse3.IsSupported || PackedSimd.IsSupported) && probabilisticValues.Contains('\0')
? new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(probabilisticValues)
: new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Default>(probabilisticValues);
}

return new ProbabilisticCharSearchValues(probabilisticValues);
}

private static bool TryGetSingleRange<T>(ReadOnlySpan<T> values, out T minInclusive, out T maxInclusive)
Expand Down

0 comments on commit fb2ae67

Please sign in to comment.