Skip to content

Commit

Permalink
Use IndexOfAnyValues in Xml (#78664)
Browse files Browse the repository at this point in the history
* Use IndexOfAnyValues in Xml

* Avoid checking whitespace char twice

* More spans
  • Loading branch information
MihaZupan authored Nov 23, 2022
1 parent 2b87d85 commit ce06592
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 103 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1514,20 +1514,14 @@ private int ReadBytes(Encoding encoding, int byteBlock, int charBlock, byte[] bu
catch (FormatException exception)
{
// Something was wrong with the format, see if we can strip the spaces
int i = 0;
int j = 0;
while (true)
int newCount = XmlConverter.StripWhitespace(chars.AsSpan(0, charCount));
if (newCount == charCount)
{
while (j < charCount && XmlConverter.IsWhitespace(chars[j]))
j++;
if (j == charCount)
break;
chars[i++] = chars[j++];
}
// No spaces, so don't try again
if (i == charCount)
// No spaces, so don't try again
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(exception.Message, exception.InnerException));
charCount = i;
}

charCount = newCount;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -750,27 +750,20 @@ protected void StartContent(char[] chars, int offset, int count)

private static void VerifyWhitespace(char ch)
{
if (!IsWhitespace(ch))
throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
if (!XmlConverter.IsWhitespace(ch))
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
}

private static void VerifyWhitespace(string s)
{
for (int i = 0; i < s.Length; i++)
if (!IsWhitespace(s[i]))
throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
if (!XmlConverter.IsWhitespace(s))
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
}

private static void VerifyWhitespace(char[] chars, int offset, int count)
{
for (int i = 0; i < count; i++)
if (!IsWhitespace(chars[offset + i]))
throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
}

private static bool IsWhitespace(char ch)
{
return (ch == ' ' || ch == '\n' || ch == '\r' || ch == 't');
if (!XmlConverter.IsWhitespace(chars.AsSpan(offset, count)))
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
}

protected static void EndContent()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -768,23 +768,12 @@ public int GetCharEntity(int offset, int length)
public bool IsWhitespaceKey(int key)
{
string s = GetDictionaryString(key).Value;
for (int i = 0; i < s.Length; i++)
{
if (!XmlConverter.IsWhitespace(s[i]))
return false;
}
return true;
return XmlConverter.IsWhitespace(s);
}

public bool IsWhitespaceUTF8(int offset, int length)
{
byte[] buffer = _buffer;
for (int i = 0; i < length; i++)
{
if (!XmlConverter.IsWhitespace((char)buffer[offset + i]))
return false;
}
return true;
return XmlConverter.IsWhitespace(_buffer.AsSpan(offset, length));
}

public bool IsWhitespaceUnicode(int offset, int length)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
using System.Runtime.Serialization;
using System.Collections.Generic;
using System.Collections.ObjectModel;

using System.Buffers;

namespace System.Xml
{
Expand All @@ -30,6 +30,10 @@ internal static class XmlConverter
public const int MaxUInt64Chars = 32;
public const int MaxPrimitiveChars = MaxDateTimeChars;

// Matches IsWhitespace below
private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(" \t\r\n");
private static readonly IndexOfAnyValues<byte> s_whitespaceBytes = IndexOfAnyValues.Create(" \t\r\n"u8);

public static bool ToBoolean(string value)
{
try
Expand Down Expand Up @@ -1082,45 +1086,62 @@ public static int ToChars(DateTime value, byte[] chars, int offset)
return offset - offsetMin;
}

public static bool IsWhitespace(string s)
/// <summary>
/// Determines whether <paramref name="chars"/> consists solely of the characters held in
/// <c>s_whitespaceChars</c>.
/// </summary>
/// <param name="chars">The characters to inspect.</param>
/// <returns>
/// <see langword="true"/> when no character outside the whitespace set is found;
/// otherwise <see langword="false"/>.
/// </returns>
public static bool IsWhitespace(ReadOnlySpan<char> chars)
{
    // A negative index means the search found nothing outside the whitespace set.
    int firstNonWhitespace = chars.IndexOfAnyExcept(s_whitespaceChars);
    return firstNonWhitespace < 0;
}

/// <summary>
/// Determines whether <paramref name="bytes"/> consists solely of the byte values held in
/// <c>s_whitespaceBytes</c>.
/// </summary>
/// <param name="bytes">The bytes to inspect.</param>
/// <returns>
/// <see langword="true"/> when no byte outside the whitespace set is found;
/// otherwise <see langword="false"/>.
/// </returns>
public static bool IsWhitespace(ReadOnlySpan<byte> bytes)
{
    // A negative index means the search found nothing outside the whitespace set.
    int firstNonWhitespace = bytes.IndexOfAnyExcept(s_whitespaceBytes);
    return firstNonWhitespace < 0;
}

/// <summary>
/// Determines whether <paramref name="ch"/> is a space, tab, carriage return or line feed.
/// </summary>
/// <param name="ch">The character to classify.</param>
/// <returns><see langword="true"/> for one of those four characters; otherwise <see langword="false"/>.</returns>
public static bool IsWhitespace(char ch)
{
    // All four accepted characters are at or below ' ', so anything above it
    // is rejected with a single comparison.
    if (ch > ' ')
    {
        return false;
    }

    return ch is ' ' or '\t' or '\r' or '\n';
}

public static int StripWhitespace(Span<char> chars)
{
for (int i = 0; i < s.Length; i++)
int count = chars.IndexOfAny(s_whitespaceChars);
if (count < 0)
{
if (!IsWhitespace(s[i]))
return false;
return chars.Length;
}
return true;
}

public static bool IsWhitespace(char ch)
{
return (ch <= ' ' && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'));
foreach (char c in chars.Slice(count + 1))
{
if (!IsWhitespace(c))
{
chars[count++] = c;
}
}

return count;
}

public static string StripWhitespace(string s)
{
int count = s.Length;
for (int i = 0; i < s.Length; i++)
int indexOfWhitespace = s.AsSpan().IndexOfAny(s_whitespaceChars);
if (indexOfWhitespace < 0)
{
if (IsWhitespace(s[i]))
return s;
}

int count = s.Length - 1;
foreach (char c in s.AsSpan(indexOfWhitespace + 1))
{
if (IsWhitespace(c))
{
count--;
}
}
if (count == s.Length)
return s;

return string.Create(count, s, (chars, s) =>
return string.Create(count, s, static (chars, s) =>
{
int count = 0;
for (int i = 0; i < s.Length; i++)
foreach (char c in s)
{
char ch = s[i];
if (!IsWhitespace(ch))
if (!IsWhitespace(c))
{
chars[count++] = ch;
chars[count++] = c;
}
}
Debug.Assert(count == chars.Length);
});
}
}
Expand Down
84 changes: 41 additions & 43 deletions src/libraries/System.Private.Xml/src/System/Xml/XmlCharType.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
Expand All @@ -13,6 +14,19 @@ namespace System.Xml
/// </summary>
internal static class XmlCharType
{
#if DEBUG
// Debug-only consistency check: for every code point in 0..127 the explicit character-set
// strings must agree with the corresponding per-character predicates.
static XmlCharType()
{
    for (char c = '\0'; c < 128; c++)
    {
        bool inPublicId = PublicIdChars.Contains(c);
        bool inCharData = AsciiCharDataChars.Contains(c);
        bool inWhitespace = WhiteSpaceChars.Contains(c);

        Debug.Assert(inPublicId == IsPubidChar(c));
        Debug.Assert(inCharData == IsCharData(c));
        Debug.Assert(inWhitespace == IsWhiteSpace(c));
    }
}
#endif

// Surrogate constants
internal const int SurHighStart = 0xd800; // 1101 10xx
internal const int SurHighEnd = 0xdbff;
Expand All @@ -39,6 +53,13 @@ internal static class XmlCharType
// bitmap for public ID characters - 1 bit per character 0x0 - 0x80; no character > 0x80 is a PUBLIC ID char
private const string PublicIdBitmap = "\u2400\u0000\uffbb\uafff\uffff\u87ff\ufffe\u07ff";

// Explicit character sets used to build the IndexOfAnyValues instances below.
// In DEBUG builds the static constructor asserts that, for every char below 128, membership
// in each string agrees with IsPubidChar, IsCharData and IsWhiteSpace respectively.
private const string PublicIdChars = "\n\r !#$%'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
// ASCII-only subset; IsOnlyCharData falls back to per-character IsCharData checks once a char outside this set is seen.
private const string AsciiCharDataChars = "\t\n\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
private const string WhiteSpaceChars = "\t\n\r ";

// Search sets created once from the strings above and shared by the span-based helpers in this class.
private static readonly IndexOfAnyValues<char> s_publicIdChars = IndexOfAnyValues.Create(PublicIdChars);
private static readonly IndexOfAnyValues<char> s_asciiCharDataChars = IndexOfAnyValues.Create(AsciiCharDataChars);
private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(WhiteSpaceChars);

/// <summary>
/// Reports whether the <c>Whitespace</c> flag is set in the properties returned by
/// <c>GetCharProperties</c> for <paramref name="ch"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsWhiteSpace(char ch)
{
    return (GetCharProperties(ch) & Whitespace) != 0u;
}
Expand Down Expand Up @@ -109,46 +130,36 @@ internal static void SplitSurrogateChar(int combinedChar, out char lowChar, out
highChar = (char)(SurHighStart + v / 1024);
}

internal static bool IsOnlyWhitespace(string? str)
{
return IsOnlyWhitespaceWithPos(str) == -1;
}
/// <summary>
/// Reports whether <see cref="IsOnlyWhitespaceWithPos"/> finds no non-whitespace character
/// in <paramref name="str"/>.
/// </summary>
/// <param name="str">The characters to inspect.</param>
internal static bool IsOnlyWhitespace(ReadOnlySpan<char> str)
{
    int firstNonWhitespace = IsOnlyWhitespaceWithPos(str);
    return firstNonWhitespace < 0;
}

// Character checking on strings
internal static int IsOnlyWhitespaceWithPos(string? str)
/// <summary>
/// Locates the first character of <paramref name="str"/> that is not in <c>s_whitespaceChars</c>.
/// </summary>
/// <param name="str">The characters to inspect.</param>
/// <returns>The result of the search: the index of the first such character, or a negative value when none is found.</returns>
internal static int IsOnlyWhitespaceWithPos(ReadOnlySpan<char> str)
{
    return str.IndexOfAnyExcept(s_whitespaceChars);
}

internal static int IsOnlyCharData(ReadOnlySpan<char> str)
{
if (str != null)
int i = str.IndexOfAnyExcept(s_asciiCharDataChars);
if (i < 0)
{
for (int i = 0; i < str.Length; i++)
{
if ((GetCharProperties(str[i]) & Whitespace) == 0u)
{
return i;
}
}
// Fast-path: All ASCII CharData chars
return -1;
}
return -1;
}

internal static int IsOnlyCharData(string str)
{
if (str != null)
for (; (uint)i < (uint)str.Length; i++)
{
for (int i = 0; i < str.Length; i++)
char c = str[i];
if (!IsCharData(c))
{
if ((GetCharProperties(str[i]) & CharData) == 0u)
if ((uint)(i + 1) >= (uint)str.Length || !char.IsSurrogatePair(c, str[i + 1]))
{
if (i + 1 >= str.Length || !(XmlCharType.IsHighSurrogate(str[i]) && XmlCharType.IsLowSurrogate(str[i + 1])))
{
return i;
}
else
{
i++;
}
return i;
}

i++;
}
}

return -1;
}

Expand All @@ -161,20 +172,8 @@ internal static bool IsOnlyDigits(string str, int startPos, int len)
return str.AsSpan(startPos, len).IndexOfAnyExceptInRange('0', '9') < 0;
}

internal static int IsPublicId(string str)
{
if (str != null)
{
for (int i = 0; i < str.Length; i++)
{
if (!IsPubidChar(str[i]))
{
return i;
}
}
}
return -1;
}
/// <summary>
/// Locates the first character of <paramref name="str"/> that is not in <c>s_publicIdChars</c>.
/// </summary>
/// <param name="str">The candidate public identifier.</param>
/// <returns>The index of the first such character, or a negative value when none is found.</returns>
internal static int IsPublicId(string str)
{
    ReadOnlySpan<char> chars = str.AsSpan();
    return chars.IndexOfAnyExcept(s_publicIdChars);
}

// This method tests whether a value is in a given range with just one test; start and end should be constants
private static bool InRange(int value, int start, int end)
Expand Down Expand Up @@ -4286,6 +4285,5 @@ private static bool InRange(int value, int start, int end)
/* FFE0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0,
/* FFF0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0x00, 0x00,
};

}
}
4 changes: 2 additions & 2 deletions src/libraries/System.Private.Xml/src/System/Xml/XmlConvert.cs
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ public static string VerifyPublicId(string publicId)

// returns the position of invalid character or -1
int pos = XmlCharType.IsPublicId(publicId);
if (pos != -1)
if (pos >= 0)
{
throw CreateInvalidCharException(publicId, pos, ExceptionType.XmlException);
}
Expand Down Expand Up @@ -572,7 +572,7 @@ public static bool IsXmlSurrogatePair(char lowChar, char highChar)
return XmlCharType.IsHighSurrogate(highChar) && XmlCharType.IsLowSurrogate(lowChar);
}

// Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PublidChar
// Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PubidChar
public static bool IsPublicIdChar(char ch)
{
return XmlCharType.IsPubidChar(ch);
Expand Down

0 comments on commit ce06592

Please sign in to comment.