Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use IndexOfAnyValues in Xml #78664

Merged
merged 3 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1514,20 +1514,14 @@ private int ReadBytes(Encoding encoding, int byteBlock, int charBlock, byte[] bu
catch (FormatException exception)
{
// Something was wrong with the format, see if we can strip the spaces
int i = 0;
int j = 0;
while (true)
int newCount = XmlConverter.StripWhitespace(chars.AsSpan(0, charCount));
if (newCount == charCount)
{
while (j < charCount && XmlConverter.IsWhitespace(chars[j]))
j++;
if (j == charCount)
break;
chars[i++] = chars[j++];
}
// No spaces, so don't try again
if (i == charCount)
// No spaces, so don't try again
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(exception.Message, exception.InnerException));
charCount = i;
}

charCount = newCount;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -750,27 +750,20 @@ protected void StartContent(char[] chars, int offset, int count)

private static void VerifyWhitespace(char ch)
{
if (!IsWhitespace(ch))
throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
if (!XmlConverter.IsWhitespace(ch))
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
}

private static void VerifyWhitespace(string s)
{
for (int i = 0; i < s.Length; i++)
if (!IsWhitespace(s[i]))
throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
if (!XmlConverter.IsWhitespace(s))
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
}

private static void VerifyWhitespace(char[] chars, int offset, int count)
{
for (int i = 0; i < count; i++)
if (!IsWhitespace(chars[offset + i]))
throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
}

private static bool IsWhitespace(char ch)
{
return (ch == ' ' || ch == '\n' || ch == '\r' || ch == 't');
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
if (!XmlConverter.IsWhitespace(chars.AsSpan(offset, count)))
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
}

protected static void EndContent()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -768,23 +768,12 @@ public int GetCharEntity(int offset, int length)
public bool IsWhitespaceKey(int key)
{
string s = GetDictionaryString(key).Value;
for (int i = 0; i < s.Length; i++)
{
if (!XmlConverter.IsWhitespace(s[i]))
return false;
}
return true;
return XmlConverter.IsWhitespace(s);
}

public bool IsWhitespaceUTF8(int offset, int length)
{
byte[] buffer = _buffer;
for (int i = 0; i < length; i++)
{
if (!XmlConverter.IsWhitespace((char)buffer[offset + i]))
return false;
}
return true;
return XmlConverter.IsWhitespace(_buffer.AsSpan(offset, length));
}

public bool IsWhitespaceUnicode(int offset, int length)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
using System.Runtime.Serialization;
using System.Collections.Generic;
using System.Collections.ObjectModel;

using System.Buffers;

namespace System.Xml
{
Expand All @@ -30,6 +30,10 @@ internal static class XmlConverter
public const int MaxUInt64Chars = 32;
public const int MaxPrimitiveChars = MaxDateTimeChars;

// Matches IsWhitespace below
private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(" \t\r\n");
private static readonly IndexOfAnyValues<byte> s_whitespaceBytes = IndexOfAnyValues.Create(" \t\r\n"u8);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IndexOfAnyValues.Create and u8 go nicely together.


public static bool ToBoolean(string value)
{
try
Expand Down Expand Up @@ -1082,45 +1086,62 @@ public static int ToChars(DateTime value, byte[] chars, int offset)
return offset - offsetMin;
}

public static bool IsWhitespace(string s)
// Returns true when no character in the span falls outside s_whitespaceChars,
// i.e. IndexOfAnyExcept finds nothing to report and returns a negative index.
public static bool IsWhitespace(ReadOnlySpan<char> chars) =>
chars.IndexOfAnyExcept(s_whitespaceChars) < 0;

// Byte-span counterpart of the char-span overload: returns true when no byte in
// the span falls outside s_whitespaceBytes (IndexOfAnyExcept returns a negative index).
public static bool IsWhitespace(ReadOnlySpan<byte> bytes) =>
bytes.IndexOfAnyExcept(s_whitespaceBytes) < 0;

// Single-character check: true only for space, tab, carriage return or line feed.
// The `<= ' '` relational pattern means any character above U+0020 fails the match
// regardless of the listed alternatives.
public static bool IsWhitespace(char ch) =>
ch is <= ' ' and (' ' or '\t' or '\r' or '\n');
MihaZupan marked this conversation as resolved.
Show resolved Hide resolved

public static int StripWhitespace(Span<char> chars)
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
{
for (int i = 0; i < s.Length; i++)
int count = chars.IndexOfAny(s_whitespaceChars);
if (count < 0)
{
if (!IsWhitespace(s[i]))
return false;
return chars.Length;
}
return true;
}

public static bool IsWhitespace(char ch)
{
return (ch <= ' ' && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'));
foreach (char c in chars.Slice(count + 1))
{
if (!IsWhitespace(c))
{
chars[count++] = c;
}
}

return count;
}

public static string StripWhitespace(string s)
{
int count = s.Length;
for (int i = 0; i < s.Length; i++)
int indexOfWhitespace = s.AsSpan().IndexOfAny(s_whitespaceChars);
if (indexOfWhitespace < 0)
{
if (IsWhitespace(s[i]))
return s;
}

int count = s.Length - 1;
foreach (char c in s.AsSpan(indexOfWhitespace + 1))
{
if (IsWhitespace(c))
{
count--;
}
}
if (count == s.Length)
return s;

return string.Create(count, s, (chars, s) =>
return string.Create(count, s, static (chars, s) =>
{
int count = 0;
for (int i = 0; i < s.Length; i++)
foreach (char c in s)
{
char ch = s[i];
if (!IsWhitespace(ch))
if (!IsWhitespace(c))
{
chars[count++] = ch;
chars[count++] = c;
}
}
Debug.Assert(count == chars.Length);
});
}
}
Expand Down
84 changes: 41 additions & 43 deletions src/libraries/System.Private.Xml/src/System/Xml/XmlCharType.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
Expand All @@ -13,6 +14,19 @@ namespace System.Xml
/// </summary>
internal static class XmlCharType
{
#if DEBUG
// Debug-only consistency check, compiled only when DEBUG is defined.
// For every ASCII code point (0..127) it asserts that membership in each constant
// character list agrees with the corresponding per-character predicate.
static XmlCharType()
{
for (int i = 0; i < 128; i++)
{
char c = (char)i;
// PublicIdChars must agree with IsPubidChar.
Debug.Assert(PublicIdChars.Contains(c) == IsPubidChar(c));
// AsciiCharDataChars must agree with IsCharData.
Debug.Assert(AsciiCharDataChars.Contains(c) == IsCharData(c));
// WhiteSpaceChars must agree with IsWhiteSpace.
Debug.Assert(WhiteSpaceChars.Contains(c) == IsWhiteSpace(c));
}
}
#endif

// Surrogate constants
internal const int SurHighStart = 0xd800; // 1101 10xx
internal const int SurHighEnd = 0xdbff;
Expand All @@ -39,6 +53,13 @@ internal static class XmlCharType
// bitmap for public ID characters - 1 bit per character 0x0 - 0x80; no character > 0x80 is a PUBLIC ID char
private const string PublicIdBitmap = "\u2400\u0000\uffbb\uafff\uffff\u87ff\ufffe\u07ff";

private const string PublicIdChars = "\n\r !#$%'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
private const string AsciiCharDataChars = "\t\n\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
private const string WhiteSpaceChars = "\t\n\r ";

private static readonly IndexOfAnyValues<char> s_publicIdChars = IndexOfAnyValues.Create(PublicIdChars);
private static readonly IndexOfAnyValues<char> s_asciiCharDataChars = IndexOfAnyValues.Create(AsciiCharDataChars);
private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(WhiteSpaceChars);

// True when the Whitespace flag is set in the value GetCharProperties returns for ch.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsWhiteSpace(char ch) => (GetCharProperties(ch) & Whitespace) != 0u;
Expand Down Expand Up @@ -109,46 +130,36 @@ internal static void SplitSurrogateChar(int combinedChar, out char lowChar, out
highChar = (char)(SurHighStart + v / 1024);
}

internal static bool IsOnlyWhitespace(string? str)
{
return IsOnlyWhitespaceWithPos(str) == -1;
}
// Returns true when IsOnlyWhitespaceWithPos reports a negative index for str.
internal static bool IsOnlyWhitespace(ReadOnlySpan<char> str) =>
IsOnlyWhitespaceWithPos(str) < 0;

// Character checking on strings
internal static int IsOnlyWhitespaceWithPos(string? str)
// Returns the result of IndexOfAnyExcept over s_whitespaceChars: the index of the
// first character in str that is not in that set, or a negative value when every
// character is in the set.
internal static int IsOnlyWhitespaceWithPos(ReadOnlySpan<char> str) =>
str.IndexOfAnyExcept(s_whitespaceChars);

internal static int IsOnlyCharData(ReadOnlySpan<char> str)
{
if (str != null)
int i = str.IndexOfAnyExcept(s_asciiCharDataChars);
if (i < 0)
{
for (int i = 0; i < str.Length; i++)
{
if ((GetCharProperties(str[i]) & Whitespace) == 0u)
{
return i;
}
}
// Fast-path: All ASCII CharData chars
return -1;
}
return -1;
}

internal static int IsOnlyCharData(string str)
{
if (str != null)
for (; (uint)i < (uint)str.Length; i++)
{
for (int i = 0; i < str.Length; i++)
char c = str[i];
if (!IsCharData(c))
{
if ((GetCharProperties(str[i]) & CharData) == 0u)
if ((uint)(i + 1) >= (uint)str.Length || !char.IsSurrogatePair(c, str[i + 1]))
{
if (i + 1 >= str.Length || !(XmlCharType.IsHighSurrogate(str[i]) && XmlCharType.IsLowSurrogate(str[i + 1])))
{
return i;
}
else
{
i++;
}
return i;
}

i++;
}
}

return -1;
}

Expand All @@ -161,20 +172,8 @@ internal static bool IsOnlyDigits(string str, int startPos, int len)
return str.AsSpan(startPos, len).IndexOfAnyExceptInRange('0', '9') < 0;
}

internal static int IsPublicId(string str)
{
if (str != null)
{
for (int i = 0; i < str.Length; i++)
{
if (!IsPubidChar(str[i]))
{
return i;
}
}
}
return -1;
}
// Returns the index of the first character in str that is not in s_publicIdChars,
// or a negative value when every character is in the set.
// NOTE(review): the explicit null check of the earlier loop-based version is gone;
// this relies on AsSpan() turning a null string into an empty span — confirm.
internal static int IsPublicId(string str) =>
str.AsSpan().IndexOfAnyExcept(s_publicIdChars);

// This method tests whether a value is in a given range with just one test; start and end should be constants
private static bool InRange(int value, int start, int end)
Expand Down Expand Up @@ -4286,6 +4285,5 @@ private static bool InRange(int value, int start, int end)
/* FFE0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0,
/* FFF0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0x00, 0x00,
};

}
}
4 changes: 2 additions & 2 deletions src/libraries/System.Private.Xml/src/System/Xml/XmlConvert.cs
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ public static string VerifyPublicId(string publicId)

// returns the position of invalid character or -1
int pos = XmlCharType.IsPublicId(publicId);
if (pos != -1)
if (pos >= 0)
{
throw CreateInvalidCharException(publicId, pos, ExceptionType.XmlException);
}
Expand Down Expand Up @@ -572,7 +572,7 @@ public static bool IsXmlSurrogatePair(char lowChar, char highChar)
return XmlCharType.IsHighSurrogate(highChar) && XmlCharType.IsLowSurrogate(lowChar);
}

// Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PublidChar
// Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PubidChar
public static bool IsPublicIdChar(char ch)
{
return XmlCharType.IsPubidChar(ch);
Expand Down