Replace hand-written character-class loops in the XML stack with span searches
backed by cached IndexOfAnyValues instances.

System.Private.DataContractSerialization
  * XmlConverter: adds cached IndexOfAnyValues sets for " \t\r\n" in both char
    and UTF-8 byte form, span-based IsWhitespace overloads for chars and bytes,
    and an in-place StripWhitespace(Span<char>) that compacts the span and
    returns the new length (the original length when nothing was removed).
    StripWhitespace(string) returns the input instance unchanged when
    IndexOfAny finds no whitespace, and only counts and copies otherwise.
  * XmlBaseReader.ReadBytes: the FormatException retry path calls
    XmlConverter.StripWhitespace instead of an inline two-index compaction
    loop; it still rethrows as XmlException when nothing was stripped.
  * XmlBaseWriter: the three VerifyWhitespace overloads call
    XmlConverter.IsWhitespace. The removed private IsWhitespace helper compared
    against the letter 't' rather than '\t', so this changes behavior: a tab
    is now accepted and the letter 't' is now rejected.
  * XmlBufferReader: IsWhitespaceKey and IsWhitespaceUTF8 delegate to
    XmlConverter.IsWhitespace.

System.Private.Xml
  * XmlCharType: IsOnlyWhitespace, IsOnlyWhitespaceWithPos and IsOnlyCharData
    take ReadOnlySpan<char> and use IndexOfAnyExcept; IsOnlyCharData only falls
    back to the per-char loop (with surrogate-pair handling) from the first
    char outside the ASCII CharData set. IsPublicId uses IndexOfAnyExcept too.
    A DEBUG-only static constructor asserts that the three literal character
    sets agree with IsPubidChar / IsCharData / IsWhiteSpace for chars 0..127.
  * XmlConvert: VerifyPublicId tests "pos >= 0", and a comment typo
    (PublidChar) is corrected to PubidChar.

diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseReader.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseReader.cs
index f797800dc6d86..5c178ea6414f0 100644
--- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseReader.cs
+++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseReader.cs
@@ -1514,20 +1514,14 @@ private int ReadBytes(Encoding encoding, int byteBlock, int charBlock, byte[] bu
                 catch (FormatException exception)
                 {
                     // Something was wrong with the format, see if we can strip the spaces
-                    int i = 0;
-                    int j = 0;
-                    while (true)
+                    int newCount = XmlConverter.StripWhitespace(chars.AsSpan(0, charCount));
+                    if (newCount == charCount)
                     {
-                        while (j < charCount && XmlConverter.IsWhitespace(chars[j]))
-                            j++;
-                        if (j == charCount)
-                            break;
-                        chars[i++] = chars[j++];
-                    }
-                    // No spaces, so don't try again
-                    if (i == charCount)
+                        // No spaces, so don't try again
                         throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(exception.Message, exception.InnerException));
-                    charCount = i;
+                    }
+
+                    charCount = newCount;
                 }
             }
         }
diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseWriter.cs
index 784033b7a8b91..5ced565e20c39 100644
--- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseWriter.cs
+++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseWriter.cs
@@ -750,27 +750,20 @@ protected void StartContent(char[] chars, int offset, int count)
 
         private static void VerifyWhitespace(char ch)
         {
-            if (!IsWhitespace(ch))
-                throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
+            if (!XmlConverter.IsWhitespace(ch))
+                throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
         }
 
         private static void VerifyWhitespace(string s)
         {
-            for (int i = 0; i < s.Length; i++)
-                if (!IsWhitespace(s[i]))
-                    throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
+            if (!XmlConverter.IsWhitespace(s))
+                throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
         }
 
         private static void VerifyWhitespace(char[] chars, int offset, int count)
         {
-            for (int i = 0; i < count; i++)
-                if (!IsWhitespace(chars[offset + i]))
-                    throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
-        }
-
-        private static bool IsWhitespace(char ch)
-        {
-            return (ch == ' ' || ch == '\n' || ch == '\r' || ch == 't');
+            if (!XmlConverter.IsWhitespace(chars.AsSpan(offset, count)))
+                throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
         }
 
         protected static void EndContent()
diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBufferReader.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBufferReader.cs
index 57b71815a8068..995d64ba93daa 100644
--- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBufferReader.cs
+++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBufferReader.cs
@@ -768,23 +768,12 @@ public int GetCharEntity(int offset, int length)
         public bool IsWhitespaceKey(int key)
         {
             string s = GetDictionaryString(key).Value;
-            for (int i = 0; i < s.Length; i++)
-            {
-                if (!XmlConverter.IsWhitespace(s[i]))
-                    return false;
-            }
-            return true;
+            return XmlConverter.IsWhitespace(s);
         }
 
         public bool IsWhitespaceUTF8(int offset, int length)
         {
-            byte[] buffer = _buffer;
-            for (int i = 0; i < length; i++)
-            {
-                if (!XmlConverter.IsWhitespace((char)buffer[offset + i]))
-                    return false;
-            }
-            return true;
+            return XmlConverter.IsWhitespace(_buffer.AsSpan(offset, length));
         }
 
         public bool IsWhitespaceUnicode(int offset, int length)
diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlConverter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlConverter.cs
index 52df28425a18b..71867bf458dea 100644
--- a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlConverter.cs
+++ b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlConverter.cs
@@ -14,7 +14,7 @@
 using System.Runtime.Serialization;
 using System.Collections.Generic;
 using System.Collections.ObjectModel;
-
+using System.Buffers;
 
 namespace System.Xml
 {
@@ -30,6 +30,10 @@ internal static class XmlConverter
         public const int MaxUInt64Chars = 32;
         public const int MaxPrimitiveChars = MaxDateTimeChars;
 
+        // Matches IsWhitespace below
+        private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(" \t\r\n");
+        private static readonly IndexOfAnyValues<byte> s_whitespaceBytes = IndexOfAnyValues.Create(" \t\r\n"u8);
+
         public static bool ToBoolean(string value)
         {
             try
@@ -1082,45 +1086,62 @@ public static int ToChars(DateTime value, byte[] chars, int offset)
             return offset - offsetMin;
         }
 
-        public static bool IsWhitespace(string s)
+        public static bool IsWhitespace(ReadOnlySpan<char> chars) =>
+            chars.IndexOfAnyExcept(s_whitespaceChars) < 0;
+
+        public static bool IsWhitespace(ReadOnlySpan<byte> bytes) =>
+            bytes.IndexOfAnyExcept(s_whitespaceBytes) < 0;
+
+        public static bool IsWhitespace(char ch) =>
+            ch is <= ' ' and (' ' or '\t' or '\r' or '\n');
+
+        public static int StripWhitespace(Span<char> chars)
         {
-            for (int i = 0; i < s.Length; i++)
+            int count = chars.IndexOfAny(s_whitespaceChars);
+            if (count < 0)
             {
-                if (!IsWhitespace(s[i]))
-                    return false;
+                return chars.Length;
             }
-            return true;
-        }
 
-        public static bool IsWhitespace(char ch)
-        {
-            return (ch <= ' ' && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'));
+            foreach (char c in chars.Slice(count + 1))
+            {
+                if (!IsWhitespace(c))
+                {
+                    chars[count++] = c;
+                }
+            }
+
+            return count;
         }
 
         public static string StripWhitespace(string s)
         {
-            int count = s.Length;
-            for (int i = 0; i < s.Length; i++)
+            int indexOfWhitespace = s.AsSpan().IndexOfAny(s_whitespaceChars);
+            if (indexOfWhitespace < 0)
             {
-                if (IsWhitespace(s[i]))
+                return s;
+            }
+
+            int count = s.Length - 1;
+            foreach (char c in s.AsSpan(indexOfWhitespace + 1))
+            {
+                if (IsWhitespace(c))
                 {
                     count--;
                 }
             }
-            if (count == s.Length)
-                return s;
-            return string.Create(count, s, (chars, s) =>
+            return string.Create(count, s, static (chars, s) =>
             {
                 int count = 0;
-                for (int i = 0; i < s.Length; i++)
+                foreach (char c in s)
                 {
-                    char ch = s[i];
-                    if (!IsWhitespace(ch))
+                    if (!IsWhitespace(c))
                     {
-                        chars[count++] = ch;
+                        chars[count++] = c;
                     }
                 }
+                Debug.Assert(count == chars.Length);
             });
         }
     }
 }
diff --git a/src/libraries/System.Private.Xml/src/System/Xml/XmlCharType.cs b/src/libraries/System.Private.Xml/src/System/Xml/XmlCharType.cs
index 21233b44632c6..72547a9065a1f 100644
--- a/src/libraries/System.Private.Xml/src/System/Xml/XmlCharType.cs
+++ b/src/libraries/System.Private.Xml/src/System/Xml/XmlCharType.cs
@@ -1,6 +1,7 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers;
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
@@ -13,6 +14,19 @@ namespace System.Xml
     /// </summary>
     internal static class XmlCharType
     {
+#if DEBUG
+        static XmlCharType()
+        {
+            for (int i = 0; i < 128; i++)
+            {
+                char c = (char)i;
+                Debug.Assert(PublicIdChars.Contains(c) == IsPubidChar(c));
+                Debug.Assert(AsciiCharDataChars.Contains(c) == IsCharData(c));
+                Debug.Assert(WhiteSpaceChars.Contains(c) == IsWhiteSpace(c));
+            }
+        }
+#endif
+
         // Surrogate constants
         internal const int SurHighStart = 0xd800;    // 1101 10xx
         internal const int SurHighEnd = 0xdbff;
@@ -39,6 +53,13 @@ internal static class XmlCharType
 
         // bitmap for public ID characters - 1 bit per character 0x0 - 0x80; no character > 0x80 is a PUBLIC ID char
         private const string PublicIdBitmap = "\u2400\u0000\uffbb\uafff\uffff\u87ff\ufffe\u07ff";
+        private const string PublicIdChars = "\n\r !#$%'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
+        private const string AsciiCharDataChars = "\t\n\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
+        private const string WhiteSpaceChars = "\t\n\r ";
+
+        private static readonly IndexOfAnyValues<char> s_publicIdChars = IndexOfAnyValues.Create(PublicIdChars);
+        private static readonly IndexOfAnyValues<char> s_asciiCharDataChars = IndexOfAnyValues.Create(AsciiCharDataChars);
+        private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(WhiteSpaceChars);
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool IsWhiteSpace(char ch) => (GetCharProperties(ch) & Whitespace) != 0u;
@@ -109,46 +130,36 @@ internal static void SplitSurrogateChar(int combinedChar, out char lowChar, out
             highChar = (char)(SurHighStart + v / 1024);
         }
 
-        internal static bool IsOnlyWhitespace(string? str)
-        {
-            return IsOnlyWhitespaceWithPos(str) == -1;
-        }
+        internal static bool IsOnlyWhitespace(ReadOnlySpan<char> str) =>
+            IsOnlyWhitespaceWithPos(str) < 0;
 
         // Character checking on strings
-        internal static int IsOnlyWhitespaceWithPos(string? str)
+        internal static int IsOnlyWhitespaceWithPos(ReadOnlySpan<char> str) =>
+            str.IndexOfAnyExcept(s_whitespaceChars);
+
+        internal static int IsOnlyCharData(ReadOnlySpan<char> str)
         {
-            if (str != null)
+            int i = str.IndexOfAnyExcept(s_asciiCharDataChars);
+            if (i < 0)
             {
-                for (int i = 0; i < str.Length; i++)
-                {
-                    if ((GetCharProperties(str[i]) & Whitespace) == 0u)
-                    {
-                        return i;
-                    }
-                }
+                // Fast-path: All ASCII CharData chars
+                return -1;
             }
-            return -1;
-        }
 
-        internal static int IsOnlyCharData(string str)
-        {
-            if (str != null)
+            for (; (uint)i < (uint)str.Length; i++)
             {
-                for (int i = 0; i < str.Length; i++)
+                char c = str[i];
+                if (!IsCharData(c))
                 {
-                    if ((GetCharProperties(str[i]) & CharData) == 0u)
+                    if ((uint)(i + 1) >= (uint)str.Length || !char.IsSurrogatePair(c, str[i + 1]))
                     {
-                        if (i + 1 >= str.Length || !(XmlCharType.IsHighSurrogate(str[i]) && XmlCharType.IsLowSurrogate(str[i + 1])))
-                        {
-                            return i;
-                        }
-                        else
-                        {
-                            i++;
-                        }
+                        return i;
                     }
+
+                    i++;
                 }
             }
+
             return -1;
         }
 
@@ -161,20 +172,8 @@ internal static bool IsOnlyDigits(string str, int startPos, int len)
             return str.AsSpan(startPos, len).IndexOfAnyExceptInRange('0', '9') < 0;
         }
 
-        internal static int IsPublicId(string str)
-        {
-            if (str != null)
-            {
-                for (int i = 0; i < str.Length; i++)
-                {
-                    if (!IsPubidChar(str[i]))
-                    {
-                        return i;
-                    }
-                }
-            }
-            return -1;
-        }
+        internal static int IsPublicId(string str) =>
+            str.AsSpan().IndexOfAnyExcept(s_publicIdChars);
 
         // This method tests whether a value is in a given range with just one test; start and end should be constants
         private static bool InRange(int value, int start, int end)
@@ -4286,6 +4285,5 @@ private static bool InRange(int value, int start, int end)
             /* FFE0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0,
             /* FFF0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0x00, 0x00,
         };
-
     }
 }
diff --git a/src/libraries/System.Private.Xml/src/System/Xml/XmlConvert.cs b/src/libraries/System.Private.Xml/src/System/Xml/XmlConvert.cs
index 6f3d099097d21..c465eb96094f0 100644
--- a/src/libraries/System.Private.Xml/src/System/Xml/XmlConvert.cs
+++ b/src/libraries/System.Private.Xml/src/System/Xml/XmlConvert.cs
@@ -516,7 +516,7 @@ public static string VerifyPublicId(string publicId)
 
             // returns the position of invalid character or -1
             int pos = XmlCharType.IsPublicId(publicId);
-            if (pos != -1)
+            if (pos >= 0)
             {
                 throw CreateInvalidCharException(publicId, pos, ExceptionType.XmlException);
             }
@@ -572,7 +572,7 @@ public static bool IsXmlSurrogatePair(char lowChar, char highChar)
             return XmlCharType.IsHighSurrogate(highChar) && XmlCharType.IsLowSurrogate(lowChar);
         }
 
-        // Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PublidChar
+        // Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PubidChar
         public static bool IsPublicIdChar(char ch)
         {
             return XmlCharType.IsPubidChar(ch);