diff --git a/src/libraries/Common/src/System/Net/IPv4AddressHelper.Common.cs b/src/libraries/Common/src/System/Net/IPv4AddressHelper.Common.cs index 0f57374b49616f..19a9d0f1f443d8 100644 --- a/src/libraries/Common/src/System/Net/IPv4AddressHelper.Common.cs +++ b/src/libraries/Common/src/System/Net/IPv4AddressHelper.Common.cs @@ -19,23 +19,328 @@ internal static partial class IPv4AddressHelper private const int NumberOfLabels = 4; - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static ushort ToUShort(TChar value) - where TChar : unmanaged, IBinaryInteger + // + // Char parsing + // + + // Only called from the IPv6Helper, only parse the canonical format + internal static int ParseHostNumber(ReadOnlySpan str, int start, int end) { - Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte)); + Span numbers = stackalloc byte[NumberOfLabels]; + + for (int i = 0; i < numbers.Length; ++i) + { + int b = 0; + int ch; - return typeof(TChar) == typeof(char) - ? (char)(object)value - : (byte)(object)value; + for (; (start < end) && (ch = (ushort)(str[start])) != '.' && ch != ':'; ++start) + { + b = (b * 10) + ch - '0'; + } + + numbers[i] = (byte)b; + ++start; + } + + return BinaryPrimitives.ReadInt32BigEndian(numbers); } - // Only called from the IPv6Helper, only parse the canonical format - internal static int ParseHostNumber(ReadOnlySpan str, int start, int end) - where TChar : unmanaged, IBinaryInteger + // + // IsValid + // + // Performs IsValid on a substring. Updates the index to where we + // believe the IPv4 address ends + // + // Inputs: + // name + // string containing possible IPv4 address + // + // start + // offset in to start checking for IPv4 address + // + // end + // offset in of the last character we can touch in the check + // + // Outputs: + // end + // index of last character in we checked + // + // allowIPv6 + // enables parsing IPv4 addresses embedded in IPv6 address literals + // + // notImplicitFile + // do not consider this URI holding an implicit filename + // + // unknownScheme + // the check is made on an unknown scheme (suppress IPv4 canonicalization) + // + // Assumes: + // The address string is terminated by either + // end of the string, characters ':' '/' '\' '?' + // + // + // Returns: + // bool + // + // Throws: + // Nothing + // + + //Remark: MUST NOT be used unless all input indexes are verified and trusted. + internal static unsafe bool IsValid(char* name, int start, ref int end, bool allowIPv6, bool notImplicitFile, bool unknownScheme) + { + // IPv6 can only have canonical IPv4 embedded. Unknown schemes will not attempt parsing of non-canonical IPv4 addresses. + if (allowIPv6 || unknownScheme) + { + return IsValidCanonical(name, start, ref end, allowIPv6, notImplicitFile); + } + else + { + return ParseNonCanonical(name, start, ref end, notImplicitFile) != Invalid; + } + } + + // + // IsValidCanonical + // + // Checks if the substring is a valid canonical IPv4 address or an IPv4 address embedded in an IPv6 literal + // This is an attempt to parse ABNF productions from RFC3986, Section 3.2.2: + // IP-literal = "[" ( IPv6address / IPvFuture ) "]" + // IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + // dec-octet = DIGIT ; 0-9 + // / %x31-39 DIGIT ; 10-99 + // / "1" 2DIGIT ; 100-199 + // / "2" %x30-34 DIGIT ; 200-249 + // / "25" %x30-35 ; 250-255 + // + internal static unsafe bool IsValidCanonical(char* name, int start, ref int end, bool allowIPv6, bool notImplicitFile) + { + int dots = 0; + long number = 0; + bool haveNumber = false; + bool firstCharIsZero = false; + + while (start < end) + { + int ch = (ushort)(name[start]); + + if (allowIPv6) + { + // For an IPv4 address nested inside an IPv6 address, the terminator is either the IPv6 address terminator (']'), prefix ('/') or ScopeId ('%') + if (ch == ']' || ch == '/' || ch == '%') + { + break; + } + } + else if (ch == '/' || ch == '\\' || (notImplicitFile && (ch == ':' || ch == '?' || ch == '#'))) + { + // For a normal IPv4 address, the terminator is the prefix ('/' or its counterpart, '\'). If notImplicitFile is set, the terminator + // is one of the characters which signify the start of the rest of the URI - the port number (':'), query string ('?') or fragment ('#') + + break; + } + + // An explicit cast to an unsigned integer forces character values preceding '0' to underflow, eliminating one comparison below. + uint parsedCharacter = (uint)(ch - '0'); + + if (parsedCharacter < IPv4AddressHelper.Decimal) + { + // A number starting with zero should be interpreted in base 8 / octal + if (!haveNumber && parsedCharacter == 0) + { + if ((start + 1 < end) && name[start + 1] == '0') + { + // 00 is not allowed as a prefix. + return false; + } + + firstCharIsZero = true; + } + + haveNumber = true; + number = number * IPv4AddressHelper.Decimal + parsedCharacter; + if (number > byte.MaxValue) + { + return false; + } + } + else if (ch == '.') + { + // If the current character is not an integer, it may be the IPv4 component separator ('.') + + if (!haveNumber || (number > 0 && firstCharIsZero)) + { + // 0 is not allowed to prefix a number. + return false; + } + ++dots; + haveNumber = false; + number = 0; + firstCharIsZero = false; + } + else + { + return false; + } + ++start; + } + bool res = (dots == 3) && haveNumber; + if (res) + { + end = start; + } + return res; + } + + // Parse any canonical or noncanonical IPv4 formats and return a long between 0 and MaxIPv4Value. + // Return Invalid (-1) for failures. + // If the address has less than three dots, only the rightmost section is assumed to contain the combined value for + // the missing sections: 0xFF00FFFF == 0xFF.0x00.0xFF.0xFF == 0xFF.0xFFFF + internal static unsafe long ParseNonCanonical(char* name, int start, ref int end, bool notImplicitFile) { - Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte)); + int numberBase = IPv4AddressHelper.Decimal; + int ch = 0; + Span parts = stackalloc long[3]; // One part per octet. Final octet doesn't have a terminator, so is stored in currentValue. + long currentValue = 0; + bool atLeastOneChar = false; + + // Parse one dotted section at a time + int dotCount = 0; // Limit 3 + int current = start; + + for (; current < end; current++) + { + ch = (ushort)(name[current]); + currentValue = 0; + + // Figure out what base this section is in, default to base 10. + // A number starting with zero should be interpreted in base 8 / octal + // If the number starts with 0x, it should be interpreted in base 16 / hex + numberBase = IPv4AddressHelper.Decimal; + + if (ch == '0') + { + current++; + atLeastOneChar = true; + if (current < end) + { + ch = (ushort)(name[current]); + if (ch == 'x' || ch == 'X') + { + numberBase = IPv4AddressHelper.Hex; + + current++; + atLeastOneChar = false; + } + else + { + numberBase = IPv4AddressHelper.Octal; + } + } + } + + // Parse this section + for (; current < end; current++) + { + ch = (ushort)(name[current]); + int digitValue = HexConverter.FromChar(ch); + + if (digitValue >= numberBase) + { + break; // Invalid/terminator + } + currentValue = (currentValue * numberBase) + digitValue; + + if (currentValue > MaxIPv4Value) // Overflow + { + return Invalid; + } + + atLeastOneChar = true; + } + + if (current < end && ch == '.') + { + if (dotCount >= 3 // Max of 3 dots and 4 segments + || !atLeastOneChar // No empty segments: 1...1 + // Only the last segment can be more than 255 (if there are less than 3 dots) + || currentValue > 0xFF) + { + return Invalid; + } + parts[dotCount] = currentValue; + dotCount++; + atLeastOneChar = false; + continue; + } + // We don't get here unless we find an invalid character or a terminator + break; + } + + // Terminators + if (!atLeastOneChar) + { + return Invalid; // Empty trailing segment: 1.1.1. + } + else if (current >= end) + { + // end of string, allowed + } + else if (ch == '/' || ch == '\\' || (notImplicitFile && (ch == ':' || ch == '?' || ch == '#'))) + { + // For a normal IPv4 address, the terminator is the prefix ('/' or its counterpart, '\'). If notImplicitFile is set, the terminator + // is one of the characters which signify the start of the rest of the URI - the port number (':'), query string ('?') or fragment ('#') + + end = current; + } + else + { + // not a valid terminating character + return Invalid; + } + + // Parsed, reassemble and check for overflows in the last part. Previous parts have already been checked in the loop + switch (dotCount) + { + case 0: // 0xFFFFFFFF + return currentValue; + case 1: // 0xFF.0xFFFFFF + Debug.Assert(parts[0] <= 0xFF); + if (currentValue > 0xffffff) + { + return Invalid; + } + return (parts[0] << 24) | currentValue; + case 2: // 0xFF.0xFF.0xFFFF + Debug.Assert(parts[0] <= 0xFF); + Debug.Assert(parts[1] <= 0xFF); + if (currentValue > 0xffff) + { + return Invalid; + } + return (parts[0] << 24) | (parts[1] << 16) | currentValue; + case 3: // 0xFF.0xFF.0xFF.0xFF + Debug.Assert(parts[0] <= 0xFF); + Debug.Assert(parts[1] <= 0xFF); + Debug.Assert(parts[2] <= 0xFF); + if (currentValue > 0xff) + { + return Invalid; + } + return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | currentValue; + default: + return Invalid; + } + } + + // + // Byte parsing + // + + // Only called from the IPv6Helper, only parse the canonical format + internal static int ParseHostNumber(ReadOnlySpan str, int start, int end) + { Span numbers = stackalloc byte[NumberOfLabels]; for (int i = 0; i < numbers.Length; ++i) @@ -43,7 +348,7 @@ internal static int ParseHostNumber(ReadOnlySpan str, int start, i int b = 0; int ch; - for (; (start < end) && (ch = ToUShort(str[start])) != '.' && ch != ':'; ++start) + for (; (start < end) && (ch = (ushort)(str[start])) != '.' && ch != ':'; ++start) { b = (b * 10) + ch - '0'; } @@ -97,8 +402,7 @@ internal static int ParseHostNumber(ReadOnlySpan str, int start, i // //Remark: MUST NOT be used unless all input indexes are verified and trusted. - internal static unsafe bool IsValid(TChar* name, int start, ref int end, bool allowIPv6, bool notImplicitFile, bool unknownScheme) - where TChar : unmanaged, IBinaryInteger + internal static unsafe bool IsValid(byte* name, int start, ref int end, bool allowIPv6, bool notImplicitFile, bool unknownScheme) { // IPv6 can only have canonical IPv4 embedded. Unknown schemes will not attempt parsing of non-canonical IPv4 addresses. if (allowIPv6 || unknownScheme) @@ -124,11 +428,8 @@ internal static unsafe bool IsValid(TChar* name, int start, ref int end, // / "2" %x30-34 DIGIT ; 200-249 // / "25" %x30-35 ; 250-255 // - internal static unsafe bool IsValidCanonical(TChar* name, int start, ref int end, bool allowIPv6, bool notImplicitFile) - where TChar : unmanaged, IBinaryInteger + internal static unsafe bool IsValidCanonical(byte* name, int start, ref int end, bool allowIPv6, bool notImplicitFile) { - Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte)); - int dots = 0; long number = 0; bool haveNumber = false; @@ -136,7 +437,7 @@ internal static unsafe bool IsValidCanonical(TChar* name, int start, ref while (start < end) { - int ch = ToUShort(name[start]); + int ch = (ushort)(name[start]); if (allowIPv6) { @@ -162,7 +463,7 @@ internal static unsafe bool IsValidCanonical(TChar* name, int start, ref // A number starting with zero should be interpreted in base 8 / octal if (!haveNumber && parsedCharacter == 0) { - if ((start + 1 < end) && name[start + 1] == TChar.CreateTruncating('0')) + if ((start + 1 < end) && name[start + 1] == (byte)('0')) { // 00 is not allowed as a prefix. return false; @@ -210,11 +511,8 @@ internal static unsafe bool IsValidCanonical(TChar* name, int start, ref // Return Invalid (-1) for failures. // If the address has less than three dots, only the rightmost section is assumed to contain the combined value for // the missing sections: 0xFF00FFFF == 0xFF.0x00.0xFF.0xFF == 0xFF.0xFFFF - internal static unsafe long ParseNonCanonical(TChar* name, int start, ref int end, bool notImplicitFile) - where TChar : unmanaged, IBinaryInteger + internal static unsafe long ParseNonCanonical(byte* name, int start, ref int end, bool notImplicitFile) { - Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte)); - int numberBase = IPv4AddressHelper.Decimal; int ch = 0; Span parts = stackalloc long[3]; // One part per octet. Final octet doesn't have a terminator, so is stored in currentValue. @@ -227,7 +525,7 @@ internal static unsafe long ParseNonCanonical(TChar* name, int start, ref for (; current < end; current++) { - ch = ToUShort(name[current]); + ch = (ushort)(name[current]); currentValue = 0; // Figure out what base this section is in, default to base 10. @@ -241,7 +539,7 @@ internal static unsafe long ParseNonCanonical(TChar* name, int start, ref atLeastOneChar = true; if (current < end) { - ch = ToUShort(name[current]); + ch = (ushort)(name[current]); if (ch == 'x' || ch == 'X') { @@ -260,7 +558,7 @@ internal static unsafe long ParseNonCanonical(TChar* name, int start, ref // Parse this section for (; current < end; current++) { - ch = ToUShort(name[current]); + ch = (ushort)(name[current]); int digitValue = HexConverter.FromChar(ch); if (digitValue >= numberBase) diff --git a/src/libraries/Common/src/System/Net/IPv6AddressHelper.Common.cs b/src/libraries/Common/src/System/Net/IPv6AddressHelper.Common.cs index d27cd18b8d56c3..b4ced5d344ff89 100644 --- a/src/libraries/Common/src/System/Net/IPv6AddressHelper.Common.cs +++ b/src/libraries/Common/src/System/Net/IPv6AddressHelper.Common.cs @@ -95,11 +95,8 @@ internal static bool ShouldHaveIpv4Embedded(ReadOnlySpan numbers) // Remarks: MUST NOT be used unless all input indexes are verified and trusted. // start must be next to '[' position, or error is reported - internal static unsafe bool IsValidStrict(TChar* name, int start, int end) - where TChar : unmanaged, IBinaryInteger + internal static unsafe bool IsValidStrict(char* name, int start, int end) { - Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte)); - // Number of components in this IPv6 address int sequenceCount = 0; // Length of the component currently being constructed @@ -114,7 +111,7 @@ internal static unsafe bool IsValidStrict(TChar* name, int start, int end // An IPv6 address may begin with a start character ('['). If it does, it must end with an end // character (']'). - if (start < end && name[start] == TChar.CreateTruncating('[')) + if (start < end && name[start] == ('[')) { start++; needsClosingBracket = true; @@ -126,7 +123,7 @@ internal static unsafe bool IsValidStrict(TChar* name, int start, int end } // Starting with a colon character is only valid if another colon follows. - if (name[start] == TChar.CreateTruncating(':') && (start + 1 >= end || name[start + 1] != TChar.CreateTruncating(':'))) + if (name[start] == (':') && (start + 1 >= end || name[start + 1] != (':'))) { return false; } @@ -134,7 +131,7 @@ internal static unsafe bool IsValidStrict(TChar* name, int start, int end int i; for (i = start; i < end; ++i) { - int currentCh = IPv4AddressHelper.ToUShort(name[i]); + int currentCh = (ushort)(name[i]); if (HexConverter.IsHexChar(currentCh)) { @@ -163,11 +160,11 @@ internal static unsafe bool IsValidStrict(TChar* name, int start, int end while (i + 1 < end) { i++; - if (name[i] == TChar.CreateTruncating(']')) + if (name[i] == (']')) { goto case ']'; } - else if (name[i] == TChar.CreateTruncating('/')) + else if (name[i] == ('/')) { goto case '/'; } @@ -183,19 +180,19 @@ internal static unsafe bool IsValidStrict(TChar* name, int start, int end // If there's more after the closing bracket, it must be a port. // We don't use the port, but we still validate it. - if (i + 1 < end && name[i + 1] != TChar.CreateTruncating(':')) + if (i + 1 < end && name[i + 1] != (':')) { return false; } // If there is a port, it must either be a hexadecimal or decimal number. // If the next two characters are '0x' then it's a hexadecimal number. Skip the prefix. - if (i + 3 < end && name[i + 2] == TChar.CreateTruncating('0') && name[i + 3] == TChar.CreateTruncating('x')) + if (i + 3 < end && name[i + 2] == ('0') && name[i + 3] == ('x')) { i += 4; for (; i < end; i++) { - int ch = IPv4AddressHelper.ToUShort(name[i]); + int ch = (ushort)(name[i]); if (!HexConverter.IsHexChar(ch)) { @@ -208,7 +205,7 @@ internal static unsafe bool IsValidStrict(TChar* name, int start, int end i += 2; for (; i < end; i++) { - if (!char.IsAsciiDigit((char)IPv4AddressHelper.ToUShort(name[i]))) + if (!char.IsAsciiDigit((char)(ushort)(name[i]))) { return false; } @@ -218,7 +215,7 @@ internal static unsafe bool IsValidStrict(TChar* name, int start, int end case ':': // If the next character after a colon is another colon, the address contains a compressor ('::'). - if ((i > 0) && (name[i - 1] == TChar.CreateTruncating(':'))) + if ((i > 0) && (name[i - 1] == (':'))) { if (haveCompressor) { @@ -308,23 +305,404 @@ internal static unsafe bool IsValidStrict(TChar* name, int start, int end // Nothing // - internal static void Parse(ReadOnlySpan address, scoped Span numbers, out ReadOnlySpan scopeId) - where TChar : unmanaged, IBinaryInteger + internal static void Parse(ReadOnlySpan address, scoped Span numbers, out ReadOnlySpan scopeId) + { + int number = 0; + int currentCh; + int index = 0; + int compressorIndex = -1; + bool numberIsValid = true; + + scopeId = ReadOnlySpan.Empty; + + // Skip the start '[' character, if present. Stop parsing at the end IPv6 address terminator (']'). + for (int i = (address[0] == ('[') ? 1 : 0); i < address.Length && address[i] != (']');) + { + currentCh = (ushort)(address[i]); + + switch (currentCh) + { + case '%': + if (numberIsValid) + { + numbers[index++] = (ushort)number; + numberIsValid = false; + } + + // The scope follows a '%' and terminates at the natural end of the address, the address terminator (']') or the prefix delimiter ('/'). + int scopeStart = i; + + for (++i; i < address.Length && address[i] != (']') && address[i] != ('/'); ++i) + { + } + scopeId = address.Slice(scopeStart, i - scopeStart); + + // Ignore the prefix (if any.) + for (; i < address.Length && address[i] != (']'); ++i) + { + } + break; + + case ':': + numbers[index++] = (ushort)number; + number = 0; + // Two sequential colons form a compressor ('::'). + ++i; + if (address[i] == (':')) + { + compressorIndex = index; + ++i; + } + else if ((compressorIndex < 0) && (index < 6)) + { + // No point checking for IPv4 address if we don't + // have a compressor or we haven't seen 6 16-bit + // numbers yet. + break; + } + + // Check to see if the upcoming number is really an IPv4 + // address. If it is, convert it to 2 ushort numbers + for (int j = i; j < address.Length && + (address[j] != (']')) && + (address[j] != (':')) && + (address[j] != ('%')) && + (address[j] != ('/')) && + (j < i + 4); ++j) + { + + if (address[j] == ('.')) + { + // We have an IPv4 address. Find the end of it: + // we know that since we have a valid IPv6 + // address, the only things that will terminate + // the IPv4 address are the prefix delimiter '/' + // or the end-of-string (which we conveniently + // delimited with ']'). + while (j < address.Length && (address[j] != (']')) && (address[j] != ('/')) && (address[j] != ('%'))) + { + ++j; + } + int ipv4Address = IPv4AddressHelper.ParseHostNumber(address, i, j); + + numbers[index++] = (ushort)(ipv4Address >> 16); + numbers[index++] = (ushort)(ipv4Address & 0xFFFF); + i = j; + + // Set this to avoid adding another number to + // the array if there's a prefix + number = 0; + numberIsValid = false; + break; + } + } + break; + + case '/': + if (numberIsValid) + { + numbers[index++] = (ushort)number; + numberIsValid = false; + } + + for (++i; i < address.Length && address[i] != (']'); i++) + { + } + + break; + + default: + int characterValue = HexConverter.FromChar(currentCh); + + number = number * IPv6AddressHelper.Hex + characterValue; + i++; + break; + } + } + + // Add number to the array if it's not the prefix length or part of + // an IPv4 address that's already been handled + if (numberIsValid) + { + numbers[index++] = (ushort)number; + } + + // If we had a compressor sequence ("::") then we need to expand the + // numbers array. + if (compressorIndex > 0) + { + int toIndex = NumberOfLabels - 1; + int fromIndex = index - 1; + + // If fromIndex and toIndex are the same, it means that "zero bits" are already in the correct place. + // This happens for leading and trailing compression. + if (fromIndex != toIndex) + { + for (int i = index - compressorIndex; i > 0; --i) + { + numbers[toIndex--] = numbers[fromIndex]; + numbers[fromIndex--] = 0; + } + } + } + } + + // + // IsValidStrict + // + // Determine whether a name is a valid IPv6 address. Rules are: + // + // * 8 groups of 16-bit hex numbers, separated by ':' + // * a *single* run of zeros can be compressed using the symbol '::' + // * an optional string of a ScopeID delimited by '%' + // * the last 32 bits in an address can be represented as an IPv4 address + // + // Difference between IsValid() and IsValidStrict() is that IsValid() expects part of the string to + // be ipv6 address where as IsValidStrict() expects strict ipv6 address. + // + // Inputs: + // name + // IPv6 address in string format + // + // Outputs: + // Nothing + // + // Assumes: + // the correct name is terminated by ']' character + // + // Returns: + // true if is IPv6 address, else false + // + // Throws: + // Nothing + // + + // Remarks: MUST NOT be used unless all input indexes are verified and trusted. + // start must be next to '[' position, or error is reported + internal static unsafe bool IsValidStrict(byte* name, int start, int end) { - Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte)); + // Number of components in this IPv6 address + int sequenceCount = 0; + // Length of the component currently being constructed + int sequenceLength = 0; + bool haveCompressor = false; + bool haveIPv4Address = false; + bool expectingNumber = true; + // Start position of the previous component + int lastSequence = 1; + + bool needsClosingBracket = false; + // An IPv6 address may begin with a start character ('['). If it does, it must end with an end + // character (']'). + if (start < end && name[start] == (byte)('[')) + { + start++; + needsClosingBracket = true; + + // IsValidStrict() is only called if there is a ':' in the name string, i.e. + // it is a possible IPv6 address. So, if the string starts with a '[' and + // the pointer is advanced here there are still more characters to parse. + Debug.Assert(start < end); + } + + // Starting with a colon character is only valid if another colon follows. + if (name[start] == (byte)(':') && (start + 1 >= end || name[start + 1] != (byte)(':'))) + { + return false; + } + + int i; + for (i = start; i < end; ++i) + { + int currentCh = (ushort)(name[i]); + + if (HexConverter.IsHexChar(currentCh)) + { + ++sequenceLength; + expectingNumber = false; + } + else + { + if (sequenceLength > 4) + { + return false; + } + if (sequenceLength != 0) + { + ++sequenceCount; + lastSequence = i - sequenceLength; + sequenceLength = 0; + } + + switch (currentCh) + { + case '%': + // An IPv6 address is separated from its scope by a '%' character. The scope + // is terminated by the natural end of the address, the address end character (']') + // or the start of the prefix ('/'). + while (i + 1 < end) + { + i++; + if (name[i] == (byte)(']')) + { + goto case ']'; + } + else if (name[i] == (byte)('/')) + { + goto case '/'; + } + } + break; + + case ']': + if (!needsClosingBracket) + { + return false; + } + needsClosingBracket = false; + + // If there's more after the closing bracket, it must be a port. + // We don't use the port, but we still validate it. + if (i + 1 < end && name[i + 1] != (byte)(':')) + { + return false; + } + + // If there is a port, it must either be a hexadecimal or decimal number. + // If the next two characters are '0x' then it's a hexadecimal number. Skip the prefix. + if (i + 3 < end && name[i + 2] == (byte)('0') && name[i + 3] == (byte)('x')) + { + i += 4; + for (; i < end; i++) + { + int ch = (ushort)(name[i]); + + if (!HexConverter.IsHexChar(ch)) + { + return false; + } + } + } + else + { + i += 2; + for (; i < end; i++) + { + if (!char.IsAsciiDigit((char)(ushort)(name[i]))) + { + return false; + } + } + } + continue; + + case ':': + // If the next character after a colon is another colon, the address contains a compressor ('::'). + if ((i > 0) && (name[i - 1] == (byte)(':'))) + { + if (haveCompressor) + { + // can only have one per IPv6 address + return false; + } + haveCompressor = true; + expectingNumber = false; + } + else + { + expectingNumber = true; + } + break; + + case '/': + // A prefix in an IPv6 address is invalid. + return false; + + case '.': + if (haveIPv4Address) + { + return false; + } + + i = end; + if (!IPv4AddressHelper.IsValid(name, lastSequence, ref i, true, false, false)) + { + return false; + } + // An IPv4 address takes 2 slots in an IPv6 address. One was just counted meeting the '.' + ++sequenceCount; + lastSequence = i - sequenceLength; + sequenceLength = 0; + haveIPv4Address = true; + --i; // it will be incremented back on the next loop + break; + + default: + return false; + } + sequenceLength = 0; + } + } + + if (sequenceLength != 0) + { + if (sequenceLength > 4) + { + return false; + } + + ++sequenceCount; + } + + // These sequence counts are -1 because it is implied in end-of-sequence. + + const int ExpectedSequenceCount = 8; + return + !expectingNumber && + (haveCompressor ? (sequenceCount < ExpectedSequenceCount) : (sequenceCount == ExpectedSequenceCount)) && + !needsClosingBracket; + } + + // + // Parse + // + // Convert this IPv6 address into a sequence of 8 16-bit numbers + // + // Inputs: + // Name + // The validated IPv6 address + // + // Outputs: + // numbers + // Array filled in with the numbers in the IPv6 groups + // + // scopeId + // Set to the text after the scope separator (%) if found + // + // Assumes: + // has been validated and contains only hex digits in groups of + // 16-bit numbers, the characters ':', '/' and '%', and a possible IPv4 + // address + // + // Throws: + // Nothing + // + + internal static void Parse(ReadOnlySpan address, scoped Span numbers, out ReadOnlySpan scopeId) + { int number = 0; int currentCh; int index = 0; int compressorIndex = -1; bool numberIsValid = true; - scopeId = ReadOnlySpan.Empty; + scopeId = ReadOnlySpan.Empty; // Skip the start '[' character, if present. Stop parsing at the end IPv6 address terminator (']'). - for (int i = (address[0] == TChar.CreateTruncating('[') ? 1 : 0); i < address.Length && address[i] != TChar.CreateTruncating(']');) + for (int i = (address[0] == (byte)('[') ? 1 : 0); i < address.Length && address[i] != (byte)(']');) { - currentCh = IPv4AddressHelper.ToUShort(address[i]); + currentCh = (ushort)(address[i]); switch (currentCh) { @@ -338,13 +716,13 @@ internal static void Parse(ReadOnlySpan address, scoped Span(ReadOnlySpan address, scoped Span(ReadOnlySpan address, scoped Span(ReadOnlySpan address, scoped Span(ReadOnlySpan address, scoped Span - private static KnownHeader? GetCandidate(ReadOnlySpan key) - where T : struct, INumberBase + private static KnownHeader? GetCandidate(ReadOnlySpan key) { // Lookup is performed by first switching on the header name's length, and then switching // on the most unique position in that length's string. [MethodImpl(MethodImplOptions.AggressiveInlining)] - static int GetLower(T value) => int.CreateTruncating(value) | 0x20; + static int GetLower(char value) => value | 0x20; + + switch (key.Length) + { + case 2: + return TE; // TE + + case 3: + switch (GetLower(key[0])) + { + case 'a': return Age; // [A]ge + case 'p': return P3P; // [P]3P + case 't': return TSV; // [T]SV + case 'v': return Via; // [V]ia + } + break; + + case 4: + switch (GetLower(key[0])) + { + case 'd': return Date; // [D]ate + case 'e': return ETag; // [E]Tag + case 'f': return From; // [F]rom + case 'h': return Host; // [H]ost + case 'l': return Link; // [L]ink + case 'v': return Vary; // [V]ary + } + break; + + case 5: + switch (GetLower(key[0])) + { + case 'a': return Allow; // [A]llow + case 'r': return Range; // [R]ange + } + break; + + case 6: + switch (GetLower(key[0])) + { + case 'a': return Accept; // [A]ccept + case 'c': return Cookie; // [C]ookie + case 'e': return Expect; // [E]xpect + case 'o': return Origin; // [O]rigin + case 'p': return Pragma; // [P]ragma + case 's': return Server; // [S]erver + } + break; + + case 7: + switch (GetLower(key[0])) + { + case ':': return PseudoStatus; // [:]status + case 'a': return AltSvc; // [A]lt-Svc + case 'c': return Cookie2; // [C]ookie2 + case 'e': return Expires; // [E]xpires + case 'r': + switch (GetLower(key[3])) + { + case 'e': return Referer; // [R]ef[e]rer + case 'r': return Refresh; // [R]ef[r]esh + } + break; + case 't': return Trailer; // [T]railer + case 'u': return Upgrade; // [U]pgrade + case 'w': return Warning; // [W]arning + case 'x': return XCache; // [X]-Cache + } + break; + + case 8: + switch (GetLower(key[3])) + { + case '-': return AltUsed; // Alt[-]Used + case 'a': return Location; // Loc[a]tion + case 'm': return IfMatch; // If-[M]atch + case 'r': return IfRange; // If-[R]ange + } + break; + + case 9: + return ExpectCT; // Expect-CT + + case 10: + switch (GetLower(key[0])) + { + case 'c': return Connection; // [C]onnection + case 'k': return KeepAlive; // [K]eep-Alive + case 's': return SetCookie; // [S]et-Cookie + case 'u': return UserAgent; // [U]ser-Agent + } + break; + + case 11: + switch (GetLower(key[0])) + { + case 'c': return ContentMD5; // [C]ontent-MD5 + case 'g': return GrpcStatus; // [g]rpc-status + case 'r': return RetryAfter; // [R]etry-After + case 's': return SetCookie2; // [S]et-Cookie2 + case 'x': return XServedBy; // [X]-Served-By + } + break; + + case 12: + switch (GetLower(key[5])) + { + case 'd': return XMSEdgeRef; // X-MSE[d]ge-Ref + case 'e': return XPoweredBy; // X-Pow[e]red-By + case 'm': return GrpcMessage; // grpc-[m]essage + case 'n': return ContentType; // Conte[n]t-Type + case 'o': return MaxForwards; // Max-F[o]rwards + case 't': return AcceptPatch; // Accep[t]-Patch + case 'u': return XRequestID; // X-Req[u]est-ID + } + break; + + case 13: + switch (GetLower(key[12])) + { + case 'd': return LastModified; // Last-Modifie[d] + case 'e': return ContentRange; // Content-Rang[e] + case 'g': + switch (GetLower(key[0])) + { + case 's': return ServerTiming; // [S]erver-Timin[g] + case 'g': return GrpcEncoding; // [g]rpc-encodin[g] + } + break; + case 'h': return IfNoneMatch; // If-None-Matc[h] + case 'l': return CacheControl; // Cache-Contro[l] + case 'n': return Authorization; // Authorizatio[n] + case 's': return AcceptRanges; // Accept-Range[s] + case 't': return ProxySupport; // Proxy-Suppor[t] + } + break; + + case 14: + switch (GetLower(key[0])) + { + case 'a': return AcceptCharset; // [A]ccept-Charset + case 'c': return ContentLength; // [C]ontent-Length + } + break; + + case 15: + switch (GetLower(key[7])) + { + case '-': return XFrameOptions; // X-Frame[-]Options + case 'e': return AcceptEncoding; // Accept-[E]ncoding + case 'k': return PublicKeyPins; // Public-[K]ey-Pins + case 'l': return AcceptLanguage; // Accept-[L]anguage + case 'm': return XUACompatible; // X-UA-Co[m]patible + case 'r': return ReferrerPolicy; // Referre[r]-Policy + } + break; + + case 16: + switch (GetLower(key[11])) + { + case 'a': return ContentLocation; // Content-Loc[a]tion + case 'c': + switch (GetLower(key[0])) + { + case 'p': return ProxyConnection; // [P]roxy-Conne[c]tion + case 'x': return XXssProtection; // [X]-XSS-Prote[c]tion + } + break; + case 'g': return ContentLanguage; // Content-Lan[g]uage + case 'i': return WWWAuthenticate; // WWW-Authent[i]cate + case 'o': return ContentEncoding; // Content-Enc[o]ding + case 'r': return XAspNetVersion; // X-AspNet-Ve[r]sion + } + break; + + case 17: + switch (GetLower(key[0])) + { + case 'i': return IfModifiedSince; // [I]f-Modified-Since + case 's': return SecWebSocketKey; // [S]ec-WebSocket-Key + case 't': return TransferEncoding; // [T]ransfer-Encoding + } + break; + + case 18: + switch (GetLower(key[0])) + { + case 'p': return ProxyAuthenticate; // [P]roxy-Authenticate + case 'x': return XContentDuration; // [X]-Content-Duration + } + break; + + case 19: + switch (GetLower(key[0])) + { + case 'c': return ContentDisposition; // [C]ontent-Disposition + case 'i': return IfUnmodifiedSince; // [I]f-Unmodified-Since + case 'p': return ProxyAuthorization; // [P]roxy-Authorization + case 't': return TimingAllowOrigin; // [T]iming-Allow-Origin + } + break; + + case 20: + return SecWebSocketAccept; // Sec-WebSocket-Accept + + case 21: + return SecWebSocketVersion; // Sec-WebSocket-Version + + case 22: + switch (GetLower(key[0])) + { + case 'a': return AccessControlMaxAge; // [A]ccess-Control-Max-Age + case 's': return SecWebSocketProtocol; // [S]ec-WebSocket-Protocol + case 'x': return XContentTypeOptions; // [X]-Content-Type-Options + } + break; + + case 23: + return ContentSecurityPolicy; // Content-Security-Policy + + case 24: + return SecWebSocketExtensions; // Sec-WebSocket-Extensions + + case 25: + switch (GetLower(key[0])) + { + case 's': return StrictTransportSecurity; // [S]trict-Transport-Security + case 'u': return UpgradeInsecureRequests; // [U]pgrade-Insecure-Requests + } + break; + + case 27: + return AccessControlAllowOrigin; // Access-Control-Allow-Origin + + case 28: + switch (GetLower(key[21])) + { + case 'h': return AccessControlAllowHeaders; // Access-Control-Allow-[H]eaders + case 'm': return AccessControlAllowMethods; // Access-Control-Allow-[M]ethods + case '-': return CrossOriginResourcePolicy; // Cross-Origin-Resource[-]Policy + } + break; + + case 29: + return AccessControlExposeHeaders; // Access-Control-Expose-Headers + + case 32: + return AccessControlAllowCredentials; // Access-Control-Allow-Credentials + } + + return null; + } + + private static KnownHeader? GetCandidate(ReadOnlySpan key) + { + // Lookup is performed by first switching on the header name's length, and then switching + // on the most unique position in that length's string. + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static int GetLower(byte value) => value | 0x20; switch (key.Length) { @@ -387,7 +645,7 @@ internal static class KnownHeaders public static KnownHeader? TryGetKnownHeader(string name) { - KnownHeader? candidate = GetCandidate(name); + KnownHeader? candidate = GetCandidate(name); if (candidate != null && StringComparer.OrdinalIgnoreCase.Equals(name, candidate.Name)) { return candidate; diff --git a/src/libraries/System.Net.Primitives/src/System/Net/IPAddress.cs b/src/libraries/System.Net.Primitives/src/System/Net/IPAddress.cs index e8a28bde6be3b3..894ca50a83a354 100644 --- a/src/libraries/System.Net.Primitives/src/System/Net/IPAddress.cs +++ b/src/libraries/System.Net.Primitives/src/System/Net/IPAddress.cs @@ -496,7 +496,7 @@ bool IUtf8SpanFormattable.TryFormat(Span utf8Destination, out int bytesWri // format and provider are explicitly ignored TryFormatCore(utf8Destination, out bytesWritten); - private bool TryFormatCore(Span destination, out int charsWritten) where TChar : unmanaged, IBinaryInteger + private bool TryFormatCore(Span destination, out int charsWritten) { if (IsIPv4) { @@ -515,7 +515,43 @@ private bool TryFormatCore(Span destination, out int charsWritten) } } - Span tmpDestination = stackalloc TChar[IPAddressParser.MaxIPv6StringLength]; + Span tmpDestination = stackalloc char[IPAddressParser.MaxIPv6StringLength]; + Debug.Assert(tmpDestination.Length >= IPAddressParser.MaxIPv4StringLength); + + int written = IsIPv4 ? + IPAddressParser.FormatIPv4Address(PrivateAddress, tmpDestination) : + IPAddressParser.FormatIPv6Address(_numbers, PrivateScopeId, tmpDestination); + + if (tmpDestination.Slice(0, written).TryCopyTo(destination)) + { + charsWritten = written; + return true; + } + + charsWritten = 0; + return false; + } + + private bool TryFormatCore(Span destination, out int charsWritten) + { + if (IsIPv4) + { + if (destination.Length >= IPAddressParser.MaxIPv4StringLength) + { + charsWritten = IPAddressParser.FormatIPv4Address(_addressOrScopeId, destination); + return true; + } + } + else + { + if (destination.Length >= IPAddressParser.MaxIPv6StringLength) + { + charsWritten = IPAddressParser.FormatIPv6Address(_numbers, _addressOrScopeId, destination); + return true; + } + } + + Span tmpDestination = stackalloc byte[IPAddressParser.MaxIPv6StringLength]; Debug.Assert(tmpDestination.Length >= IPAddressParser.MaxIPv4StringLength); int written = IsIPv4 ? diff --git a/src/libraries/System.Net.Primitives/src/System/Net/IPAddressParser.cs b/src/libraries/System.Net.Primitives/src/System/Net/IPAddressParser.cs index e9485113f75826..b44936c90b854e 100644 --- a/src/libraries/System.Net.Primitives/src/System/Net/IPAddressParser.cs +++ b/src/libraries/System.Net.Primitives/src/System/Net/IPAddressParser.cs @@ -16,12 +16,11 @@ internal static class IPAddressParser internal const int MaxIPv4StringLength = 15; // 4 numbers separated by 3 periods, with up to 3 digits per number internal const int MaxIPv6StringLength = 65; - public static unsafe bool IsValid(ReadOnlySpan ipSpan) - where TChar : unmanaged, IBinaryInteger + public static unsafe bool IsValid(ReadOnlySpan ipSpan) { - fixed (TChar* ipStringPtr = &MemoryMarshal.GetReference(ipSpan)) + fixed (char* ipStringPtr = &MemoryMarshal.GetReference(ipSpan)) { - if (ipSpan.Contains(TChar.CreateTruncating(':'))) + if (ipSpan.Contains(':')) { return IPv6AddressHelper.IsValidStrict(ipStringPtr, 0, ipSpan.Length); } @@ -34,12 +33,52 @@ public static unsafe bool IsValid(ReadOnlySpan ipSpan) } } - internal static IPAddress? Parse(ReadOnlySpan ipSpan, bool tryParse) - where TChar : unmanaged, IBinaryInteger + public static unsafe bool IsValid(ReadOnlySpan ipSpan) { - Debug.Assert(typeof(TChar) == typeof(byte) || typeof(TChar) == typeof(char)); + fixed (byte* ipStringPtr = &MemoryMarshal.GetReference(ipSpan)) + { + if (ipSpan.Contains((byte)':')) + { + return IPv6AddressHelper.IsValidStrict(ipStringPtr, 0, ipSpan.Length); + } + else + { + int end = ipSpan.Length; + long address = IPv4AddressHelper.ParseNonCanonical(ipStringPtr, 0, ref end, notImplicitFile: true); + return address != IPv4AddressHelper.Invalid && end == ipSpan.Length; + } + } + } + + internal static IPAddress? Parse(ReadOnlySpan ipSpan, bool tryParse) + { + if (ipSpan.Contains(':')) + { + // The address is parsed as IPv6 if and only if it contains a colon. This is valid because + // we don't support/parse a port specification at the end of an IPv4 address. + Span numbers = stackalloc ushort[IPAddressParserStatics.IPv6AddressShorts]; + numbers.Clear(); + if (TryParseIPv6(ipSpan, numbers, IPAddressParserStatics.IPv6AddressShorts, out uint scope)) + { + return new IPAddress(numbers, scope); + } + } + else if (TryParseIpv4(ipSpan, out long address)) + { + return new IPAddress(address); + } + + if (tryParse) + { + return null; + } + + throw new FormatException(SR.dns_bad_ip_address, new SocketException(SocketError.InvalidArgument)); + } - if (ipSpan.Contains(TChar.CreateTruncating(':'))) + internal static IPAddress? Parse(ReadOnlySpan ipSpan, bool tryParse) + { + if (ipSpan.Contains((byte)':')) { // The address is parsed as IPv6 if and only if it contains a colon. This is valid because // we don't support/parse a port specification at the end of an IPv4 address. @@ -63,13 +102,12 @@ public static unsafe bool IsValid(ReadOnlySpan ipSpan) throw new FormatException(SR.dns_bad_ip_address, new SocketException(SocketError.InvalidArgument)); } - private static unsafe bool TryParseIpv4(ReadOnlySpan ipSpan, out long address) - where TChar : unmanaged, IBinaryInteger + private static unsafe bool TryParseIpv4(ReadOnlySpan ipSpan, out long address) { int end = ipSpan.Length; long tmpAddr; - fixed (TChar* ipStringPtr = &MemoryMarshal.GetReference(ipSpan)) + fixed (char* ipStringPtr = &MemoryMarshal.GetReference(ipSpan)) { tmpAddr = IPv4AddressHelper.ParseNonCanonical(ipStringPtr, 0, ref end, notImplicitFile: true); } @@ -87,13 +125,34 @@ private static unsafe bool TryParseIpv4(ReadOnlySpan ipSpan, out l return false; } - private static unsafe bool TryParseIPv6(ReadOnlySpan ipSpan, Span numbers, int numbersLength, out uint scope) - where TChar : unmanaged, IBinaryInteger + private static unsafe bool TryParseIpv4(ReadOnlySpan ipSpan, out long address) + { + int end = ipSpan.Length; + long tmpAddr; + + fixed (byte* ipStringPtr = &MemoryMarshal.GetReference(ipSpan)) + { + tmpAddr = IPv4AddressHelper.ParseNonCanonical(ipStringPtr, 0, ref end, notImplicitFile: true); + } + + if (tmpAddr != IPv4AddressHelper.Invalid && end == ipSpan.Length) + { + // IPv4AddressHelper.ParseNonCanonical returns the bytes in host order. + // Convert to network order and return success. + address = (uint)IPAddress.HostToNetworkOrder(unchecked((int)tmpAddr)); + return true; + } + + // Failed to parse the address. + address = 0; + return false; + } + + private static unsafe bool TryParseIPv6(ReadOnlySpan ipSpan, Span numbers, int numbersLength, out uint scope) { - Debug.Assert(typeof(TChar) == typeof(char) || typeof(TChar) == typeof(byte)); Debug.Assert(numbersLength >= IPAddressParserStatics.IPv6AddressShorts); - fixed (TChar* ipStringPtr = &MemoryMarshal.GetReference(ipSpan)) + fixed (char* ipStringPtr = &MemoryMarshal.GetReference(ipSpan)) { if (!IPv6AddressHelper.IsValidStrict(ipStringPtr, 0, ipSpan.Length)) { @@ -102,27 +161,61 @@ private static unsafe bool TryParseIPv6(ReadOnlySpan ipSpan, Span< } } - IPv6AddressHelper.Parse(ipSpan, numbers, out ReadOnlySpan scopeIdSpan); + IPv6AddressHelper.Parse(ipSpan, numbers, out ReadOnlySpan scopeIdSpan); if (scopeIdSpan.Length > 1) { bool parsedNumericScope; scopeIdSpan = scopeIdSpan.Slice(1); - // scopeId is a numeric value - if (typeof(TChar) == typeof(byte)) - { - ReadOnlySpan castScopeIdSpan = MemoryMarshal.Cast(scopeIdSpan); + parsedNumericScope = uint.TryParse(scopeIdSpan, NumberStyles.None, CultureInfo.InvariantCulture, out scope); - parsedNumericScope = uint.TryParse(castScopeIdSpan, NumberStyles.None, CultureInfo.InvariantCulture, out scope); + if (parsedNumericScope) + { + return true; } else { - ReadOnlySpan castScopeIdSpan = MemoryMarshal.Cast(scopeIdSpan); + uint interfaceIndex = InterfaceInfoPal.InterfaceNameToIndex(scopeIdSpan); - parsedNumericScope = uint.TryParse(castScopeIdSpan, NumberStyles.None, CultureInfo.InvariantCulture, out scope); + if (interfaceIndex > 0) + { + scope = interfaceIndex; + return true; // scopeId is a known interface name + } } + // scopeId is an unknown interface name + } + + // scopeId is not presented + scope = 0; + return true; + } + + private static unsafe bool TryParseIPv6(ReadOnlySpan ipSpan, Span numbers, int numbersLength, out uint scope) + { + Debug.Assert(numbersLength >= IPAddressParserStatics.IPv6AddressShorts); + + fixed (byte* ipStringPtr = &MemoryMarshal.GetReference(ipSpan)) + { + if (!IPv6AddressHelper.IsValidStrict(ipStringPtr, 0, ipSpan.Length)) + { + scope = 0; + return false; + } + } + + IPv6AddressHelper.Parse(ipSpan, numbers, out ReadOnlySpan scopeIdSpan); + + if (scopeIdSpan.Length > 1) + { + bool parsedNumericScope; + scopeIdSpan = scopeIdSpan.Slice(1); + + // scopeId is a numeric value + parsedNumericScope = uint.TryParse(scopeIdSpan, NumberStyles.None, CultureInfo.InvariantCulture, out scope); + if (parsedNumericScope) { return true; @@ -146,23 +239,66 @@ private static unsafe bool TryParseIPv6(ReadOnlySpan ipSpan, Span< return true; } - internal static int FormatIPv4Address(uint address, Span addressString) - where TChar : unmanaged, IBinaryInteger + internal static int FormatIPv4Address(uint address, Span addressString) + { + address = (uint)IPAddress.NetworkToHostOrder(unchecked((int)address)); + + int pos = FormatByte(address >> 24, addressString); + addressString[pos++] = '.'; + pos += FormatByte(address >> 16, addressString.Slice(pos)); + addressString[pos++] = '.'; + pos += FormatByte(address >> 8, addressString.Slice(pos)); + addressString[pos++] = '.'; + pos += FormatByte(address, addressString.Slice(pos)); + + return pos; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static int FormatByte(uint number, Span addressString) + { + number &= 0xFF; + + if (number >= 10) + { + uint hundreds, tens; + if (number >= 100) + { + (uint hundredsAndTens, number) = Math.DivRem(number, 10); + (hundreds, tens) = Math.DivRem(hundredsAndTens, 10); + + addressString[2] = (char)('0' + number); + addressString[1] = (char)('0' + tens); + addressString[0] = (char)('0' + hundreds); + return 3; + } + + (tens, number) = Math.DivRem(number, 10); + addressString[1] = (char)('0' + number); + addressString[0] = (char)('0' + tens); + return 2; + } + + addressString[0] = (char)('0' + number); + return 1; + } + } + + internal static int FormatIPv4Address(uint address, Span addressString) { address = (uint)IPAddress.NetworkToHostOrder(unchecked((int)address)); int pos = FormatByte(address >> 24, addressString); - addressString[pos++] = TChar.CreateTruncating('.'); + addressString[pos++] = (byte)'.'; pos += FormatByte(address >> 16, addressString.Slice(pos)); - addressString[pos++] = TChar.CreateTruncating('.'); + addressString[pos++] = (byte)'.'; pos += FormatByte(address >> 8, addressString.Slice(pos)); - addressString[pos++] = TChar.CreateTruncating('.'); + addressString[pos++] = (byte)'.'; pos += FormatByte(address, addressString.Slice(pos)); return pos; [MethodImpl(MethodImplOptions.AggressiveInlining)] - static int FormatByte(uint number, Span addressString) + static int FormatByte(uint number, Span addressString) { number &= 0xFF; @@ -174,27 +310,125 @@ static int FormatByte(uint number, Span addressString) (uint hundredsAndTens, number) = Math.DivRem(number, 10); (hundreds, tens) = Math.DivRem(hundredsAndTens, 10); - addressString[2] = TChar.CreateTruncating('0' + number); - addressString[1] = TChar.CreateTruncating('0' + tens); - addressString[0] = TChar.CreateTruncating('0' + hundreds); + addressString[2] = (byte)('0' + number); + addressString[1] = (byte)('0' + tens); + addressString[0] = (byte)('0' + hundreds); return 3; } (tens, number) = Math.DivRem(number, 10); - addressString[1] = TChar.CreateTruncating('0' + number); - addressString[0] = TChar.CreateTruncating('0' + tens); + addressString[1] = (byte)('0' + number); + addressString[0] = (byte)('0' + tens); return 2; } - addressString[0] = TChar.CreateTruncating('0' + number); + addressString[0] = (byte)('0' + number); return 1; } } - internal static int FormatIPv6Address(ushort[] address, uint scopeId, Span destination) - where TChar : unmanaged, IBinaryInteger + internal static int FormatIPv6Address(ushort[] address, uint scopeId, Span destination) + { + int pos = 0; + + if (IPv6AddressHelper.ShouldHaveIpv4Embedded(address)) + { + // We need to treat the last 2 ushorts as a 4-byte IPv4 address, + // so output the first 6 ushorts normally, followed by the IPv4 address. + AppendSections(address.AsSpan(0, 6), destination, ref pos); + if (destination[pos - 1] != ':') + { + destination[pos++] = ':'; + } + + pos += FormatIPv4Address(ExtractIPv4Address(address), destination.Slice(pos)); + } + else + { + // No IPv4 address. Output all 8 sections as part of the IPv6 address + // with normal formatting rules. + AppendSections(address.AsSpan(0, 8), destination, ref pos); + } + + // If there's a scope ID, append it. + if (scopeId != 0) + { + destination[pos++] = '%'; + + int bytesWritten; + bool formatted = scopeId.TryFormat(destination.Slice(pos), out bytesWritten); + + Debug.Assert(formatted); + pos += bytesWritten; + } + + return pos; + + // Appends each of the numbers in address in indexed range [fromInclusive, toExclusive), + // while also replacing the longest sequence of 0s found in that range with "::", as long + // as the sequence is more than one 0. + static void AppendSections(ReadOnlySpan address, Span destination, ref int offset) + { + // Find the longest sequence of zeros to be combined into a "::" + (int zeroStart, int zeroEnd) = IPv6AddressHelper.FindCompressionRange(address); + bool needsColon = false; + + // Handle a zero sequence if there is one + if (zeroStart >= 0) + { + // Output all of the numbers before the zero sequence + for (int i = 0; i < zeroStart; i++) + { + if (needsColon) + { + destination[offset++] = ':'; + } + needsColon = true; + AppendHex(address[i], destination, ref offset); + } + + // Output the zero sequence if there is one + destination[offset++] = ':'; + destination[offset++] = ':'; + needsColon = false; + } + + // Output everything after the zero sequence + for (int i = zeroEnd; i < address.Length; i++) + { + if (needsColon) + { + destination[offset++] = ':'; + } + needsColon = true; + AppendHex(address[i], destination, ref offset); + } + } + + // Appends a number as hexadecimal (without the leading "0x") + static void AppendHex(ushort value, Span destination, ref int offset) + { + if ((value & 0xFFF0) != 0) + { + if ((value & 0xFF00) != 0) + { + if ((value & 0xF000) != 0) + { + destination[offset++] = HexConverter.ToCharLower(value >> 12); + } + + destination[offset++] = HexConverter.ToCharLower(value >> 8); + } + + destination[offset++] = HexConverter.ToCharLower(value >> 4); + } + + destination[offset++] = HexConverter.ToCharLower(value); + } + } + + internal static int FormatIPv6Address(ushort[] address, uint scopeId, Span destination) { - Debug.Assert(typeof(TChar) == typeof(byte) || typeof(TChar) == typeof(char)); int pos = 0; if (IPv6AddressHelper.ShouldHaveIpv4Embedded(address)) @@ -202,9 +436,9 @@ internal static int FormatIPv6Address(ushort[] address, uint scopeId, Spa // We need to treat the last 2 ushorts as a 4-byte IPv4 address, // so output the first 6 ushorts normally, followed by the IPv4 address. AppendSections(address.AsSpan(0, 6), destination, ref pos); - if (destination[pos - 1] != TChar.CreateTruncating(':')) + if (destination[pos - 1] != (byte)':') { - destination[pos++] = TChar.CreateTruncating(':'); + destination[pos++] = (byte)':'; } pos += FormatIPv4Address(ExtractIPv4Address(address), destination.Slice(pos)); @@ -219,12 +453,10 @@ internal static int FormatIPv6Address(ushort[] address, uint scopeId, Spa // If there's a scope ID, append it. if (scopeId != 0) { - destination[pos++] = TChar.CreateTruncating('%'); + destination[pos++] = (byte)'%'; int bytesWritten; - bool formatted = typeof(TChar) == typeof(byte) ? - scopeId.TryFormat(MemoryMarshal.Cast(destination).Slice(pos), out bytesWritten) : - scopeId.TryFormat(MemoryMarshal.Cast(destination).Slice(pos), out bytesWritten); + bool formatted = scopeId.TryFormat(destination.Slice(pos), out bytesWritten); Debug.Assert(formatted); pos += bytesWritten; @@ -235,7 +467,7 @@ internal static int FormatIPv6Address(ushort[] address, uint scopeId, Spa // Appends each of the numbers in address in indexed range [fromInclusive, toExclusive), // while also replacing the longest sequence of 0s found in that range with "::", as long // as the sequence is more than one 0. - static void AppendSections(ReadOnlySpan address, Span destination, ref int offset) + static void AppendSections(ReadOnlySpan address, Span destination, ref int offset) { // Find the longest sequence of zeros to be combined into a "::" (int zeroStart, int zeroEnd) = IPv6AddressHelper.FindCompressionRange(address); @@ -249,15 +481,15 @@ static void AppendSections(ReadOnlySpan address, Span destination { if (needsColon) { - destination[offset++] = TChar.CreateTruncating(':'); + destination[offset++] = (byte)':'; } needsColon = true; AppendHex(address[i], destination, ref offset); } // Output the zero sequence if there is one - destination[offset++] = TChar.CreateTruncating(':'); - destination[offset++] = TChar.CreateTruncating(':'); + destination[offset++] = (byte)':'; + destination[offset++] = (byte)':'; needsColon = false; } @@ -266,7 +498,7 @@ static void AppendSections(ReadOnlySpan address, Span destination { if (needsColon) { - destination[offset++] = TChar.CreateTruncating(':'); + destination[offset++] = (byte)':'; } needsColon = true; AppendHex(address[i], destination, ref offset); @@ -274,7 +506,7 @@ static void AppendSections(ReadOnlySpan address, Span destination } // Appends a number as hexadecimal (without the leading "0x") - static void AppendHex(ushort value, Span destination, ref int offset) + static void AppendHex(ushort value, Span destination, ref int offset) { if ((value & 0xFFF0) != 0) { @@ -282,16 +514,16 @@ static void AppendHex(ushort value, Span destination, ref int offset) { if ((value & 0xF000) != 0) { - destination[offset++] = TChar.CreateTruncating(HexConverter.ToCharLower(value >> 12)); + destination[offset++] = (byte)HexConverter.ToCharLower(value >> 12); } - destination[offset++] = TChar.CreateTruncating(HexConverter.ToCharLower(value >> 8)); + destination[offset++] = (byte)HexConverter.ToCharLower(value >> 8); } - destination[offset++] = TChar.CreateTruncating(HexConverter.ToCharLower(value >> 4)); + destination[offset++] = (byte)HexConverter.ToCharLower(value >> 4); } - destination[offset++] = TChar.CreateTruncating(HexConverter.ToCharLower(value)); + destination[offset++] = (byte)HexConverter.ToCharLower(value); } } diff --git a/src/libraries/System.Net.Primitives/src/System/Net/IPEndPoint.cs b/src/libraries/System.Net.Primitives/src/System/Net/IPEndPoint.cs index f2ef4b990587cd..a2603973133880 100644 --- a/src/libraries/System.Net.Primitives/src/System/Net/IPEndPoint.cs +++ b/src/libraries/System.Net.Primitives/src/System/Net/IPEndPoint.cs @@ -96,23 +96,20 @@ public static bool TryParse(string s, [NotNullWhen(true)] out IPEndPoint? result return TryParse(s.AsSpan(), out result); } - internal static bool InternalTryParse(ReadOnlySpan s, [NotNullWhen(true)] out IPEndPoint? result) - where TChar : unmanaged, IBinaryInteger + internal static bool InternalTryParse(ReadOnlySpan s, [NotNullWhen(true)] out IPEndPoint? result) { - Debug.Assert(typeof(TChar) == typeof(byte) || typeof(TChar) == typeof(char)); - int addressLength = s.Length; // If there's no port then send the entire string to the address parser - int lastColonPos = s.LastIndexOf(TChar.CreateTruncating(':')); + int lastColonPos = s.LastIndexOf(':'); // Look to see if this is an IPv6 address with a port. if (lastColonPos > 0) { - if (s[lastColonPos - 1] == TChar.CreateTruncating(']')) + if (s[lastColonPos - 1] == ']') { addressLength = lastColonPos; } // Look to see if this is IPv4 with a port (IPv6 will have another colon) - else if (s.Slice(0, lastColonPos).LastIndexOf(TChar.CreateTruncating(':')) == -1) + else if (s.Slice(0, lastColonPos).LastIndexOf(':') == -1) { addressLength = lastColonPos; } @@ -129,17 +126,57 @@ internal static bool InternalTryParse(ReadOnlySpan s, [NotNullWhen else { uint port; - ReadOnlySpan portSpan = s.Slice(addressLength + 1); + ReadOnlySpan portSpan = s.Slice(addressLength + 1); bool isConvertedToInt; - if (typeof(TChar) == typeof(byte)) - { - isConvertedToInt = uint.TryParse(MemoryMarshal.Cast(portSpan), NumberStyles.None, CultureInfo.InvariantCulture, out port); - } - else + isConvertedToInt = uint.TryParse(portSpan, NumberStyles.None, CultureInfo.InvariantCulture, out port); + + if (isConvertedToInt && port <= MaxPort) { - isConvertedToInt = uint.TryParse(MemoryMarshal.Cast(portSpan), NumberStyles.None, CultureInfo.InvariantCulture, out port); + result = new IPEndPoint(address, (int)port); + return true; } + } + } + + result = null; + return false; + } + + internal static bool InternalTryParse(ReadOnlySpan s, [NotNullWhen(true)] out IPEndPoint? result) + { + int addressLength = s.Length; // If there's no port then send the entire string to the address parser + int lastColonPos = s.LastIndexOf((byte)':'); + + // Look to see if this is an IPv6 address with a port. + if (lastColonPos > 0) + { + if (s[lastColonPos - 1] == (byte)']') + { + addressLength = lastColonPos; + } + // Look to see if this is IPv4 with a port (IPv6 will have another colon) + else if (s.Slice(0, lastColonPos).LastIndexOf((byte)':') == -1) + { + addressLength = lastColonPos; + } + } + + IPAddress? address = IPAddressParser.Parse(s.Slice(0, addressLength), true); + if (address is not null) + { + if (addressLength == s.Length) + { + result = new IPEndPoint(address, 0); + return true; + } + else + { + uint port; + ReadOnlySpan portSpan = s.Slice(addressLength + 1); + bool isConvertedToInt; + + isConvertedToInt = uint.TryParse(portSpan, NumberStyles.None, CultureInfo.InvariantCulture, out port); if (isConvertedToInt && port <= MaxPort) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Guid.cs b/src/libraries/System.Private.CoreLib/src/System/Guid.cs index fbb3c4a2922090..44af91ec5c6ba7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Guid.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Guid.cs @@ -470,7 +470,12 @@ public static bool TryParseExact(ReadOnlySpan input, [StringSyntax(StringS return false; } } - private static bool TryParseGuid(ReadOnlySpan guidString, ref GuidResult result) where TChar : unmanaged, IUtfChar + + // + // Char parsing + // + + private static bool TryParseGuid(ReadOnlySpan guidString, ref GuidResult result) { guidString = Number.SpanTrim(guidString); // Remove whitespace from beginning and end @@ -480,23 +485,23 @@ private static bool TryParseGuid(ReadOnlySpan guidString, ref Guid return false; } - return TChar.CastToUInt32(guidString[0]) switch + return guidString[0] switch { '(' => TryParseExactP(guidString, ref result), - '{' => guidString[9] == TChar.CastFrom('-') ? + '{' => guidString[9] == '-' ? TryParseExactB(guidString, ref result) : TryParseExactX(guidString, ref result), - _ => guidString[8] == TChar.CastFrom('-') ? + _ => guidString[8] == '-' ? TryParseExactD(guidString, ref result) : TryParseExactN(guidString, ref result), }; } - private static bool TryParseExactB(ReadOnlySpan guidString, ref GuidResult result) where TChar : unmanaged, IUtfChar + private static bool TryParseExactB(ReadOnlySpan guidString, ref GuidResult result) { // e.g. "{d85b1407-351d-4694-9392-03acc5870eb1}" - if (guidString.Length != 38 || guidString[0] != TChar.CastFrom('{') || guidString[37] != TChar.CastFrom('}')) + if (guidString.Length != 38 || guidString[0] != '{' || guidString[37] != '}') { result.SetFailure(ParseFailure.Format_GuidInvLen); return false; @@ -505,11 +510,11 @@ private static bool TryParseExactB(ReadOnlySpan guidString, ref Gu return TryParseExactD(guidString.Slice(1, 36), ref result); } - private static bool TryParseExactD(ReadOnlySpan guidString, ref GuidResult result) where TChar : unmanaged, IUtfChar + private static bool TryParseExactD(ReadOnlySpan guidString, ref GuidResult result) { // e.g. "d85b1407-351d-4694-9392-03acc5870eb1" - if (guidString.Length != 36 || guidString[8] != TChar.CastFrom('-') || guidString[13] != TChar.CastFrom('-') || guidString[18] != TChar.CastFrom('-') || guidString[23] != TChar.CastFrom('-')) + if (guidString.Length != 36 || guidString[8] != '-' || guidString[13] != '-' || guidString[18] != '-' || guidString[23] != '-') { result.SetFailure(guidString.Length != 36 ? ParseFailure.Format_GuidInvLen : ParseFailure.Format_GuidDashes); return false; @@ -517,16 +522,16 @@ private static bool TryParseExactD(ReadOnlySpan guidString, ref Gu Span bytes = MemoryMarshal.AsBytes(new Span(ref result)); int invalidIfNegative = 0; - bytes[0] = DecodeByte(guidString[6], guidString[7], ref invalidIfNegative); - bytes[1] = DecodeByte(guidString[4], guidString[5], ref invalidIfNegative); - bytes[2] = DecodeByte(guidString[2], guidString[3], ref invalidIfNegative); - bytes[3] = DecodeByte(guidString[0], guidString[1], ref invalidIfNegative); - bytes[4] = DecodeByte(guidString[11], guidString[12], ref invalidIfNegative); - bytes[5] = DecodeByte(guidString[9], guidString[10], ref invalidIfNegative); - bytes[6] = DecodeByte(guidString[16], guidString[17], ref invalidIfNegative); - bytes[7] = DecodeByte(guidString[14], guidString[15], ref invalidIfNegative); - bytes[8] = DecodeByte(guidString[19], guidString[20], ref invalidIfNegative); - bytes[9] = DecodeByte(guidString[21], guidString[22], ref invalidIfNegative); + bytes[0] = DecodeByte(guidString[6], guidString[7], ref invalidIfNegative); + bytes[1] = DecodeByte(guidString[4], guidString[5], ref invalidIfNegative); + bytes[2] = DecodeByte(guidString[2], guidString[3], ref invalidIfNegative); + bytes[3] = DecodeByte(guidString[0], guidString[1], ref invalidIfNegative); + bytes[4] = DecodeByte(guidString[11], guidString[12], ref invalidIfNegative); + bytes[5] = DecodeByte(guidString[9], guidString[10], ref invalidIfNegative); + bytes[6] = DecodeByte(guidString[16], guidString[17], ref invalidIfNegative); + bytes[7] = DecodeByte(guidString[14], guidString[15], ref invalidIfNegative); + bytes[8] = DecodeByte(guidString[19], guidString[20], ref invalidIfNegative); + bytes[9] = DecodeByte(guidString[21], guidString[22], ref invalidIfNegative); bytes[10] = DecodeByte(guidString[24], guidString[25], ref invalidIfNegative); bytes[11] = DecodeByte(guidString[26], guidString[27], ref invalidIfNegative); bytes[12] = DecodeByte(guidString[28], guidString[29], ref invalidIfNegative); @@ -552,7 +557,7 @@ private static bool TryParseExactD(ReadOnlySpan guidString, ref Gu // We continue to support these but expect them to be incredibly rare. As such, we // optimize for correctly formed strings where all the digits are valid hex, and only // fall back to supporting these other forms if parsing fails. - if (guidString.ContainsAny(TChar.CastFrom('X'), TChar.CastFrom('x'), TChar.CastFrom('+')) && TryCompatParsing(guidString, ref result)) + if (guidString.ContainsAny('X', 'x', '+') && TryCompatParsing(guidString, ref result)) { return true; } @@ -560,7 +565,7 @@ private static bool TryParseExactD(ReadOnlySpan guidString, ref Gu result.SetFailure(ParseFailure.Format_GuidInvalidChar); return false; - static bool TryCompatParsing(ReadOnlySpan guidString, ref GuidResult result) + static bool TryCompatParsing(ReadOnlySpan guidString, ref GuidResult result) { guidString = guidString.Slice(0, 36); @@ -593,7 +598,7 @@ static bool TryCompatParsing(ReadOnlySpan guidString, ref GuidResult resu } } - private static bool TryParseExactN(ReadOnlySpan guidString, ref GuidResult result) where TChar : unmanaged, IUtfChar + private static bool TryParseExactN(ReadOnlySpan guidString, ref GuidResult result) { // e.g. "d85b1407351d4694939203acc5870eb1" @@ -636,11 +641,11 @@ private static bool TryParseExactN(ReadOnlySpan guidString, ref Gu return false; } - private static bool TryParseExactP(ReadOnlySpan guidString, ref GuidResult result) where TChar : unmanaged, IUtfChar + private static bool TryParseExactP(ReadOnlySpan guidString, ref GuidResult result) { // e.g. "(d85b1407-351d-4694-9392-03acc5870eb1)" - if (guidString.Length != 38 || guidString[0] != TChar.CastFrom('(') || guidString[37] != TChar.CastFrom(')')) + if (guidString.Length != 38 || guidString[0] != '(' || guidString[37] != ')') { result.SetFailure(ParseFailure.Format_GuidInvLen); return false; @@ -649,7 +654,7 @@ private static bool TryParseExactP(ReadOnlySpan guidString, ref Gu return TryParseExactD(guidString.Slice(1, 36), ref result); } - private static bool TryParseExactX(ReadOnlySpan guidString, ref GuidResult result) where TChar : unmanaged, IUtfChar + private static bool TryParseExactX(ReadOnlySpan guidString, ref GuidResult result) { // e.g. "{0xd85b1407,0x351d,0x4694,{0x93,0x92,0x03,0xac,0xc5,0x87,0x0e,0xb1}}" @@ -664,10 +669,10 @@ private static bool TryParseExactX(ReadOnlySpan guidString, ref Gu // Eat all of the whitespace. Unlike the other forms, X allows for any amount of whitespace // anywhere, not just at the beginning and end. - guidString = EatAllWhitespace(guidString, ref result); + guidString = EatAllWhitespace(guidString); // Check for leading '{' - if (guidString.Length == 0 || guidString[0] != TChar.CastFrom('{')) + if (guidString.Length == 0 || guidString[0] != '{') { result.SetFailure(ParseFailure.Format_GuidBrace); return false; @@ -682,7 +687,7 @@ private static bool TryParseExactX(ReadOnlySpan guidString, ref Gu // Find the end of this hex number (since it is not fixed length) int numStart = 3; - int numLen = guidString.Slice(numStart).IndexOf(TChar.CastFrom(',')); + int numLen = guidString.Slice(numStart).IndexOf(','); if (numLen <= 0) { result.SetFailure(ParseFailure.Format_GuidComma); @@ -704,7 +709,7 @@ private static bool TryParseExactX(ReadOnlySpan guidString, ref Gu } // +3 to get by ',0x' numStart = numStart + numLen + 3; - numLen = guidString.Slice(numStart).IndexOf(TChar.CastFrom(',')); + numLen = guidString.Slice(numStart).IndexOf(','); if (numLen <= 0) { result.SetFailure(ParseFailure.Format_GuidComma); @@ -726,7 +731,7 @@ private static bool TryParseExactX(ReadOnlySpan guidString, ref Gu } // +3 to get by ',0x' numStart = numStart + numLen + 3; - numLen = guidString.Slice(numStart).IndexOf(TChar.CastFrom(',')); + numLen = guidString.Slice(numStart).IndexOf(','); if (numLen <= 0) { result.SetFailure(ParseFailure.Format_GuidComma); @@ -741,7 +746,7 @@ private static bool TryParseExactX(ReadOnlySpan guidString, ref Gu } // Check for '{' - if ((uint)guidString.Length <= (uint)(numStart + numLen + 1) || guidString[numStart + numLen + 1] != TChar.CastFrom('{')) + if ((uint)guidString.Length <= (uint)(numStart + numLen + 1) || guidString[numStart + numLen + 1] != '{') { result.SetFailure(ParseFailure.Format_GuidBrace); return false; @@ -764,7 +769,7 @@ private static bool TryParseExactX(ReadOnlySpan guidString, ref Gu // Calculate number length if (i < 7) // first 7 cases { - numLen = guidString.Slice(numStart).IndexOf(TChar.CastFrom(',')); + numLen = guidString.Slice(numStart).IndexOf(','); if (numLen <= 0) { result.SetFailure(ParseFailure.Format_GuidComma); @@ -773,7 +778,7 @@ private static bool TryParseExactX(ReadOnlySpan guidString, ref Gu } else // last case ends with '}', not ',' { - numLen = guidString.Slice(numStart).IndexOf(TChar.CastFrom('}')); + numLen = guidString.Slice(numStart).IndexOf('}'); if (numLen <= 0) { result.SetFailure(ParseFailure.Format_GuidBraceAfterLastNumber); @@ -798,7 +803,7 @@ private static bool TryParseExactX(ReadOnlySpan guidString, ref Gu } // Check for last '}' - if (numStart + numLen + 1 >= guidString.Length || guidString[numStart + numLen + 1] != TChar.CastFrom('}')) + if (numStart + numLen + 1 >= guidString.Length || guidString[numStart + numLen + 1] != '}') { result.SetFailure(ParseFailure.Format_GuidEndBrace); return false; @@ -815,7 +820,7 @@ private static bool TryParseExactX(ReadOnlySpan guidString, ref Gu } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static byte DecodeByte(TChar ch1, TChar ch2, ref int invalidIfNegative) where TChar : unmanaged, IUtfChar + private static byte DecodeByte(char ch1, char ch2, ref int invalidIfNegative) { ReadOnlySpan lookup = HexConverter.CharToHexLookup; Debug.Assert(lookup.Length == 256); @@ -823,37 +828,37 @@ private static byte DecodeByte(TChar ch1, TChar ch2, ref int invalidIfNeg int lower = (sbyte)lookup[byte.CreateTruncating(ch2)]; int result = (upper << 4) | lower; - uint c1 = TChar.CastToUInt32(ch1); - uint c2 = TChar.CastToUInt32(ch2); + uint c1 = (uint)ch1; + uint c2 = (uint)ch2; // Result will be negative if ch1 or/and ch2 are greater than 0xFF result = (c1 | c2) >> 8 == 0 ? result : -1; invalidIfNegative |= result; return (byte)result; } - private static bool TryParseHex(ReadOnlySpan guidString, out ushort result, ref bool overflow) where TChar : unmanaged, IUtfChar + private static bool TryParseHex(ReadOnlySpan guidString, out ushort result, ref bool overflow) { bool success = TryParseHex(guidString, out uint tmp, ref overflow); result = (ushort)tmp; return success; } - private static bool TryParseHex(ReadOnlySpan guidString, out uint result) where TChar : unmanaged, IUtfChar + private static bool TryParseHex(ReadOnlySpan guidString, out uint result) { bool overflowIgnored = false; return TryParseHex(guidString, out result, ref overflowIgnored); } - private static bool TryParseHex(ReadOnlySpan guidString, out uint result, ref bool overflow) where TChar : unmanaged, IUtfChar + private static bool TryParseHex(ReadOnlySpan guidString, out uint result, ref bool overflow) { if (guidString.Length > 0) { - if (guidString[0] == TChar.CastFrom('+')) + if (guidString[0] == '+') { guidString = guidString.Slice(1); } - if (guidString.Length > 1 && guidString[0] == TChar.CastFrom('0') && (guidString[1] | TChar.CastFrom(0x20)) == TChar.CastFrom('x')) + if (guidString.Length > 1 && guidString[0] == '0' && (guidString[1] | 0x20) == 'x') { guidString = guidString.Slice(2); } @@ -861,7 +866,7 @@ private static bool TryParseHex(ReadOnlySpan guidString, out uint // Skip past leading 0s. int i = 0; - for (; i < guidString.Length && guidString[i] == TChar.CastFrom('0'); i++) ; + for (; i < guidString.Length && guidString[i] == '0'; i++) ; int processedDigits = 0; uint tmp = 0; @@ -884,106 +889,525 @@ private static bool TryParseHex(ReadOnlySpan guidString, out uint return true; } - private static ReadOnlySpan EatAllWhitespace(ReadOnlySpan str, scoped ref GuidResult result) where TChar : unmanaged, IUtfChar + private static ReadOnlySpan EatAllWhitespace(ReadOnlySpan str) { - if (typeof(TChar) == typeof(char)) + ReadOnlySpan charSpan = Unsafe.BitCast, ReadOnlySpan>(str); + // Find the first whitespace character. If there is none, just return the input. + int i; + for (i = 0; i < charSpan.Length && !char.IsWhiteSpace(charSpan[i]); i++) ; + if (i == charSpan.Length) + { + return str; + } + + // There was at least one whitespace. Copy over everything prior to it to a new array. + var chArr = new char[charSpan.Length]; + int newLength = 0; + if (i > 0) + { + newLength = i; + charSpan.Slice(0, i).CopyTo(chArr); + } + + // Loop through the remaining chars, copying over non-whitespace. + for (; i < charSpan.Length; i++) { - ReadOnlySpan charSpan = Unsafe.BitCast, ReadOnlySpan>(str); - // Find the first whitespace character. If there is none, just return the input. - int i; - for (i = 0; i < charSpan.Length && !char.IsWhiteSpace(charSpan[i]); i++) ; - if (i == charSpan.Length) + char c = charSpan[i]; + if (!char.IsWhiteSpace(c)) { - return str; + chArr[newLength++] = c; } + } + + // Return the string with the whitespace removed. + return new ReadOnlySpan(chArr, 0, newLength); + } + + private static bool IsHexPrefix(ReadOnlySpan str, int i) => + i + 1 < str.Length && + str[i] == '0' && + (str[i + 1] | 0x20) == 'x'; + + // + // Byte parsing + // + + private static bool TryParseGuid(ReadOnlySpan guidString, ref GuidResult result) + { + guidString = Number.SpanTrim(guidString); // Remove whitespace from beginning and end + + if (guidString.Length < 32) // Minimal length we can parse ('N' format) + { + result.SetFailure(ParseFailure.Format_GuidUnrecognized); + return false; + } + + return guidString[0] switch + { + (byte)('(') => TryParseExactP(guidString, ref result), + (byte)('{') => guidString[9] == (byte)('-') ? + TryParseExactB(guidString, ref result) : + TryParseExactX(guidString, ref result), + _ => guidString[8] == (byte)('-') ? + TryParseExactD(guidString, ref result) : + TryParseExactN(guidString, ref result), + }; + } + + private static bool TryParseExactB(ReadOnlySpan guidString, ref GuidResult result) + { + // e.g. "{d85b1407-351d-4694-9392-03acc5870eb1}" + + if (guidString.Length != 38 || guidString[0] != (byte)('{') || guidString[37] != (byte)('}')) + { + result.SetFailure(ParseFailure.Format_GuidInvLen); + return false; + } + + return TryParseExactD(guidString.Slice(1, 36), ref result); + } - // There was at least one whitespace. Copy over everything prior to it to a new array. - var chArr = new char[charSpan.Length]; - int newLength = 0; - if (i > 0) + private static bool TryParseExactD(ReadOnlySpan guidString, ref GuidResult result) + { + // e.g. "d85b1407-351d-4694-9392-03acc5870eb1" + + if (guidString.Length != 36 || guidString[8] != (byte)('-') || guidString[13] != (byte)('-') || guidString[18] != (byte)('-') || guidString[23] != (byte)('-')) + { + result.SetFailure(guidString.Length != 36 ? ParseFailure.Format_GuidInvLen : ParseFailure.Format_GuidDashes); + return false; + } + + Span bytes = MemoryMarshal.AsBytes(new Span(ref result)); + int invalidIfNegative = 0; + bytes[0] = DecodeByte(guidString[6], guidString[7], ref invalidIfNegative); + bytes[1] = DecodeByte(guidString[4], guidString[5], ref invalidIfNegative); + bytes[2] = DecodeByte(guidString[2], guidString[3], ref invalidIfNegative); + bytes[3] = DecodeByte(guidString[0], guidString[1], ref invalidIfNegative); + bytes[4] = DecodeByte(guidString[11], guidString[12], ref invalidIfNegative); + bytes[5] = DecodeByte(guidString[9], guidString[10], ref invalidIfNegative); + bytes[6] = DecodeByte(guidString[16], guidString[17], ref invalidIfNegative); + bytes[7] = DecodeByte(guidString[14], guidString[15], ref invalidIfNegative); + bytes[8] = DecodeByte(guidString[19], guidString[20], ref invalidIfNegative); + bytes[9] = DecodeByte(guidString[21], guidString[22], ref invalidIfNegative); + bytes[10] = DecodeByte(guidString[24], guidString[25], ref invalidIfNegative); + bytes[11] = DecodeByte(guidString[26], guidString[27], ref invalidIfNegative); + bytes[12] = DecodeByte(guidString[28], guidString[29], ref invalidIfNegative); + bytes[13] = DecodeByte(guidString[30], guidString[31], ref invalidIfNegative); + bytes[14] = DecodeByte(guidString[32], guidString[33], ref invalidIfNegative); + bytes[15] = DecodeByte(guidString[34], guidString[35], ref invalidIfNegative); + + if (invalidIfNegative >= 0) + { + if (!BitConverter.IsLittleEndian) { - newLength = i; - charSpan.Slice(0, i).CopyTo(chArr); + result.ReverseAbcEndianness(); } - // Loop through the remaining chars, copying over non-whitespace. - for (; i < charSpan.Length; i++) + return true; + } + + // The 'D' format has some undesirable behavior leftover from its original implementation: + // - Components may begin with "0x" and/or "+", but the expected length of each component + // needs to include those prefixes, e.g. a four digit component could be "1234" or + // "0x34" or "+0x4" or "+234", but not "0x1234" nor "+1234" nor "+0x1234". + // - "0X" is valid instead of "0x" + // We continue to support these but expect them to be incredibly rare. As such, we + // optimize for correctly formed strings where all the digits are valid hex, and only + // fall back to supporting these other forms if parsing fails. + if (guidString.ContainsAny((byte)('X'), (byte)('x'), (byte)('+')) && TryCompatParsing(guidString, ref result)) + { + return true; + } + + result.SetFailure(ParseFailure.Format_GuidInvalidChar); + return false; + + static bool TryCompatParsing(ReadOnlySpan guidString, ref GuidResult result) + { + guidString = guidString.Slice(0, 36); + + if (TryParseHex(guidString.Slice(0, 8), out result._a) && // _a + TryParseHex(guidString.Slice(9, 4), out uint uintTmp)) // _b { - char c = charSpan[i]; - if (!char.IsWhiteSpace(c)) + result._b = (ushort)uintTmp; + if (TryParseHex(guidString.Slice(14, 4), out uintTmp)) // _c { - chArr[newLength++] = c; + result._c = (ushort)uintTmp; + if (TryParseHex(guidString.Slice(19, 4), out uintTmp)) // _d, _e + { + result._de = BitConverter.IsLittleEndian ? BinaryPrimitives.ReverseEndianness((ushort)uintTmp) : (ushort)uintTmp; + if (TryParseHex(guidString.Slice(24, 4), out uintTmp)) // _f, _g + { + result._fg = BitConverter.IsLittleEndian ? BinaryPrimitives.ReverseEndianness((ushort)uintTmp) : (ushort)uintTmp; + + // Unlike the other components, this one never allowed 0x or +, so we can parse it as straight hex. + if (Number.TryParseBinaryIntegerHexNumberStyle(guidString.Slice(28, 8), NumberStyles.AllowHexSpecifier, out uintTmp) == Number.ParsingStatus.OK) // _h, _i, _j, _k + { + result._hijk = BitConverter.IsLittleEndian ? BinaryPrimitives.ReverseEndianness(uintTmp) : uintTmp; + return true; + } + } + } } } - // Return the string with the whitespace removed. - return Unsafe.BitCast, ReadOnlySpan>(new ReadOnlySpan(chArr, 0, newLength)); + return false; } - else + } + + private static bool TryParseExactN(ReadOnlySpan guidString, ref GuidResult result) + { + // e.g. "d85b1407351d4694939203acc5870eb1" + + if (guidString.Length != 32) { - Debug.Assert(typeof(TChar) == typeof(byte)); + result.SetFailure(ParseFailure.Format_GuidInvLen); + return false; + } - ReadOnlySpan srcUtf8Span = Unsafe.BitCast, ReadOnlySpan>(str); + Span bytes = MemoryMarshal.AsBytes(new Span(ref result)); + int invalidIfNegative = 0; + bytes[0] = DecodeByte(guidString[6], guidString[7], ref invalidIfNegative); + bytes[1] = DecodeByte(guidString[4], guidString[5], ref invalidIfNegative); + bytes[2] = DecodeByte(guidString[2], guidString[3], ref invalidIfNegative); + bytes[3] = DecodeByte(guidString[0], guidString[1], ref invalidIfNegative); + bytes[4] = DecodeByte(guidString[10], guidString[11], ref invalidIfNegative); + bytes[5] = DecodeByte(guidString[8], guidString[9], ref invalidIfNegative); + bytes[6] = DecodeByte(guidString[14], guidString[15], ref invalidIfNegative); + bytes[7] = DecodeByte(guidString[12], guidString[13], ref invalidIfNegative); + bytes[8] = DecodeByte(guidString[16], guidString[17], ref invalidIfNegative); + bytes[9] = DecodeByte(guidString[18], guidString[19], ref invalidIfNegative); + bytes[10] = DecodeByte(guidString[20], guidString[21], ref invalidIfNegative); + bytes[11] = DecodeByte(guidString[22], guidString[23], ref invalidIfNegative); + bytes[12] = DecodeByte(guidString[24], guidString[25], ref invalidIfNegative); + bytes[13] = DecodeByte(guidString[26], guidString[27], ref invalidIfNegative); + bytes[14] = DecodeByte(guidString[28], guidString[29], ref invalidIfNegative); + bytes[15] = DecodeByte(guidString[30], guidString[31], ref invalidIfNegative); - // Find the first whitespace character. If there is none, just return the input. - int i = 0; - while (i < srcUtf8Span.Length) + if (invalidIfNegative >= 0) + { + if (!BitConverter.IsLittleEndian) { - if (Rune.DecodeFromUtf8(srcUtf8Span.Slice(i), out Rune current, out int bytesConsumed) != Buffers.OperationStatus.Done) + result.ReverseAbcEndianness(); + } + + return true; + } + + result.SetFailure(ParseFailure.Format_GuidInvalidChar); + return false; + } + + private static bool TryParseExactP(ReadOnlySpan guidString, ref GuidResult result) + { + // e.g. "(d85b1407-351d-4694-9392-03acc5870eb1)" + + if (guidString.Length != 38 || guidString[0] != (byte)('(') || guidString[37] != (byte)(')')) + { + result.SetFailure(ParseFailure.Format_GuidInvLen); + return false; + } + + return TryParseExactD(guidString.Slice(1, 36), ref result); + } + + private static bool TryParseExactX(ReadOnlySpan guidString, ref GuidResult result) + { + // e.g. "{0xd85b1407,0x351d,0x4694,{0x93,0x92,0x03,0xac,0xc5,0x87,0x0e,0xb1}}" + + // Compat notes due to the previous implementation's implementation details. + // - Each component need not be the full expected number of digits. + // - Each component may contain any number of leading 0s + // - The "short" components are parsed as 32-bits and only considered to overflow if they'd overflow 32 bits. + // - The "byte" components are parsed as 32-bits and are considered to overflow if they'd overflow 8 bits, + // but for the Guid ctor, whether they overflow 8 bits or 32 bits results in differing exceptions. + // - Components may begin with "0x", "0x+", even "0x+0x". + // - "0X" is valid instead of "0x" + + // Eat all of the whitespace. Unlike the other forms, X allows for any amount of whitespace + // anywhere, not just at the beginning and end. + guidString = EatAllWhitespace(guidString, ref result); + + // Check for leading '{' + if (guidString.Length == 0 || guidString[0] != (byte)('{')) + { + result.SetFailure(ParseFailure.Format_GuidBrace); + return false; + } + + // Check for '0x' + if (!IsHexPrefix(guidString, 1)) + { + result.SetFailure(ParseFailure.Format_GuidHexPrefix); + return false; + } + + // Find the end of this hex number (since it is not fixed length) + int numStart = 3; + int numLen = guidString.Slice(numStart).IndexOf((byte)(',')); + if (numLen <= 0) + { + result.SetFailure(ParseFailure.Format_GuidComma); + return false; + } + + bool overflow = false; + if (!TryParseHex(guidString.Slice(numStart, numLen), out result._a, ref overflow) || overflow) + { + result.SetFailure(overflow ? ParseFailure.Overflow_UInt32 : ParseFailure.Format_GuidInvalidChar); + return false; + } + + // Check for '0x' + if (!IsHexPrefix(guidString, numStart + numLen + 1)) + { + result.SetFailure(ParseFailure.Format_GuidHexPrefix); + return false; + } + // +3 to get by ',0x' + numStart = numStart + numLen + 3; + numLen = guidString.Slice(numStart).IndexOf((byte)(',')); + if (numLen <= 0) + { + result.SetFailure(ParseFailure.Format_GuidComma); + return false; + } + + // Read in the number + if (!TryParseHex(guidString.Slice(numStart, numLen), out result._b, ref overflow) || overflow) + { + result.SetFailure(overflow ? ParseFailure.Overflow_UInt32 : ParseFailure.Format_GuidInvalidChar); + return false; + } + + // Check for '0x' + if (!IsHexPrefix(guidString, numStart + numLen + 1)) + { + result.SetFailure(ParseFailure.Format_GuidHexPrefix); + return false; + } + // +3 to get by ',0x' + numStart = numStart + numLen + 3; + numLen = guidString.Slice(numStart).IndexOf((byte)(',')); + if (numLen <= 0) + { + result.SetFailure(ParseFailure.Format_GuidComma); + return false; + } + + // Read in the number + if (!TryParseHex(guidString.Slice(numStart, numLen), out result._c, ref overflow) || overflow) + { + result.SetFailure(overflow ? ParseFailure.Overflow_UInt32 : ParseFailure.Format_GuidInvalidChar); + return false; + } + + // Check for '{' + if ((uint)guidString.Length <= (uint)(numStart + numLen + 1) || guidString[numStart + numLen + 1] != (byte)('{')) + { + result.SetFailure(ParseFailure.Format_GuidBrace); + return false; + } + + // Prepare for loop + numLen++; + for (int i = 0; i < 8; i++) + { + // Check for '0x' + if (!IsHexPrefix(guidString, numStart + numLen + 1)) + { + result.SetFailure(ParseFailure.Format_GuidHexPrefix); + return false; + } + + // +3 to get by ',0x' or '{0x' for first case + numStart = numStart + numLen + 3; + + // Calculate number length + if (i < 7) // first 7 cases + { + numLen = guidString.Slice(numStart).IndexOf((byte)(',')); + if (numLen <= 0) { - result.SetFailure(ParseFailure.Format_GuidInvalidChar); - return ReadOnlySpan.Empty; + result.SetFailure(ParseFailure.Format_GuidComma); + return false; } - - if (!Rune.IsWhiteSpace(current)) + } + else // last case ends with '}', not ',' + { + numLen = guidString.Slice(numStart).IndexOf((byte)('}')); + if (numLen <= 0) { - break; + result.SetFailure(ParseFailure.Format_GuidBraceAfterLastNumber); + return false; } + } + + // Read in the number + if (!TryParseHex(guidString.Slice(numStart, numLen), out uint byteVal, ref overflow) || overflow || byteVal > byte.MaxValue) + { + // The previous implementation had some odd inconsistencies, which are carried forward here. + // The byte values in the X format are treated as integers with regards to overflow, so + // a "byte" value like 0xddd in Guid's ctor results in a FormatException but 0xddddddddd results + // in OverflowException. + result.SetFailure( + overflow ? ParseFailure.Overflow_UInt32 : + byteVal > byte.MaxValue ? ParseFailure.Overflow_Byte : + ParseFailure.Format_GuidInvalidChar); + return false; + } + Unsafe.Add(ref result._d, i) = (byte)byteVal; + } + + // Check for last '}' + if (numStart + numLen + 1 >= guidString.Length || guidString[numStart + numLen + 1] != (byte)('}')) + { + result.SetFailure(ParseFailure.Format_GuidEndBrace); + return false; + } + + // Check if we have extra characters at the end + if (numStart + numLen + 1 != guidString.Length - 1) + { + result.SetFailure(ParseFailure.Format_ExtraJunkAtEnd); + return false; + } + + return true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static byte DecodeByte(byte ch1, byte ch2, ref int invalidIfNegative) + { + ReadOnlySpan lookup = HexConverter.CharToHexLookup; + Debug.Assert(lookup.Length == 256); + int upper = (sbyte)lookup[byte.CreateTruncating(ch1)]; + int lower = (sbyte)lookup[byte.CreateTruncating(ch2)]; + int result = (upper << 4) | lower; + + uint c1 = (uint)ch1; + uint c2 = (uint)ch2; + // Result will be negative if ch1 or/and ch2 are greater than 0xFF + result = (c1 | c2) >> 8 == 0 ? result : -1; + invalidIfNegative |= result; + return (byte)result; + } + + private static bool TryParseHex(ReadOnlySpan guidString, out ushort result, ref bool overflow) + { + bool success = TryParseHex(guidString, out uint tmp, ref overflow); + result = (ushort)tmp; + return success; + } + + private static bool TryParseHex(ReadOnlySpan guidString, out uint result) + { + bool overflowIgnored = false; + return TryParseHex(guidString, out result, ref overflowIgnored); + } + + private static bool TryParseHex(ReadOnlySpan guidString, out uint result, ref bool overflow) + { + if (guidString.Length > 0) + { + if (guidString[0] == (byte)('+')) + { + guidString = guidString.Slice(1); + } - i += bytesConsumed; + if (guidString.Length > 1 && guidString[0] == (byte)('0') && (guidString[1] | (byte)(0x20)) == (byte)('x')) + { + guidString = guidString.Slice(2); } + } - if (i == srcUtf8Span.Length) + // Skip past leading 0s. + int i = 0; + for (; i < guidString.Length && guidString[i] == (byte)('0'); i++) ; + + int processedDigits = 0; + uint tmp = 0; + for (; i < guidString.Length; i++) + { + int c = int.CreateTruncating(guidString[i]); + int numValue = HexConverter.FromChar(c); + if (numValue == 0xFF) { - return str; + if (processedDigits > 8) overflow = true; + result = 0; + return false; } + tmp = (tmp * 16) + (uint)numValue; + processedDigits++; + } + + if (processedDigits > 8) overflow = true; + result = tmp; + return true; + } - // There was at least one whitespace. Copy over everything prior to it to a new array. - Span destUtf8Span = new byte[srcUtf8Span.Length]; - int newLength = 0; - if (i > 0) + private static ReadOnlySpan EatAllWhitespace(ReadOnlySpan str, scoped ref GuidResult result) + { + ReadOnlySpan srcUtf8Span = Unsafe.BitCast, ReadOnlySpan>(str); + + // Find the first whitespace character. If there is none, just return the input. + int i = 0; + while (i < srcUtf8Span.Length) + { + if (Rune.DecodeFromUtf8(srcUtf8Span.Slice(i), out Rune current, out int bytesConsumed) != Buffers.OperationStatus.Done) { - newLength = i; - srcUtf8Span.Slice(0, i).CopyTo(destUtf8Span); + result.SetFailure(ParseFailure.Format_GuidInvalidChar); + return ReadOnlySpan.Empty; } - // Loop through the remaining chars, copying over non-whitespace. - while (i < srcUtf8Span.Length) + if (!Rune.IsWhiteSpace(current)) { - if (Rune.DecodeFromUtf8(srcUtf8Span.Slice(i), out Rune current, out int bytesConsumed) != Buffers.OperationStatus.Done) - { - result.SetFailure(ParseFailure.Format_GuidInvalidChar); - return ReadOnlySpan.Empty; - } + break; + } - if (!Rune.IsWhiteSpace(current)) - { - srcUtf8Span.Slice(i, bytesConsumed).CopyTo(destUtf8Span.Slice(newLength)); - newLength += bytesConsumed; - } + i += bytesConsumed; + } - i += bytesConsumed; + if (i == srcUtf8Span.Length) + { + return str; + } + + // There was at least one whitespace. Copy over everything prior to it to a new array. + Span destUtf8Span = new byte[srcUtf8Span.Length]; + int newLength = 0; + if (i > 0) + { + newLength = i; + srcUtf8Span.Slice(0, i).CopyTo(destUtf8Span); + } + + // Loop through the remaining chars, copying over non-whitespace. + while (i < srcUtf8Span.Length) + { + if (Rune.DecodeFromUtf8(srcUtf8Span.Slice(i), out Rune current, out int bytesConsumed) != Buffers.OperationStatus.Done) + { + result.SetFailure(ParseFailure.Format_GuidInvalidChar); + return ReadOnlySpan.Empty; + } + + if (!Rune.IsWhiteSpace(current)) + { + srcUtf8Span.Slice(i, bytesConsumed).CopyTo(destUtf8Span.Slice(newLength)); + newLength += bytesConsumed; } - // Return the string with the whitespace removed. - return Unsafe.BitCast, ReadOnlySpan>(destUtf8Span.Slice(0, newLength)); + i += bytesConsumed; } + + // Return the string with the whitespace removed. + return destUtf8Span.Slice(0, newLength); } - private static bool IsHexPrefix(ReadOnlySpan str, int i) where TChar : unmanaged, IUtfChar => + private static bool IsHexPrefix(ReadOnlySpan str, int i) => i + 1 < str.Length && - str[i] == TChar.CastFrom('0') && - (str[i + 1] | TChar.CastFrom(0x20)) == TChar.CastFrom('x'); + str[i] == (byte)('0') && + (str[i + 1] | (byte)(0x20)) == (byte)('x'); // Returns an unsigned byte array containing the GUID. public byte[] ToByteArray() diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 3808fb037410c9..d033416bb0fb81 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -70,16 +70,13 @@ internal static unsafe void ComputeAnyByteState(ReadOnlySpan values, out A state = new AnyByteState(bitmapSpace0, bitmapSpace1, lookupLocal); } - internal static unsafe void ComputeAsciiState(ReadOnlySpan values, out AsciiState state) - where T : struct, IUnsignedNumber + internal static unsafe void ComputeAsciiState(ReadOnlySpan values, out AsciiState state) { - Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char)); - Vector128 bitmapSpace = default; byte* bitmapLocal = (byte*)&bitmapSpace; BitVector256 lookupLocal = default; - foreach (T tValue in values) + foreach (char tValue in values) { int value = int.CreateChecked(tValue); @@ -95,11 +92,8 @@ internal static unsafe void ComputeAsciiState(ReadOnlySpan values, out Asc state = new AsciiState(bitmapSpace, lookupLocal); } - public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int maxInclusive) - where T : struct, IUnsignedNumber + public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int maxInclusive) { - Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char)); - if (!IsVectorizationSupported || values.Length > 16) { return false; @@ -112,7 +106,7 @@ public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int ma return false; } - if (typeof(T) == typeof(char) && maxInclusive >= byte.MaxValue) + if (maxInclusive >= byte.MaxValue) { // When packing UTF-16 characters into bytes, values may saturate to 255 (false positives), hence ">=" instead of ">". return false; @@ -121,7 +115,7 @@ public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int ma // We assume there are no duplicates to simplify the logic (if there are any, they just won't use this searching approach). int seenNibbles = 0; - foreach (T tValue in values) + foreach (char tValue in values) { int bit = 1 << (int.CreateChecked(tValue) & 0xF); @@ -137,17 +131,97 @@ public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int ma return true; } - public static void ComputeUniqueLowNibbleState(ReadOnlySpan values, out AsciiState state) - where T : struct, IUnsignedNumber + public static void ComputeUniqueLowNibbleState(ReadOnlySpan values, out AsciiState state) { - Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char)); + Vector128 valuesByLowNibble = default; + BitVector256 lookup = default; + + foreach (char tValue in values) + { + byte value = (byte)(tValue); + lookup.Set(value); + valuesByLowNibble.SetElementUnsafe(value & 0xF, value); + } + // Elements of 'valuesByLowNibble' where no value had that low nibble will be left uninitialized at 0. + // For most, that is okay, as only the zero character in the input could ever match against them, + // but where such input characters will always be mapped to the 0th element of 'valuesByLowNibble'. + // + // That does mean we could still see false positivies if none of the values had a low nibble of zero. + // To avoid that, we can replace the 0th element with any other byte that has a non-zero low nibble. + // The zero character will no longer match, and the new value we pick won't match either as + // it will be mapped to a different element in 'valuesByLowNibble' given its non-zero low nibble. + if (valuesByLowNibble.GetElement(0) == 0 && !lookup.Contains(0)) + { + valuesByLowNibble.SetElementUnsafe(0, (byte)1); + } + + state = new AsciiState(valuesByLowNibble, lookup); + } + + internal static unsafe void ComputeAsciiState(ReadOnlySpan values, out AsciiState state) + { + Vector128 bitmapSpace = default; + byte* bitmapLocal = (byte*)&bitmapSpace; + BitVector256 lookupLocal = default; + + foreach (byte tValue in values) + { + int value = int.CreateChecked(tValue); + + if (value > 127) + { + continue; + } + + lookupLocal.Set(value); + SetBitmapBit(bitmapLocal, value); + } + + state = new AsciiState(bitmapSpace, lookupLocal); + } + + public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int maxInclusive) + { + if (!IsVectorizationSupported || values.Length > 16) + { + return false; + } + + if (Ssse3.IsSupported && maxInclusive > 127) + { + // We could support values higher than 127 if we did the "& 0xF" before calling into Shuffle in IndexOfAnyLookupCore. + // We currently optimize for the common case of ASCII characters instead, saving an instruction there. + return false; + } + + // We assume there are no duplicates to simplify the logic (if there are any, they just won't use this searching approach). + int seenNibbles = 0; + + foreach (byte tValue in values) + { + int bit = 1 << (int.CreateChecked(tValue) & 0xF); + + if ((seenNibbles & bit) != 0) + { + // We already saw a value with the same low nibble. + return false; + } + + seenNibbles |= bit; + } + + return true; + } + + public static void ComputeUniqueLowNibbleState(ReadOnlySpan values, out AsciiState state) + { Vector128 valuesByLowNibble = default; BitVector256 lookup = default; - foreach (T tValue in values) + foreach (byte tValue in values) { - byte value = byte.CreateTruncating(tValue); + byte value = (byte)tValue; lookup.Set(value); valuesByLowNibble.SetElementUnsafe(value & 0xF, value); } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs index 73de2d970c626d..35ad18c24ca310 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs @@ -241,22 +241,21 @@ public static SearchValues Create(ReadOnlySpan values, StringCom return StringSearchValues.Create(values, ignoreCase: comparisonType == StringComparison.OrdinalIgnoreCase); } - private static bool TryGetSingleRange(ReadOnlySpan values, out T minInclusive, out T maxInclusive) - where T : struct, INumber, IMinMaxValue + private static bool TryGetSingleRange(ReadOnlySpan values, out char minInclusive, out char maxInclusive) { - T min = T.MaxValue; - T max = T.MinValue; + char min = (char)ushort.MaxValue; + char max = (char)ushort.MinValue; - foreach (T value in values) + foreach (char value in values) { - min = T.Min(min, value); - max = T.Max(max, value); + min = (char)ushort.Min(min, value); + max = (char)ushort.Max(max, value); } minInclusive = min; maxInclusive = max; - uint range = uint.CreateChecked(max - min) + 1; + uint range = (uint)(max - min) + 1; if (range > values.Length) { return false; @@ -266,9 +265,47 @@ private static bool TryGetSingleRange(ReadOnlySpan values, out T minInclus seenValues = seenValues.Slice(0, (int)range); seenValues.Clear(); - foreach (T value in values) + foreach (char value in values) { - int offset = int.CreateChecked(value - min); + int offset = (int)(value - min); + seenValues[offset] = true; + } + + if (seenValues.Contains(false)) + { + return false; + } + + return true; + } + + private static bool TryGetSingleRange(ReadOnlySpan values, out byte minInclusive, out byte maxInclusive) + { + byte min = byte.MaxValue; + byte max = byte.MinValue; + + foreach (byte value in values) + { + min = byte.Min(min, value); + max = byte.Max(max, value); + } + + minInclusive = min; + maxInclusive = max; + + uint range = (uint)(max - min) + 1; + if (range > values.Length) + { + return false; + } + + Span seenValues = range <= 256 ? stackalloc bool[256] : new bool[range]; + seenValues = seenValues.Slice(0, (int)range); + seenValues.Clear(); + + foreach (byte value in values) + { + int offset = (int)(value - min); seenValues[offset] = true; }