diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs index 5461301f4e4714..1cd73a526e5e48 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs @@ -395,46 +395,38 @@ public static OperationStatus DecodeFromUtf8(ReadOnlySpan source, out Rune // it tries to consume as many code units as possible as long as those code // units constitute the beginning of a longer well-formed subsequence per Table 3-7. - int index = 0; - - // Try reading input[0]. + // Try reading source[0]. - if ((uint)index >= (uint)source.Length) + int index = 0; + if (source.IsEmpty) { goto NeedsMoreData; } - uint tempValue = source[index]; - if (!UnicodeUtility.IsAsciiCodePoint(tempValue)) + uint tempValue = source[0]; + if (UnicodeUtility.IsAsciiCodePoint(tempValue)) { - goto NotAscii; + bytesConsumed = 1; + result = UnsafeCreate(tempValue); + return OperationStatus.Done; } - Finish: - - bytesConsumed = index + 1; - Debug.Assert(1 <= bytesConsumed && bytesConsumed <= 4); // Valid subsequences are always length [1..4] - result = UnsafeCreate(tempValue); - return OperationStatus.Done; - - NotAscii: - // Per Table 3-7, the beginning of a multibyte sequence must be a code unit in // the range [C2..F4]. If it's outside of that range, it's either a standalone // continuation byte, or it's an overlong two-byte sequence, or it's an out-of-range // four-byte sequence. + // Try reading source[1]. + + index = 1; if (!UnicodeUtility.IsInRangeInclusive(tempValue, 0xC2, 0xF4)) { - goto FirstByteInvalid; + goto Invalid; } tempValue = (tempValue - 0xC2) << 6; - // Try reading input[1]. - - index++; - if ((uint)index >= (uint)source.Length) + if (source.Length <= 1) { goto NeedsMoreData; } @@ -443,7 +435,7 @@ public static OperationStatus DecodeFromUtf8(ReadOnlySpan source, out Rune // complement representation is in the range [-65..-128]. This allows us to // perform a single comparison to see if a byte is a continuation byte. - int thisByteSignExtended = (sbyte)source[index]; + int thisByteSignExtended = (sbyte)source[1]; if (thisByteSignExtended >= -64) { goto Invalid; @@ -485,15 +477,15 @@ public static OperationStatus DecodeFromUtf8(ReadOnlySpan source, out Rune // The first two bytes were just fine. We don't need to perform any other checks // on the remaining bytes other than to see that they're valid continuation bytes. - // Try reading input[2]. + // Try reading source[2]. - index++; - if ((uint)index >= (uint)source.Length) + index = 2; + if (source.Length <= 2) { goto NeedsMoreData; } - thisByteSignExtended = (sbyte)source[index]; + thisByteSignExtended = (sbyte)source[2]; if (thisByteSignExtended >= -64) { goto Invalid; // this byte is not a UTF-8 continuation byte @@ -510,15 +502,15 @@ public static OperationStatus DecodeFromUtf8(ReadOnlySpan source, out Rune goto Finish; // this is a valid 3-byte sequence } - // Try reading input[3]. + // Try reading source[3]. - index++; - if ((uint)index >= (uint)source.Length) + index = 3; + if (source.Length <= 3) { goto NeedsMoreData; } - thisByteSignExtended = (sbyte)source[index]; + thisByteSignExtended = (sbyte)source[3]; if (thisByteSignExtended >= -64) { goto Invalid; // this byte is not a UTF-8 continuation byte @@ -529,26 +521,29 @@ public static OperationStatus DecodeFromUtf8(ReadOnlySpan source, out Rune tempValue += 0x80; // remove the continuation byte marker tempValue -= (0xF0 - 0xE0) << 18; // remove the leading byte marker + // Valid 4-byte sequence UnicodeDebug.AssertIsValidSupplementaryPlaneScalar(tempValue); - goto Finish; // this is a valid 4-byte sequence - FirstByteInvalid: + Finish: - index = 1; // Invalid subsequences are always at least length 1. + bytesConsumed = index + 1; + Debug.Assert(1 <= bytesConsumed && bytesConsumed <= 4); // Valid subsequences are always length [1..4] + result = UnsafeCreate(tempValue); + return OperationStatus.Done; - Invalid: + NeedsMoreData: - Debug.Assert(1 <= index && index <= 3); // Invalid subsequences are always length 1..3 + Debug.Assert(0 <= index && index <= 3); // Incomplete subsequences are always length 0..3 bytesConsumed = index; result = ReplacementChar; - return OperationStatus.InvalidData; + return OperationStatus.NeedMoreData; - NeedsMoreData: + Invalid: - Debug.Assert(0 <= index && index <= 3); // Incomplete subsequences are always length 0..3 + Debug.Assert(1 <= index && index <= 3); // Invalid subsequences are always length 1..3 bytesConsumed = index; result = ReplacementChar; - return OperationStatus.NeedMoreData; + return OperationStatus.InvalidData; } ///