-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Add APIs to BlobBuilder for customizing the underlying byte array et al.
#115294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
fd2a7e2
d1b3914
776a69f
655ad38
7e419b0
8c078c7
0f8c96a
ff7ea5e
31f3f24
a9443c6
07987e4
125164d
bc81ac5
ed979d9
515e825
5bd0a1d
4c5ea72
c0cb357
0633ea5
2ea1121
82bd703
2e8f025
6fa2fdf
4f7ebd8
9123d14
558e2e7
197db30
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,11 +1,16 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
|
|
||
| using System.Buffers; | ||
| using System.Buffers.Binary; | ||
| using System.Diagnostics; | ||
| using System.Reflection.Internal; | ||
| using System.Runtime.CompilerServices; | ||
| using System.Runtime.InteropServices; | ||
| using System.Text; | ||
| #if NET | ||
| using System.Text.Unicode; | ||
| #endif | ||
|
|
||
| namespace System.Reflection | ||
| { | ||
|
|
@@ -123,148 +128,135 @@ public static void WriteGuid(this byte[] buffer, int start, Guid value) | |
| #endif | ||
| } | ||
|
|
||
| public static unsafe void WriteUTF8(this byte[] buffer, int start, char* charPtr, int charCount, int byteCount, bool allowUnpairedSurrogates) | ||
| #if NET | ||
| public static void WriteUtf8(ReadOnlySpan<char> source, Span<byte> destination, out int charsRead, out int bytesWritten, bool allowUnpairedSurrogates) | ||
teo-tsirpanis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| { | ||
| Debug.Assert(byteCount >= charCount); | ||
| const char ReplacementCharacter = '\uFFFD'; | ||
| int sourceLength = source.Length; | ||
| int destinationLength = destination.Length; | ||
|
|
||
| char* strEnd = charPtr + charCount; | ||
| fixed (byte* bufferPtr = &buffer[0]) | ||
| while (true) | ||
| { | ||
| byte* ptr = bufferPtr + start; | ||
| OperationStatus status = Utf8.FromUtf16(source, destination, out int consumed, out int written, replaceInvalidSequences: !allowUnpairedSurrogates, isFinalBlock: true); | ||
| source = source.Slice(consumed); | ||
| destination = destination.Slice(written); | ||
|
|
||
| if (byteCount == charCount) | ||
| if (status <= OperationStatus.DestinationTooSmall) | ||
| { | ||
| while (charPtr < strEnd) | ||
| { | ||
| Debug.Assert(*charPtr <= 0x7f); | ||
| *ptr++ = unchecked((byte)*charPtr++); | ||
| } | ||
| break; | ||
| } | ||
| else | ||
|
|
||
| // NeedsMoreData is not expected because isFinalBlock is set to true. | ||
| Debug.Assert(status == OperationStatus.InvalidData); | ||
| // If we don't allow unpaired surrogates, they should have been replaced by FromUtf16. | ||
| Debug.Assert(allowUnpairedSurrogates); | ||
| char c = source[0]; | ||
| Debug.Assert(char.IsSurrogate(c)); | ||
| if (destination.Length < 3) | ||
| { | ||
| while (charPtr < strEnd) | ||
| { | ||
| char c = *charPtr++; | ||
| break; | ||
| } | ||
| destination[0] = (byte)(((c >> 12) & 0xF) | 0xE0); | ||
| destination[1] = (byte)(((c >> 6) & 0x3F) | 0x80); | ||
| destination[2] = (byte)((c & 0x3F) | 0x80); | ||
| source = source.Slice(1); | ||
| destination = destination.Slice(3); | ||
| } | ||
|
|
||
| charsRead = sourceLength - source.Length; | ||
| bytesWritten = destinationLength - destination.Length; | ||
| } | ||
| #else | ||
| public static void WriteUtf8(ReadOnlySpan<char> source, Span<byte> destination, out int charsRead, out int bytesWritten, bool allowUnpairedSurrogates) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this need to change at all? If this is for .NET Framework, I would just leave it as-is. There is little benefit to changing anything, given the possibility of regressions.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. The method's old signature was harder to work with: it uses pointers, does not clearly state which buffer is read-only, and requires pre-computing the UTF-8 byte count. I had to refactor SRM's UTF-8 encoder[1] in order to improve code reusability and memory safety in modern frameworks, and take advantage of the [inline code reference lost in extraction; presumably `Utf8.FromUtf16`, which the `#if NET` branch of this diff calls]. Another idea, if we want to avoid the extra package dependency, is to vendor the sources of [inline code reference lost in extraction]. Footnotes: [footnote text lost in extraction] |
||
| { | ||
| const char ReplacementCharacter = '\uFFFD'; | ||
|
|
||
| int sourceLength = source.Length; | ||
| int destinationLength = destination.Length; | ||
|
|
||
| unsafe | ||
| { | ||
| fixed (char* pSource = &MemoryMarshal.GetReference(source)) | ||
| fixed (byte* pDestination = &MemoryMarshal.GetReference(destination)) | ||
| { | ||
| char* src = pSource, srcEnd = pSource + source.Length; | ||
| byte* dst = pDestination, dstEnd = pDestination + destination.Length; | ||
|
|
||
| while (src < srcEnd) | ||
| { | ||
| char c = *src; | ||
| if (c < 0x80) | ||
| { | ||
| *ptr++ = (byte)c; | ||
| continue; | ||
| if (dstEnd - dst < 1) | ||
| { | ||
| break; | ||
| } | ||
| *dst++ = (byte)c; | ||
| src++; | ||
| } | ||
|
|
||
| if (c < 0x800) | ||
| else if (c < 0x800) | ||
| { | ||
| ptr[0] = (byte)(((c >> 6) & 0x1F) | 0xC0); | ||
| ptr[1] = (byte)((c & 0x3F) | 0x80); | ||
| ptr += 2; | ||
| continue; | ||
| if (dstEnd - dst < 2) | ||
| { | ||
| break; | ||
| } | ||
| *dst++ = (byte)((c >> 6) | 0xC0); | ||
| *dst++ = (byte)((c & 0x3F) | 0x80); | ||
| src++; | ||
| } | ||
|
|
||
| if (IsSurrogateChar(c)) | ||
| else | ||
| { | ||
| // surrogate pair | ||
| if (IsHighSurrogateChar(c) && charPtr < strEnd && IsLowSurrogateChar(*charPtr)) | ||
| if (char.IsSurrogate(c)) | ||
| { | ||
| int highSurrogate = c; | ||
| int lowSurrogate = *charPtr++; | ||
| int codepoint = (((highSurrogate - 0xd800) << 10) + lowSurrogate - 0xdc00) + 0x10000; | ||
| ptr[0] = (byte)(((codepoint >> 18) & 0x7) | 0xF0); | ||
| ptr[1] = (byte)(((codepoint >> 12) & 0x3F) | 0x80); | ||
| ptr[2] = (byte)(((codepoint >> 6) & 0x3F) | 0x80); | ||
| ptr[3] = (byte)((codepoint & 0x3F) | 0x80); | ||
| ptr += 4; | ||
| continue; | ||
| // surrogate pair | ||
| if (char.IsHighSurrogate(c) && srcEnd - src >= 2 && src[1] is char cLow && char.IsLowSurrogate(cLow)) | ||
| { | ||
| if (dstEnd - dst < 4) | ||
| { | ||
| break; | ||
| } | ||
| int codepoint = ((c - 0xd800) << 10) + cLow - 0xdc00 + 0x10000; | ||
| *dst++ = (byte)((codepoint >> 18) | 0xF0); | ||
| *dst++ = (byte)(((codepoint >> 12) & 0x3F) | 0x80); | ||
| *dst++ = (byte)(((codepoint >> 6) & 0x3F) | 0x80); | ||
| *dst++ = (byte)((codepoint & 0x3F) | 0x80); | ||
| src += 2; | ||
| continue; | ||
| } | ||
|
|
||
| // unpaired high/low surrogate | ||
| if (!allowUnpairedSurrogates) | ||
| { | ||
| c = ReplacementCharacter; | ||
| } | ||
| } | ||
|
|
||
| // unpaired high/low surrogate | ||
| if (!allowUnpairedSurrogates) | ||
| if (dstEnd - dst < 3) | ||
| { | ||
| c = ReplacementCharacter; | ||
| break; | ||
| } | ||
| *dst++ = (byte)((c >> 12) | 0xE0); | ||
| *dst++ = (byte)(((c >> 6) & 0x3F) | 0x80); | ||
| *dst++ = (byte)((c & 0x3F) | 0x80); | ||
| src++; | ||
| } | ||
|
|
||
| ptr[0] = (byte)(((c >> 12) & 0xF) | 0xE0); | ||
| ptr[1] = (byte)(((c >> 6) & 0x3F) | 0x80); | ||
| ptr[2] = (byte)((c & 0x3F) | 0x80); | ||
| ptr += 3; | ||
| } | ||
| } | ||
|
|
||
| Debug.Assert(ptr == bufferPtr + start + byteCount); | ||
| Debug.Assert(charPtr == strEnd); | ||
| } | ||
| } | ||
|
|
||
| internal static unsafe int GetUTF8ByteCount(string str) | ||
| { | ||
| fixed (char* ptr = str) | ||
| { | ||
| return GetUTF8ByteCount(ptr, str.Length); | ||
| charsRead = (int)(src - pSource); | ||
| bytesWritten = (int)(dst - pDestination); | ||
| } | ||
| } | ||
| } | ||
| #endif | ||
|
|
||
| internal static unsafe int GetUTF8ByteCount(char* str, int charCount) | ||
| { | ||
| return GetUTF8ByteCount(str, charCount, int.MaxValue, out _); | ||
| } | ||
|
|
||
| internal static unsafe int GetUTF8ByteCount(char* str, int charCount, int byteLimit, out char* remainder) | ||
| #if !NET | ||
AaronRobinsonMSFT marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| internal static unsafe int GetByteCount(this Encoding encoding, ReadOnlySpan<char> str) | ||
| { | ||
| char* end = str + charCount; | ||
|
|
||
| char* ptr = str; | ||
| int byteCount = 0; | ||
| while (ptr < end) | ||
| fixed (char* ptr = &MemoryMarshal.GetReference(str)) | ||
| { | ||
| int characterSize; | ||
| char c = *ptr++; | ||
| if (c < 0x80) | ||
| { | ||
| characterSize = 1; | ||
| } | ||
| else if (c < 0x800) | ||
| { | ||
| characterSize = 2; | ||
| } | ||
| else if (IsHighSurrogateChar(c) && ptr < end && IsLowSurrogateChar(*ptr)) | ||
| { | ||
| // surrogate pair: | ||
| characterSize = 4; | ||
| ptr++; | ||
| } | ||
| else | ||
| { | ||
| characterSize = 3; | ||
| } | ||
|
|
||
| if (byteCount + characterSize > byteLimit) | ||
| { | ||
| ptr -= (characterSize < 4) ? 1 : 2; | ||
| break; | ||
| } | ||
|
|
||
| byteCount += characterSize; | ||
| return encoding.GetByteCount(ptr, str.Length); | ||
| } | ||
|
|
||
| remainder = ptr; | ||
| return byteCount; | ||
| } | ||
|
|
||
| internal static bool IsSurrogateChar(int c) | ||
| { | ||
| return unchecked((uint)(c - 0xD800)) <= 0xDFFF - 0xD800; | ||
| } | ||
|
|
||
| internal static bool IsHighSurrogateChar(int c) | ||
| { | ||
| return unchecked((uint)(c - 0xD800)) <= 0xDBFF - 0xD800; | ||
| } | ||
|
|
||
| internal static bool IsLowSurrogateChar(int c) | ||
| { | ||
| return unchecked((uint)(c - 0xDC00)) <= 0xDFFF - 0xDC00; | ||
| } | ||
| #endif | ||
|
|
||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||
| internal static void ValidateRange(int bufferLength, int start, int byteCount, string byteCountParameterName) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.