From 20eb80a40018896d9ee4285f8ad4ba49dd3c2600 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Mon, 15 Sep 2025 16:40:42 -0700 Subject: [PATCH 1/3] Reduce allocations in SourceGeneratedDocumentIdentity.Generate 1) The ArrayBuilder that was being used commonly ends up exceeding the re-use size threshold, thus negating its pooling benefit 2) The ArrayBuilder.ToArray call can be replaced by an upfront array allocation as the size of the array is easy to determine 3) The Encoding.GetBytes calls can be changed to a non-allocating version 4) In NET, can avoid creating the SHA256 object 5) In NET, can use the HashData call to avoid allocating a return array 6) In NET, can create a guid without needing to create a new array --- .../SourceGeneratedDocumentIdentity.cs | 36 ++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Workspaces/Core/Portable/Workspace/Solution/SourceGeneratedDocumentIdentity.cs b/src/Workspaces/Core/Portable/Workspace/Solution/SourceGeneratedDocumentIdentity.cs index e991d2baddfa5..257f11d1fbe4a 100644 --- a/src/Workspaces/Core/Portable/Workspace/Solution/SourceGeneratedDocumentIdentity.cs +++ b/src/Workspaces/Core/Portable/Workspace/Solution/SourceGeneratedDocumentIdentity.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using System.Security.Cryptography; using System.Runtime.Serialization; using System.Text; using Microsoft.CodeAnalysis.Diagnostics; @@ -49,23 +50,40 @@ public static SourceGeneratedDocumentIdentity Generate(ProjectId projectId, stri // dynamic assembly they produced at runtime and passed us that via a custom AnalyzerReference. var assemblyNameToHash = generatorIdentity.AssemblyPath ?? 
generatorIdentity.AssemblyName; - using var _ = ArrayBuilder.GetInstance(capacity: (assemblyNameToHash.Length + 1 + generatorIdentity.TypeName.Length + 1 + hintName.Length) * 2 + projectIdBytes.Length, out var hashInput); - hashInput.AddRange(projectIdBytes); + var hashInputLength = projectIdBytes.Length + + Encoding.Unicode.GetByteCount(assemblyNameToHash) + + 2 + + Encoding.Unicode.GetByteCount(generatorIdentity.TypeName) + + 2 + + Encoding.Unicode.GetByteCount(hintName); + + var hashInput = new byte[hashInputLength]; + + Array.Copy(projectIdBytes, 0, hashInput, 0, projectIdBytes.Length); + var byteIndex = projectIdBytes.Length; // Add a null to separate the generator name and hint name; since this is effectively a joining of UTF-16 bytes // we'll use a UTF-16 null just to make sure there's absolutely no risk of collision. - hashInput.AddRange(Encoding.Unicode.GetBytes(assemblyNameToHash)); - hashInput.AddRange(0, 0); - hashInput.AddRange(Encoding.Unicode.GetBytes(generatorIdentity.TypeName)); - hashInput.AddRange(0, 0); - hashInput.AddRange(Encoding.Unicode.GetBytes(hintName)); + byteIndex += Encoding.Unicode.GetBytes(assemblyNameToHash, 0, assemblyNameToHash.Length, hashInput, byteIndex); + byteIndex += 2; + byteIndex += Encoding.Unicode.GetBytes(generatorIdentity.TypeName, 0, generatorIdentity.TypeName.Length, hashInput, byteIndex); + byteIndex += 2; + byteIndex += Encoding.Unicode.GetBytes(hintName, 0, hintName.Length, hashInput, byteIndex); // The particular choice of crypto algorithm here is arbitrary and can be always changed as necessary. The only requirement // is it must be collision resistant, and provide enough bits to fill a GUID. 
- using var crytpoAlgorithm = System.Security.Cryptography.SHA256.Create(); - var hash = crytpoAlgorithm.ComputeHash(hashInput.ToArray()); +#if NET + Span hash = stackalloc byte[SHA256.HashSizeInBytes]; + SHA256.HashData(hashInput, hash); + + var guid = new Guid(hash[..16]); +#else + using var crytpoAlgorithm = SHA256.Create(); + var hash = crytpoAlgorithm.ComputeHash(hashInput); + Array.Resize(ref hash, 16); var guid = new Guid(hash); +#endif var documentId = DocumentId.CreateFromSerialized(projectId, guid, isSourceGenerated: true, hintName); From 26df08c69c0a994937cee7f5736009075ad618dc Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Tue, 16 Sep 2025 11:04:51 -0700 Subject: [PATCH 2/3] Switch from Encoding.GetBytes and using SHA256 to just using Checksums --- .../Workspace/Solution/Checksum_Factory.cs | 10 ++-- .../SourceGeneratedDocumentIdentity.cs | 46 ++++++------------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/src/Workspaces/Core/Portable/Workspace/Solution/Checksum_Factory.cs b/src/Workspaces/Core/Portable/Workspace/Solution/Checksum_Factory.cs index 32b4f480eeaa1..07508b3cc5401 100644 --- a/src/Workspaces/Core/Portable/Workspace/Solution/Checksum_Factory.cs +++ b/src/Workspaces/Core/Portable/Workspace/Solution/Checksum_Factory.cs @@ -44,6 +44,9 @@ public static Checksum Create(IEnumerable values) } public static Checksum Create(ImmutableArray values) + => Create(ImmutableCollectionsMarshal.AsArray(values).AsSpan()); + + public static Checksum Create(ReadOnlySpan values) { using var pooledHash = s_incrementalHashPool.GetPooledObject(); @@ -159,11 +162,12 @@ public static Checksum Create(ImmutableArray checksums) } public static Checksum Create(ImmutableArray bytes) - { - var source = ImmutableCollectionsMarshal.AsArray(bytes).AsSpan(); + => Create(ImmutableCollectionsMarshal.AsArray(bytes).AsSpan()); + public static Checksum Create(ReadOnlySpan bytes) + { Span destination = stackalloc byte[XXHash128SizeBytes]; - XxHash128.Hash(source, 
destination); + XxHash128.Hash(bytes, destination); return From(destination); } diff --git a/src/Workspaces/Core/Portable/Workspace/Solution/SourceGeneratedDocumentIdentity.cs b/src/Workspaces/Core/Portable/Workspace/Solution/SourceGeneratedDocumentIdentity.cs index 257f11d1fbe4a..846344ca0eed8 100644 --- a/src/Workspaces/Core/Portable/Workspace/Solution/SourceGeneratedDocumentIdentity.cs +++ b/src/Workspaces/Core/Portable/Workspace/Solution/SourceGeneratedDocumentIdentity.cs @@ -42,47 +42,29 @@ public static SourceGeneratedDocumentIdentity Generate(ProjectId projectId, stri // ensure we don't have collisions. var generatorIdentity = SourceGeneratorIdentity.Create(generator, analyzerReference); - // Combine the strings together; we'll use Encoding.Unicode since that'll match the underlying format; this can be made much - // faster once we're on .NET Core since we could directly treat the strings as ReadOnlySpan. - var projectIdBytes = projectId.Id.ToByteArray(); - // The assembly path should exist in any normal scenario; the hashing of the name only would apply if the user loaded a // dynamic assembly they produced at runtime and passed us that via a custom AnalyzerReference. var assemblyNameToHash = generatorIdentity.AssemblyPath ?? 
generatorIdentity.AssemblyName; - var hashInputLength = projectIdBytes.Length - + Encoding.Unicode.GetByteCount(assemblyNameToHash) - + 2 - + Encoding.Unicode.GetByteCount(generatorIdentity.TypeName) - + 2 - + Encoding.Unicode.GetByteCount(hintName); - - var hashInput = new byte[hashInputLength]; +#if NET + Span bytesToChecksum = stackalloc byte[16]; + projectId.Id.TryWriteBytes(bytesToChecksum); +#else + var bytesToChecksum = projectId.Id.ToByteArray().AsSpan(); +#endif - Array.Copy(projectIdBytes, 0, hashInput, 0, projectIdBytes.Length); - var byteIndex = projectIdBytes.Length; + ReadOnlySpan stringsToChecksum = [assemblyNameToHash, generatorIdentity.TypeName, hintName]; + var stringChecksum = Checksum.Create(stringsToChecksum); + var byteChecksum = Checksum.Create(bytesToChecksum); + var compositeChecksum = Checksum.Create(stringChecksum, byteChecksum); - // Add a null to separate the generator name and hint name; since this is effectively a joining of UTF-16 bytes - // we'll use a UTF-16 null just to make sure there's absolutely no risk of collision. - byteIndex += Encoding.Unicode.GetBytes(assemblyNameToHash, 0, assemblyNameToHash.Length, hashInput, byteIndex); - byteIndex += 2; - byteIndex += Encoding.Unicode.GetBytes(generatorIdentity.TypeName, 0, generatorIdentity.TypeName.Length, hashInput, byteIndex); - byteIndex += 2; - byteIndex += Encoding.Unicode.GetBytes(hintName, 0, hintName.Length, hashInput, byteIndex); + Span checksumAsBytes = stackalloc byte[16]; + compositeChecksum.WriteTo(checksumAsBytes); - // The particular choice of crypto algorithm here is arbitrary and can be always changed as necessary. The only requirement - // is it must be collision resistant, and provide enough bits to fill a GUID. 
#if NET - Span hash = stackalloc byte[SHA256.HashSizeInBytes]; - SHA256.HashData(hashInput, hash); - - var guid = new Guid(hash[..16]); + var guid = new Guid(checksumAsBytes); #else - using var crytpoAlgorithm = SHA256.Create(); - var hash = crytpoAlgorithm.ComputeHash(hashInput); - - Array.Resize(ref hash, 16); - var guid = new Guid(hash); + var guid = new Guid(checksumAsBytes.ToArray()); #endif var documentId = DocumentId.CreateFromSerialized(projectId, guid, isSourceGenerated: true, hintName); From 8b237ad0649580e1141cb7e56b8e5704e528ff65 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Tue, 16 Sep 2025 12:01:41 -0700 Subject: [PATCH 3/3] disambiguate test usage of Checksum.Create --- src/Workspaces/CoreTest/ChecksumTests.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Workspaces/CoreTest/ChecksumTests.cs b/src/Workspaces/CoreTest/ChecksumTests.cs index 3e0b08b288ecc..b43458f2fa541 100644 --- a/src/Workspaces/CoreTest/ChecksumTests.cs +++ b/src/Workspaces/CoreTest/ChecksumTests.cs @@ -154,8 +154,8 @@ public void ValidateChecksumFromSpanSameAsChecksumFromBytes10() [Fact] public void StringArraysProduceDifferentResultsThanConcatenation() { - var checksum1 = Checksum.Create(["goo", "bar"]); - var checksum2 = Checksum.Create(["go", "obar"]); + var checksum1 = Checksum.Create(ImmutableArray.Create("goo", "bar")); + var checksum2 = Checksum.Create(ImmutableArray.Create("go", "obar")); var checksum3 = Checksum.Create("goobar"); Assert.NotEqual(checksum1, checksum2); Assert.NotEqual(checksum2, checksum3); @@ -175,9 +175,9 @@ public void DoNotProduceNullChecksum() Assert.NotEqual(Checksum.Null, Checksum.Create(ImmutableArray.Empty)); Assert.NotEqual(Checksum.Null, Checksum.Create(ImmutableArray.Empty)); - Assert.NotEqual(Checksum.Null, Checksum.Create([""])); - Assert.NotEqual(Checksum.Null, Checksum.Create(["\0"])); - Assert.NotEqual(Checksum.Null, Checksum.Create(new string?[] { null })); + Assert.NotEqual(Checksum.Null, 
Checksum.Create(ImmutableArray.Create(""))); + Assert.NotEqual(Checksum.Null, Checksum.Create(ImmutableArray.Create("\0"))); + Assert.NotEqual(Checksum.Null, Checksum.Create(new string?[] { null }.AsEnumerable())); Assert.NotEqual(Checksum.Null, Checksum.Create(new MemoryStream())); Assert.NotEqual(Checksum.Null, Checksum.Create(stackalloc Checksum[] { Checksum.Null })); Assert.NotEqual(Checksum.Null, Checksum.Create(ImmutableArray.Create(Checksum.Null)));