From c12067d68d9637f707351716de11dc76388b45fe Mon Sep 17 00:00:00 2001 From: Jan Krivanek Date: Thu, 8 Feb 2024 13:30:07 +0100 Subject: [PATCH 1/3] =?UTF-8?q?Revert=20"Revert=20"Shorten=20UTD=20marker?= =?UTF-8?q?=20file=20(#9387)"=20except=20Microsoft.Common.Curr=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 5af9301a6d46fd40ab1420467d3291519f62c7e6. --- src/Build/Evaluation/IntrinsicFunctions.cs | 5 +- .../BinaryLogger/BuildEventArgsWriter.cs | 40 +----- src/StringTools/FowlerNollVo1aHash.cs | 135 ++++++++++++++++++ 3 files changed, 144 insertions(+), 36 deletions(-) create mode 100644 src/StringTools/FowlerNollVo1aHash.cs diff --git a/src/Build/Evaluation/IntrinsicFunctions.cs b/src/Build/Evaluation/IntrinsicFunctions.cs index 3fff5c28e65..28e8fe62c93 100644 --- a/src/Build/Evaluation/IntrinsicFunctions.cs +++ b/src/Build/Evaluation/IntrinsicFunctions.cs @@ -14,6 +14,7 @@ using Microsoft.Build.Shared; using Microsoft.Build.Shared.FileSystem; using Microsoft.Build.Utilities; +using Microsoft.NET.StringTools; using Microsoft.Win32; // Needed for DoesTaskHostExistForParameters @@ -398,11 +399,11 @@ internal static string ConvertFromBase64(string toDecode) } /// - /// Hash the string independent of bitness and target framework. + /// Hash the string independent of bitness, target framework and default codepage of the environment. 
/// internal static int StableStringHash(string toHash) { - return CommunicationsUtilities.GetHashCode(toHash); + return FowlerNollVo1aHash.ComputeHash32(toHash); } /// diff --git a/src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs b/src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs index 0a21182e83c..701c7b6b5fc 100644 --- a/src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs +++ b/src/Build/Logging/BinaryLogger/BuildEventArgsWriter.cs @@ -14,6 +14,8 @@ using Microsoft.Build.Framework; using Microsoft.Build.Framework.Profiler; using Microsoft.Build.Shared; +using Microsoft.Build.Utilities; +using Microsoft.NET.StringTools; #nullable disable @@ -1259,9 +1261,9 @@ private void Write(IExtendedBuildEventArgs extendedData) internal readonly struct HashKey : IEquatable { - private readonly ulong value; + private readonly long value; - private HashKey(ulong i) + private HashKey(long i) { value = i; } @@ -1274,13 +1276,13 @@ public HashKey(string text) } else { - value = FnvHash64.GetHashCode(text); + value = FowlerNollVo1aHash.ComputeHash64Fast(text); } } public static HashKey Combine(HashKey left, HashKey right) { - return new HashKey(FnvHash64.Combine(left.value, right.value)); + return new HashKey(FowlerNollVo1aHash.Combine64(left.value, right.value)); } public HashKey Add(HashKey other) => Combine(this, other); @@ -1310,35 +1312,5 @@ public override string ToString() return value.ToString(); } } - - internal static class FnvHash64 - { - public const ulong Offset = 14695981039346656037; - public const ulong Prime = 1099511628211; - - public static ulong GetHashCode(string text) - { - ulong hash = Offset; - - unchecked - { - for (int i = 0; i < text.Length; i++) - { - char ch = text[i]; - hash = (hash ^ ch) * Prime; - } - } - - return hash; - } - - public static ulong Combine(ulong left, ulong right) - { - unchecked - { - return (left ^ right) * Prime; - } - } - } } } diff --git a/src/StringTools/FowlerNollVo1aHash.cs 
b/src/StringTools/FowlerNollVo1aHash.cs new file mode 100644 index 00000000000..7532a688669 --- /dev/null +++ b/src/StringTools/FowlerNollVo1aHash.cs @@ -0,0 +1,135 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.InteropServices; +using System; + +namespace Microsoft.NET.StringTools +{ + /// + /// Fowler/Noll/Vo hashing. + /// + public static class FowlerNollVo1aHash + { + // Fowler/Noll/Vo hashing. + // http://www.isthe.com/chongo/tech/comp/fnv/ + // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash + // http://www.isthe.com/chongo/src/fnv/hash_32a.c + + // 32 bit FNV prime and offset basis for FNV-1a. + private const uint fnvPrimeA32Bit = 16777619; + private const uint fnvOffsetBasisA32Bit = 2166136261; + + // 64 bit FNV prime and offset basis for FNV-1a. + private const long fnvPrimeA64Bit = 1099511628211; + private const long fnvOffsetBasisA64Bit = unchecked((long)14695981039346656037); + + /// + /// Computes 32 bit Fowler/Noll/Vo-1a hash of a string (regardless of encoding). + /// + /// String to be hashed. + /// 32 bit signed hash + public static int ComputeHash32(string text) + { + uint hash = fnvOffsetBasisA32Bit; + +#if NET35 + unchecked + { + for (int i = 0; i < text.Length; i++) + { + char ch = text[i]; + byte b = (byte)ch; + hash ^= b; + hash *= fnvPrimeA32Bit; + + b = (byte)(ch >> 8); + hash ^= b; + hash *= fnvPrimeA32Bit; + } + } +#else + ReadOnlySpan span = MemoryMarshal.Cast(text.AsSpan()); + foreach (byte b in span) + { + hash = unchecked((hash ^ b) * fnvPrimeA32Bit); + } +#endif + + return unchecked((int)hash); + } + + /// + /// Computes 64 bit Fowler/Noll/Vo-1a hash optimized for ASCII strings. + /// The hashing algorithm considers only the first 8 bits of each character. 
+ /// Analysis: https://github.com/KirillOsenkov/MSBuildStructuredLog/wiki/String-Hashing#faster-fnv-1a + /// + /// String to be hashed. + /// 64 bit signed hash + public static long ComputeHash64Fast(string text) + { + long hash = fnvOffsetBasisA64Bit; + + unchecked + { + for (int i = 0; i < text.Length; i++) + { + char ch = text[i]; + + hash = (hash ^ ch) * fnvPrimeA64Bit; + } + } + + return hash; + } + + /// + /// Computes 64 bit Fowler/Noll/Vo-1a hash of a string (regardless of encoding). + /// + /// String to be hashed. + /// 64 bit signed hash + public static long ComputeHash64(string text) + { + long hash = fnvOffsetBasisA64Bit; + +#if NET35 + unchecked + { + for (int i = 0; i < text.Length; i++) + { + char ch = text[i]; + byte b = (byte)ch; + hash ^= b; + hash *= fnvPrimeA64Bit; + + b = (byte)(ch >> 8); + hash ^= b; + hash *= fnvPrimeA64Bit; + } + } +#else + ReadOnlySpan span = MemoryMarshal.Cast(text.AsSpan()); + foreach (byte b in span) + { + hash = unchecked((hash ^ b) * fnvPrimeA64Bit); + } +#endif + + return hash; + } + + /// + /// Combines two 64 bit hashes generated by class into one. + /// + /// First hash value to be combined. + /// Second hash value to be combined. 
+ /// + public static long Combine64(long left, long right) + { + unchecked + { + return (left ^ right) * fnvPrimeA64Bit; + } + } + } +} From e94460ac368227f9e0fa7df3d15d24698e6a1d36 Mon Sep 17 00:00:00 2001 From: Jan Krivanek Date: Thu, 8 Feb 2024 13:37:30 +0100 Subject: [PATCH 2/3] Make FNV hash compatible across endianness --- src/StringTools/FowlerNollVo1aHash.cs | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/src/StringTools/FowlerNollVo1aHash.cs b/src/StringTools/FowlerNollVo1aHash.cs index 7532a688669..29de98b5c59 100644 --- a/src/StringTools/FowlerNollVo1aHash.cs +++ b/src/StringTools/FowlerNollVo1aHash.cs @@ -33,7 +33,6 @@ public static int ComputeHash32(string text) { uint hash = fnvOffsetBasisA32Bit; -#if NET35 unchecked { for (int i = 0; i < text.Length; i++) @@ -48,20 +47,14 @@ public static int ComputeHash32(string text) hash *= fnvPrimeA32Bit; } } -#else - ReadOnlySpan span = MemoryMarshal.Cast(text.AsSpan()); - foreach (byte b in span) - { - hash = unchecked((hash ^ b) * fnvPrimeA32Bit); - } -#endif return unchecked((int)hash); } /// - /// Computes 64 bit Fowler/Noll/Vo-1a hash optimized for ASCII strings. - /// The hashing algorithm considers only the first 8 bits of each character. + /// Computes 64 bit Fowler/Noll/Vo-1a inspired hash of a string. + /// The hashing algorithm process the data by the whole 16bit chars, instead of by bytes. + /// this speeds up the hashing process almost by 2x, while not significantly increasing collisions rate. /// Analysis: https://github.com/KirillOsenkov/MSBuildStructuredLog/wiki/String-Hashing#faster-fnv-1a /// /// String to be hashed. 
@@ -92,7 +85,6 @@ public static long ComputeHash64(string text) { long hash = fnvOffsetBasisA64Bit; -#if NET35 unchecked { for (int i = 0; i < text.Length; i++) @@ -107,13 +99,6 @@ public static long ComputeHash64(string text) hash *= fnvPrimeA64Bit; } } -#else - ReadOnlySpan span = MemoryMarshal.Cast(text.AsSpan()); - foreach (byte b in span) - { - hash = unchecked((hash ^ b) * fnvPrimeA64Bit); - } -#endif return hash; } From e1be5b9719d08a2be7e6f6e8adb12ca7774f8537 Mon Sep 17 00:00:00 2001 From: Jan Krivanek Date: Thu, 8 Feb 2024 14:42:57 +0100 Subject: [PATCH 3/3] Add StableStringHash intrinsic function overloads --- .../Evaluation/Expander_Tests.cs | 42 ++++++++++++++++-- src/Build/Evaluation/Expander.cs | 5 +++ src/Build/Evaluation/IntrinsicFunctions.cs | 43 +++++++++++++++++-- src/StringTools/FowlerNollVo1aHash.cs | 25 +++++++++++ 4 files changed, 108 insertions(+), 7 deletions(-) diff --git a/src/Build.UnitTests/Evaluation/Expander_Tests.cs b/src/Build.UnitTests/Evaluation/Expander_Tests.cs index 030c6db60c3..434caf7def2 100644 --- a/src/Build.UnitTests/Evaluation/Expander_Tests.cs +++ b/src/Build.UnitTests/Evaluation/Expander_Tests.cs @@ -3890,8 +3890,14 @@ public void PropertyStringConstructorConsumingItemMetadata(string metadatumName, result.ShouldBe(metadatumValue); } - [Fact] - public void PropertyFunctionHashCodeSameOnlyIfStringSame() + public static IEnumerable GetHashAlgoTypes() + => Enum.GetNames(typeof(IntrinsicFunctions.StringHashingAlgorithm)) + .Append(null) + .Select(t => new object[] { t }); + + [Theory] + [MemberData(nameof(GetHashAlgoTypes))] + public void PropertyFunctionHashCodeSameOnlyIfStringSame(string hashType) { PropertyDictionary pg = new PropertyDictionary(); Expander expander = new Expander(pg, FileSystems.Default); @@ -3906,8 +3912,9 @@ public void PropertyFunctionHashCodeSameOnlyIfStringSame() "cat12s", "cat1s" }; - int[] hashes = stringsToHash.Select(toHash => - 
(int)expander.ExpandPropertiesLeaveTypedAndEscaped($"$([MSBuild]::StableStringHash('{toHash}'))", ExpanderOptions.ExpandProperties, MockElementLocation.Instance)) + string hashTypeString = hashType == null ? "" : $", '{hashType}'"; + object[] hashes = stringsToHash.Select(toHash => + expander.ExpandPropertiesLeaveTypedAndEscaped($"$([MSBuild]::StableStringHash('{toHash}'{hashTypeString}))", ExpanderOptions.ExpandProperties, MockElementLocation.Instance)) .ToArray(); for (int a = 0; a < hashes.Length; a++) { @@ -3925,6 +3932,33 @@ public void PropertyFunctionHashCodeSameOnlyIfStringSame() } } + [Theory] + [MemberData(nameof(GetHashAlgoTypes))] + public void PropertyFunctionHashCodeReturnsExpectedType(string hashType) + { + PropertyDictionary pg = new PropertyDictionary(); + Expander expander = new Expander(pg, FileSystems.Default); + Type expectedType; + + expectedType = hashType switch + { + null => typeof(int), + "Legacy" => typeof(int), + "Fnv1a32bit" => typeof(int), + "Fnv1a32bitFast" => typeof(int), + "Fnv1a64bit" => typeof(long), + "Fnv1a64bitFast" => typeof(long), + "Sha256" => typeof(string), + _ => throw new ArgumentOutOfRangeException(nameof(hashType)) + }; + + + string hashTypeString = hashType == null ? 
"" : $", '{hashType}'"; + object hashValue = expander.ExpandPropertiesLeaveTypedAndEscaped($"$([MSBuild]::StableStringHash('FooBar'{hashTypeString}))", ExpanderOptions.ExpandProperties, MockElementLocation.Instance); + + hashValue.ShouldBeOfType(expectedType); + } + [Theory] [InlineData("easycase")] [InlineData("")] diff --git a/src/Build/Evaluation/Expander.cs b/src/Build/Evaluation/Expander.cs index c998910190e..21bb6f60c0d 100644 --- a/src/Build/Evaluation/Expander.cs +++ b/src/Build/Evaluation/Expander.cs @@ -4162,6 +4162,11 @@ private bool TryExecuteWellKnownFunction(out object returnVal, object objectInst returnVal = IntrinsicFunctions.StableStringHash(arg0); return true; } + else if (TryGetArgs(args, out string arg1, out string arg2) && Enum.TryParse(arg2, true, out var hashAlgorithm)) + { + returnVal = IntrinsicFunctions.StableStringHash(arg1, hashAlgorithm); + return true; + } } else if (string.Equals(_methodMethodName, nameof(IntrinsicFunctions.AreFeaturesEnabled), StringComparison.OrdinalIgnoreCase)) { diff --git a/src/Build/Evaluation/IntrinsicFunctions.cs b/src/Build/Evaluation/IntrinsicFunctions.cs index 28e8fe62c93..da7047c0db3 100644 --- a/src/Build/Evaluation/IntrinsicFunctions.cs +++ b/src/Build/Evaluation/IntrinsicFunctions.cs @@ -398,12 +398,49 @@ internal static string ConvertFromBase64(string toDecode) return Encoding.UTF8.GetString(Convert.FromBase64String(toDecode)); } + internal enum StringHashingAlgorithm + { + // Legacy way of calculating StableStringHash - which was derived from string GetHashCode + Legacy, + // FNV-1a 32bit hash + Fnv1a32bit, + // Custom FNV-1a 32bit hash - optimized for speed by hashing by the whole chars (not individual bytes) + Fnv1a32bitFast, + // FNV-1a 64bit hash + Fnv1a64bit, + // Custom FNV-1a 64bit hash - optimized for speed by hashing by the whole chars (not individual bytes) + Fnv1a64bitFast, + // SHA256 hash - gets the hex string of the hash (with no prefix) + Sha256 + } + /// /// Hash the string independent 
of bitness, target framework and default codepage of the environment. /// - internal static int StableStringHash(string toHash) - { - return FowlerNollVo1aHash.ComputeHash32(toHash); + internal static object StableStringHash(string toHash) + => StableStringHash(toHash, StringHashingAlgorithm.Legacy); + + internal static object StableStringHash(string toHash, StringHashingAlgorithm algo) => + algo switch + { + StringHashingAlgorithm.Legacy => CommunicationsUtilities.GetHashCode(toHash), + StringHashingAlgorithm.Fnv1a32bit => FowlerNollVo1aHash.ComputeHash32(toHash), + StringHashingAlgorithm.Fnv1a32bitFast => FowlerNollVo1aHash.ComputeHash32Fast(toHash), + StringHashingAlgorithm.Fnv1a64bit => FowlerNollVo1aHash.ComputeHash64(toHash), + StringHashingAlgorithm.Fnv1a64bitFast => FowlerNollVo1aHash.ComputeHash64Fast(toHash), + StringHashingAlgorithm.Sha256 => CalculateSha256(toHash), + _ => throw new ArgumentOutOfRangeException(nameof(algo), algo, null) + }; + + private static string CalculateSha256(string toHash) + { + using var sha = System.Security.Cryptography.SHA256.Create(); // SHA256 is IDisposable; released when the method returns + var hashResult = new StringBuilder(); + foreach (byte theByte in sha.ComputeHash(Encoding.UTF8.GetBytes(toHash))) + { + hashResult.Append(theByte.ToString("x2")); + } + return hashResult.ToString(); } /// diff --git a/src/StringTools/FowlerNollVo1aHash.cs b/src/StringTools/FowlerNollVo1aHash.cs index 29de98b5c59..5a9a876e4c0 100644 --- a/src/StringTools/FowlerNollVo1aHash.cs +++ b/src/StringTools/FowlerNollVo1aHash.cs @@ -51,6 +51,31 @@ public static int ComputeHash32(string text) return unchecked((int)hash); } + /// + /// Computes 32 bit Fowler/Noll/Vo-1a inspired hash of a string. + /// The hashing algorithm processes the data by whole 16bit chars, instead of by bytes. + /// This speeds up the hashing process almost by 2x, while not significantly increasing collision rate. 
+ /// Analysis: https://github.com/KirillOsenkov/MSBuildStructuredLog/wiki/String-Hashing#faster-fnv-1a + /// + /// String to be hashed. + /// 32 bit signed hash + public static int ComputeHash32Fast(string text) + { + uint hash = fnvOffsetBasisA32Bit; + + unchecked + { + for (int i = 0; i < text.Length; i++) + { + char ch = text[i]; + + hash = (hash ^ ch) * fnvPrimeA32Bit; + } + } + + return unchecked((int)hash); + } + /// + /// Computes 64 bit Fowler/Noll/Vo-1a inspired hash of a string. + /// The hashing algorithm process the data by the whole 16bit chars, instead of by bytes.