Faster optimized frozen dictionary creation (3/n) #87688
@@ -36,23 +36,21 @@ private FrozenHashTable(int[] hashCodes, Bucket[] buckets, ulong fastModMultipli
         }

         /// <summary>Initializes a frozen hash table.</summary>
-        /// <param name="hashCodes">Pre-calculated hash codes.</param>
-        /// <param name="storeDestIndexFromSrcIndex">A delegate that assigns the index to a specific entry. It's passed the destination and source indices.</param>
+        /// <param name="hashCodes">Pre-calculated hash codes. When the method finishes, each value has been replaced by that entry's destination index.</param>
         /// <param name="optimizeForReading">true to spend additional effort tuning for subsequent read speed on the table; false to prioritize construction time.</param>
+        /// <param name="hashCodesAreUnique">true when the input hash codes are already unique (for example, when provided from a dictionary of integers).</param>
         /// <remarks>
         /// This method will iterate through the incoming entries and will invoke the hasher on each once.
         /// It will then determine the optimal number of hash buckets to allocate and will populate the
-        /// bucket table. In the process of doing so, it calls out to the <paramref name="storeDestIndexFromSrcIndex"/> to indicate
-        /// the resulting index for that entry. <see cref="FindMatchingEntries(int, out int, out int)"/>
-        /// then uses this index to reference individual entries by indexing into <see cref="HashCodes"/>.
+        /// bucket table. The caller is responsible for consuming the destination indices written back to <paramref name="hashCodes"/> and updating its own entries (if desired).
+        /// <see cref="FindMatchingEntries(int, out int, out int)"/> then uses this index to reference individual entries by indexing into <see cref="HashCodes"/>.
         /// </remarks>
         /// <returns>A frozen hash table.</returns>
-        public static FrozenHashTable Create(ReadOnlySpan<int> hashCodes, Action<int, int> storeDestIndexFromSrcIndex, bool optimizeForReading = true)
+        public static FrozenHashTable Create(Span<int> hashCodes, bool optimizeForReading = true, bool hashCodesAreUnique = false)
         {
             // Determine how many buckets to use. This might be fewer than the number of entries
             // if any entries have identical hashcodes (not just different hashcodes that might
             // map to the same bucket).
-            int numBuckets = CalcNumBuckets(hashCodes, optimizeForReading);
+            int numBuckets = CalcNumBuckets(hashCodes, optimizeForReading, hashCodesAreUnique);
             ulong fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)numBuckets);

             // Create two spans:
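For context, a minimal sketch of how a caller might consume the new in-place contract. The `entries` array and the hashing step here are illustrative only (the real callers are the FrozenDictionary/FrozenSet factories), and `FrozenHashTable` is internal to System.Collections.Frozen, so this assumes access to that type:

```csharp
// Illustrative caller, not the actual FrozenDictionary code.
string[] entries = { "alpha", "beta", "gamma" };

// 1. Compute one hash code per entry into a mutable buffer.
int[] buffer = new int[entries.Length];
for (int i = 0; i < entries.Length; i++)
{
    buffer[i] = entries[i].GetHashCode();
}

// 2. Create the table. On return, buffer[i] holds the destination index for
//    source entry i, replacing the old Action<int, int> callback.
FrozenHashTable table = FrozenHashTable.Create(buffer, optimizeForReading: true, hashCodesAreUnique: false);

// 3. Move each entry into its assigned slot.
string[] slots = new string[entries.Length];
for (int srcIndex = 0; srcIndex < entries.Length; srcIndex++)
{
    slots[buffer[srcIndex]] = entries[srcIndex];
}
```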
@@ -100,8 +98,10 @@ public static FrozenHashTable Create(ReadOnlySpan<int> hashCodes, Action<int, in
                     bucketStart = count;
                     while (index >= 0)
                     {
-                        hashtableHashcodes[count] = hashCodes[index];
-                        storeDestIndexFromSrcIndex(count, index);
+                        ref int hashCode = ref hashCodes[index];
+                        hashtableHashcodes[count] = hashCode;
+                        // We have used the hash code for the last time; now we re-use the buffer to store the destination index.
+                        hashCode = count;
                         count++;
                         bucketCount++;
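The write-back hinges on a `ref` local into the span; a standalone illustration of that pattern, independent of the PR's types:

```csharp
Span<int> buffer = stackalloc int[] { 10, 20, 30 };

ref int slot = ref buffer[1];
int oldValue = slot; // last read of the original value
slot = 99;           // writes through the ref, so the span itself is updated

Console.WriteLine(oldValue);  // 20
Console.WriteLine(buffer[1]); // 99
```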
@@ -144,9 +144,10 @@ public void FindMatchingEntries(int hashCode, out int startIndex, out int endInd
         /// sizes, starting at the exact number of hash codes and incrementing the bucket count by 1 per trial,
         /// this is a trade-off between speed of determining a good number of buckets and maximal density.
         /// </remarks>
-        private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForReading)
+        private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForReading, bool hashCodesAreUnique)
         {
             Debug.Assert(hashCodes.Length != 0);
+            Debug.Assert(!hashCodesAreUnique || new HashSet<int>(hashCodes.ToArray()).Count == hashCodes.Length);

             const double AcceptableCollisionRate = 0.05;  // What is a satisfactory rate of hash collisions?
             const int LargeInputSizeThreshold = 1000;     // What is the limit for an input to be considered "small"?
@@ -159,38 +160,44 @@ private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForR
             }

             // Filter out duplicate codes, since no increase in buckets will avoid collisions from duplicate input hash codes.
-            var codes =
+            HashSet<int>? codes = null;
+            int uniqueCodesCount = hashCodes.Length;
+            if (!hashCodesAreUnique)
+            {
+                codes =
 #if NETCOREAPP2_0_OR_GREATER
-                new HashSet<int>(hashCodes.Length);
+                    new HashSet<int>(hashCodes.Length);
 #else
-                new HashSet<int>();
+                    new HashSet<int>();
 #endif
-            foreach (int hashCode in hashCodes)
-            {
-                codes.Add(hashCode);
+                foreach (int hashCode in hashCodes)
+                {
+                    codes.Add(hashCode);
+                }
+                uniqueCodesCount = codes.Count;
             }
-            Debug.Assert(codes.Count != 0);
+            Debug.Assert(uniqueCodesCount != 0);

             // In our precomputed primes table, find the index of the smallest prime that's at least as large as our number of
             // hash codes. If there are more codes than in our precomputed primes table, which accommodates millions of values,
             // give up and just use the next prime.
             ReadOnlySpan<int> primes = HashHelpers.Primes;
             int minPrimeIndexInclusive = 0;
-            while ((uint)minPrimeIndexInclusive < (uint)primes.Length && codes.Count > primes[minPrimeIndexInclusive])
+            while ((uint)minPrimeIndexInclusive < (uint)primes.Length && uniqueCodesCount > primes[minPrimeIndexInclusive])
             {
                 minPrimeIndexInclusive++;
             }

             if (minPrimeIndexInclusive >= primes.Length)
             {
-                return HashHelpers.GetPrime(codes.Count);
+                return HashHelpers.GetPrime(uniqueCodesCount);
             }

             // Determine the largest number of buckets we're willing to use, based on a multiple of the number of inputs.
             // For smaller inputs, we allow for a larger multiplier.
             int maxNumBuckets =
-                codes.Count *
-                (codes.Count >= LargeInputSizeThreshold ? MaxLargeBucketTableMultiplier : MaxSmallBucketTableMultiplier);
+                uniqueCodesCount *
+                (uniqueCodesCount >= LargeInputSizeThreshold ? MaxLargeBucketTableMultiplier : MaxSmallBucketTableMultiplier);

             // Find the index of the smallest prime that accommodates our max buckets.
             int maxPrimeIndexExclusive = minPrimeIndexInclusive;
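To make the bucket-count window concrete, here is a rough standalone sketch of the selection logic above. The primes table is heavily abbreviated and the two multipliers are placeholders, not the real constants in FrozenHashTable:

```csharp
// Rough sketch only: abbreviated primes table, placeholder multipliers.
static (int minBuckets, int maxBuckets) GetBucketWindow(int uniqueCodesCount)
{
    // Stand-in for HashHelpers.Primes; the real table extends into the millions.
    ReadOnlySpan<int> primes = stackalloc int[] { 3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197 };

    const int LargeInputSizeThreshold = 1000;     // same threshold as above
    const int MaxSmallBucketTableMultiplier = 16; // placeholder value
    const int MaxLargeBucketTableMultiplier = 3;  // placeholder value

    // The most buckets we are willing to pay for, as a multiple of the input size;
    // smaller inputs are allowed a larger multiplier.
    int maxBuckets = uniqueCodesCount *
        (uniqueCodesCount >= LargeInputSizeThreshold ? MaxLargeBucketTableMultiplier : MaxSmallBucketTableMultiplier);

    // Smallest prime that could give every unique code its own bucket.
    int minIndex = 0;
    while (minIndex < primes.Length && uniqueCodesCount > primes[minIndex])
    {
        minIndex++;
    }

    if (minIndex == primes.Length)
    {
        // Past the table: the real code falls back to HashHelpers.GetPrime(uniqueCodesCount).
        return (uniqueCodesCount, maxBuckets);
    }

    return (primes[minIndex], maxBuckets);
}
```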
@@ -209,7 +216,7 @@ private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForR
             int[] seenBuckets = ArrayPool<int>.Shared.Rent((maxNumBuckets / BitsPerInt32) + 1);

             int bestNumBuckets = maxNumBuckets;
-            int bestNumCollisions = codes.Count;
+            int bestNumCollisions = uniqueCodesCount;

             // Iterate through each available prime between the min and max discovered. For each, compute
             // the collision ratio.
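The rented `seenBuckets` array is used as a bit set with one bit per candidate bucket; a minimal self-contained sketch of that test-and-set technique, without the ArrayPool rental:

```csharp
const int BitsPerInt32 = 32;
int numBuckets = 97;

// One bit per bucket packed into ints; the +1 covers the final partial word.
int[] seenBuckets = new int[(numBuckets / BitsPerInt32) + 1];

// Returns true when the bucket was already occupied (a collision); otherwise marks it seen.
bool TestAndSet(uint bucketNum)
{
    // C# masks int shift counts to the low 5 bits, so (1 << (int)bucketNum)
    // is equivalent to (1 << (int)(bucketNum % 32)); the real code relies on this too.
    if ((seenBuckets[bucketNum / BitsPerInt32] & (1 << (int)bucketNum)) != 0)
    {
        return true;
    }

    seenBuckets[bucketNum / BitsPerInt32] |= 1 << (int)bucketNum;
    return false;
}

Console.WriteLine(TestAndSet(5)); // False: first hit on bucket 5
Console.WriteLine(TestAndSet(5)); // True: bucket 5 already seen
```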
@@ -222,22 +229,48 @@ private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForR
                 // Determine the bucket for each hash code and mark it as seen. If it was already seen,
                 // track it as a collision.
                 int numCollisions = 0;
-                foreach (int code in codes)
-                {
-                    uint bucketNum = (uint)code % (uint)numBuckets;
-                    if ((seenBuckets[bucketNum / BitsPerInt32] & (1 << (int)bucketNum)) != 0)
-                    {
-                        numCollisions++;
-                        if (numCollisions >= bestNumCollisions)
-                        {
-                            // If we've already hit the previously known best number of collisions,
-                            // there's no point in continuing as worst case we'd just use that.
-                            break;
-                        }
-                    }
-                    else
-                    {
-                        seenBuckets[bucketNum / BitsPerInt32] |= 1 << (int)bucketNum;
-                    }
-                }
+
+                if (codes is not null && uniqueCodesCount != hashCodes.Length)
+                {
+                    foreach (int code in codes)
+                    {
+                        uint bucketNum = (uint)code % (uint)numBuckets;
+                        if ((seenBuckets[bucketNum / BitsPerInt32] & (1 << (int)bucketNum)) != 0)
+                        {
+                            numCollisions++;
+                            if (numCollisions >= bestNumCollisions)
+                            {
+                                // If we've already hit the previously known best number of collisions,
+                                // there's no point in continuing as worst case we'd just use that.
+                                break;
+                            }
+                        }
+                        else
+                        {
+                            seenBuckets[bucketNum / BitsPerInt32] |= 1 << (int)bucketNum;
+                        }
+                    }
+                }
+                else
+                {
+                    // All of the hash codes in hashCodes are unique. In such a scenario, it's faster to iterate over a span.
+                    foreach (int code in hashCodes)
+                    {
+                        uint bucketNum = (uint)code % (uint)numBuckets;
+                        if ((seenBuckets[bucketNum / BitsPerInt32] & (1 << (int)bucketNum)) != 0)
+                        {
+                            numCollisions++;
+                            if (numCollisions >= bestNumCollisions)
+                            {
+                                // If we've already hit the previously known best number of collisions,
+                                // there's no point in continuing as worst case we'd just use that.
+                                break;
+                            }
+                        }
+                        else
+                        {
+                            seenBuckets[bucketNum / BitsPerInt32] |= 1 << (int)bucketNum;
+                        }
+                    }
+                }

Review thread on lines +257 to 274 (the duplicated loop):

Reviewer: Would computing the bitmask value once (instead of in line 260 and 272) help anything?

stephentoub: After all of the work done to clone the inputs, allocate the dictionaries, analyze the keys, and so on, iterating over a span instead of a HashSet really provides a meaningful enough gain to duplicate this?

Author: Yes: "Up to +5% gain where string keys turned out to have unique hash codes"

stephentoub: I saw that in the PR description, but I'm still skeptical.

Author: I've used a profiler and benchmarked it more than once. This loop is the hottest place in the entire process of creating frozen dictionaries/hash sets (because all other parts got optimized in other PRs and are now relatively cheap).

stephentoub: Still skeptical :) But putting aside my skepticism, can you at least dedup this by putting it into an aggressively-inlined helper?

Author: @stephentoub do you mind if I do that in my next PR that is going to touch this area?

stephentoub: Sure
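The dedup was deferred to that follow-up PR; as a sketch of the direction being discussed, here is one way such an aggressively-inlined helper might look. The helper name and shape are invented here, not taken from the eventual change:

```csharp
using System.Runtime.CompilerServices;

private const int BitsPerInt32 = 32; // matches the existing constant

// Hypothetical helper: the two loops differ only in what they enumerate,
// so the per-code body can be factored out and left to the JIT to inline back.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsCollision(int code, uint numBuckets, int[] seenBuckets)
{
    uint bucketNum = (uint)code % numBuckets;
    if ((seenBuckets[bucketNum / BitsPerInt32] & (1 << (int)bucketNum)) != 0)
    {
        return true; // bucket already occupied: the caller counts a collision
    }

    seenBuckets[bucketNum / BitsPerInt32] |= 1 << (int)bucketNum;
    return false;
}
```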
@@ -247,7 +280,7 @@ private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForR
                 {
                     bestNumBuckets = numBuckets;

-                    if (numCollisions / (double)codes.Count <= AcceptableCollisionRate)
+                    if (numCollisions / (double)uniqueCodesCount <= AcceptableCollisionRate)
                     {
                         break;
                     }
Review comment (on the first loop): Would computing the bitmask value once (instead of in line 238 and 250) help anything? e.g.
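The commenter's example was truncated in this capture. A hypothetical reconstruction of the kind of change being suggested, hoisting the word and the bit into locals so each is computed once per hash code (`word` and `bit` are names invented here):

```csharp
uint bucketNum = (uint)code % (uint)numBuckets;

// Compute the word and the bit once, then reuse them for both the test and the set.
ref int word = ref seenBuckets[bucketNum / BitsPerInt32];
int bit = 1 << (int)bucketNum;

if ((word & bit) != 0)
{
    numCollisions++;
    if (numCollisions >= bestNumCollisions)
    {
        break;
    }
}
else
{
    word |= bit;
}
```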