Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New hash function alternatives #310

Merged
merged 28 commits into from
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
d0ffc9f
Add new hasher using XXH64 reference implementation
sleeepyjack Apr 19, 2023
8eff298
Add benchmark for hash functions
sleeepyjack Apr 19, 2023
1b2e6ce
Add Murmur3 integer finalizers/hashers
sleeepyjack Apr 19, 2023
83ebf3a
Add XXH32 hash function
sleeepyjack Apr 19, 2023
d43cf3a
Make sure hash computation does not get optimized out benchmark
sleeepyjack Apr 19, 2023
66e0887
Fixes and optimizations for XXH32
sleeepyjack Apr 19, 2023
16b17b2
Run hash func benchmark in a tight loop
sleeepyjack Apr 20, 2023
f6b102a
Merge remote-tracking branch 'upstream/dev' into xxhash64
sleeepyjack Apr 20, 2023
23e97d5
Port XXH64 to new design
sleeepyjack Apr 20, 2023
0833638
Add more tests
sleeepyjack Apr 20, 2023
01f7120
Merge remote-tracking branch 'upstream/dev' into new-hashers
sleeepyjack May 19, 2023
8d59538
Add guard around int128 in unit test
sleeepyjack May 19, 2023
fb43c0f
Move hash function implementations to a separate directory
sleeepyjack May 19, 2023
311c831
Add default hasher
sleeepyjack May 19, 2023
ff2ce08
Roll back default hasher
sleeepyjack May 19, 2023
216064c
Add benchmark for static_set::find table sizes
sleeepyjack May 19, 2023
cd7469b
Add set contains capacity benchmark
sleeepyjack May 19, 2023
3b58127
Add xxhash copyright disclaimer
sleeepyjack May 19, 2023
ef5d850
Set xxhash as the default hash function
sleeepyjack May 19, 2023
d51a972
Apply suggestions from code review
sleeepyjack May 19, 2023
d72bd38
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] May 19, 2023
d550741
Undo xxhash as default hasher
sleeepyjack May 19, 2023
339f907
Define nvbench size range for testing small data structures
sleeepyjack May 19, 2023
8b1cf87
Remane fmix functions
sleeepyjack May 20, 2023
5d468d5
Remove unnecessary optimizations for better readability
sleeepyjack May 22, 2023
3d3f730
Revert xxhash as default hasher in static_set
sleeepyjack May 25, 2023
3d05d30
Hardcode grid size in benchmark
sleeepyjack May 25, 2023
73e8412
Merge remote-tracking branch 'upstream/dev' into new-hashers
sleeepyjack May 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,6 @@ ConfigureBench(DYNAMIC_MAP_BENCH
hash_table/dynamic_map/find_bench.cu
hash_table/dynamic_map/contains_bench.cu
hash_table/dynamic_map/erase_bench.cu)

ConfigureBench(HASH_BENCH
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Match existing style so this doesn't look like it belongs under the dynamic map benchmarks:

Suggested change
ConfigureBench(HASH_BENCH
###################################################################################################
# - hash benchmarks -------------------------------------------------------------------------------
ConfigureBench(HASH_BENCH

hash_bench.cu)
4 changes: 3 additions & 1 deletion benchmarks/defaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ auto constexpr SKEW = 0.5;
auto constexpr BATCH_SIZE = 1'000'000;
auto constexpr INITIAL_SIZE = 50'000'000;

auto const N_RANGE = nvbench::range(10'000'000, 100'000'000, 20'000'000);
auto const N_RANGE = nvbench::range(10'000'000, 100'000'000, 20'000'000);
auto const N_RANGE_CACHE =
std::vector<nvbench::int64_t>{8'000, 80'000, 800'000, 8'000'000, 80'000'000};
auto const OCCUPANCY_RANGE = nvbench::range(0.1, 0.9, 0.1);
auto const MULTIPLICITY_RANGE = std::vector<nvbench::int64_t>{1, 2, 4, 8, 16};
auto const MATCHING_RATE_RANGE = nvbench::range(0.1, 1., 0.1);
Expand Down
105 changes: 105 additions & 0 deletions benchmarks/hash_bench.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <defaults.hpp>
#include <utils.hpp>

#include <cuco/detail/utils.hpp>
#include <cuco/hash_functions.cuh>

#include <nvbench/nvbench.cuh>

#include <thrust/device_vector.h>

#include <cstdint>

using namespace cuco::benchmark;
using namespace cuco::utility;
Comment on lines +29 to +30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to avoid using declarations. If you write the full types, that will help readers who are unfamiliar with cuco internals (like me) know what namespaces contain the identifiers being used.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, we consider benchmarks as user-facing code so we should not make these kinds of shortcuts. Since these namespaces are a bit wordy, how about two-character namespace aliases?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind the verbosity at all, honestly. 😄 I'd vote cuco::utility over ut or cu or util any day.


/**
 * @brief Key type used by the hash benchmark whose payload is `Words` 32-bit words.
 *
 * @tparam Words Number of `int32_t` words stored in the key
 */
template <int32_t Words>
struct large_key {
  /**
   * @brief Constructs a key in which every word holds the same value.
   *
   * @param seed Value copied into each of the `Words` words
   */
  constexpr __host__ __device__ large_key(int32_t seed) noexcept
  {
#pragma unroll Words
    for (int32_t word = 0; word < Words; ++word) {
      data_[word] = seed;
    }
  }

 private:
  int32_t data_[Words];  ///< Key payload, filled by the constructor
};

/**
 * @brief Kernel that repeatedly evaluates a hash function on keys constructed from indices.
 *
 * Every thread starts at its global thread index and advances by `gridDim.x * BlockSize`
 * until it reaches `n`. For each visited index a key is constructed from that index and
 * hashed 100 times; all hash values are summed into a per-thread accumulator.
 *
 * @tparam BlockSize Block size used for the index computation (used in place of `blockDim.x`,
 * so it is expected to match the block size of the launch)
 * @tparam Hasher Hash functor type exposing `argument_type` and `result_type`
 * @tparam OutputIt Type of the output location, indexable by a thread index
 *
 * @param hash Hash functor to evaluate
 * @param n Exclusive upper bound of the indices that are turned into keys
 * @param out Output location, only accessed if `materialize_result` is `true`
 * @param materialize_result If `true`, each thread whose global index is below `n` stores its
 * accumulated value at `out[<global thread index>]`
 */
template <int32_t BlockSize, typename Hasher, typename OutputIt>
__global__ void hash_bench_kernel(Hasher hash,
                                  cuco::detail::index_type n,
                                  OutputIt out,
                                  bool materialize_result)
{
  using index_type = cuco::detail::index_type;

  // Widen before multiplying so the products are not evaluated in 32-bit unsigned arithmetic
  index_type const gid         = static_cast<index_type>(BlockSize) * blockIdx.x + threadIdx.x;
  index_type const loop_stride = static_cast<index_type>(gridDim.x) * BlockSize;
  index_type idx               = gid;
  typename Hasher::result_type agg = 0;

  while (idx < n) {
    typename Hasher::argument_type key(idx);
    for (int32_t i = 0; i < 100; ++i) {  // execute hash func 100 times
      agg += hash(key);
    }
    idx += loop_stride;
  }

  // Threads beyond `n` never hashed anything and must not write: the caller only provides
  // `n` output slots when results are materialized.
  if (materialize_result && gid < n) { out[gid] = agg; }
}

/**
 * @brief A benchmark evaluating performance of various hash functions
 *
 * Launches `hash_bench_kernel` with the default-constructed hasher `Hash{}` on the stream
 * provided by nvbench and reports the number of keys as the element count.
 *
 * @tparam Hash Hash function type under test
 *
 * @param state nvbench state of the current run; the optional int64 axes "NumInputs" and
 * "GridSize" override the number of keys and the grid size, respectively
 */
template <typename Hash>
void hash_eval(nvbench::state& state, nvbench::type_list<Hash>)
{
  bool const materialize_result = false;
  constexpr auto block_size     = 128;
  auto const num_keys  = state.get_int64_or_default("NumInputs", defaults::N * 10);
  auto const grid_size = state.get_int64_or_default("GridSize", SDIV(num_keys, block_size * 16));

  // One slot per key is only needed if results are stored; otherwise a single element suffices
  thrust::device_vector<typename Hash::result_type> hash_values((materialize_result) ? num_keys
                                                                                     : 1);

  state.add_element_count(num_keys);

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    hash_bench_kernel<block_size><<<grid_size, block_size, 0, launch.get_stream()>>>(
      Hash{}, num_keys, hash_values.begin(), materialize_result);
  });
}

// Registers `hash_eval` with nvbench under the name "hash_function_eval". The single type
// axis, named "Hash", enumerates the hasher instantiations listed below; the maximum noise
// is taken from `defaults::MAX_NOISE`.
NVBENCH_BENCH_TYPES(
hash_eval,
NVBENCH_TYPE_AXES(nvbench::type_list<cuco::murmurhash3_32<nvbench::int32_t>,
cuco::murmurhash3_32<nvbench::int64_t>,
cuco::murmurhash3_32<large_key<32>>, // 32*4bytes
cuco::xxhash_32<nvbench::int32_t>,
cuco::xxhash_32<nvbench::int64_t>,
cuco::xxhash_32<large_key<32>>,
cuco::xxhash_64<nvbench::int32_t>,
cuco::xxhash_64<nvbench::int64_t>,
cuco::xxhash_64<large_key<32>>,
cuco::murmurhash3_fmix_32<nvbench::int32_t>,
cuco::murmurhash3_fmix_64<nvbench::int64_t>>))
.set_name("hash_function_eval")
.set_type_axes_names({"Hash"})
.set_max_noise(defaults::MAX_NOISE);
7 changes: 7 additions & 0 deletions benchmarks/hash_table/static_set/contains_bench.cu
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,10 @@ NVBENCH_BENCH_TYPES(static_set_contains,
.set_type_axes_names({"Key", "Distribution"})
.set_max_noise(defaults::MAX_NOISE)
.add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);

// Additional registration of `static_set_contains` restricted to the `distribution::unique`
// key distribution; the "NumInputs" axis sweeps over `defaults::N_RANGE_CACHE`.
NVBENCH_BENCH_TYPES(static_set_contains,
                    NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
                                      nvbench::type_list<distribution::unique>))
  .set_name("static_set_contains_unique_capacity")
  .set_type_axes_names({"Key", "Distribution"})
  .add_int64_axis("NumInputs", defaults::N_RANGE_CACHE);
7 changes: 7 additions & 0 deletions benchmarks/hash_table/static_set/find_bench.cu
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,10 @@ NVBENCH_BENCH_TYPES(static_set_find,
.set_type_axes_names({"Key", "Distribution"})
.set_max_noise(defaults::MAX_NOISE)
.add_float64_axis("MatchingRate", defaults::MATCHING_RATE_RANGE);

// Additional registration of `static_set_find` restricted to the `distribution::unique`
// key distribution; the "NumInputs" axis sweeps over `defaults::N_RANGE_CACHE`.
NVBENCH_BENCH_TYPES(static_set_find,
NVBENCH_TYPE_AXES(defaults::KEY_TYPE_RANGE,
nvbench::type_list<distribution::unique>))
.set_name("static_set_find_unique_capacity")
.set_type_axes_names({"Key", "Distribution"})
.add_int64_axis("NumInputs", defaults::N_RANGE_CACHE);
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2022, NVIDIA CORPORATION.
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,8 +16,94 @@

#pragma once

#include <cstdint>

namespace cuco::detail {

/**
 * @brief The 32bit integer finalizer hash function of `MurmurHash3`.
 *
 * @note `Key` must be 4 bytes in size; this is checked at compile time. The key is converted
 * to `uint32_t` with `static_cast`, i.e., by value, before it is mixed.
 *
 * @tparam Key The type of the values to hash
 */
template <typename Key>
struct MurmurHash3_fmix32 {
  static_assert(sizeof(Key) == 4, "Key type must be 4 bytes in size.");

  using argument_type = Key;       ///< The type of the values taken as argument
  using result_type   = uint32_t;  ///< The type of the hash values produced

  /**
   * @brief Constructs a MurmurHash3_fmix32 hash function with the given `seed`.
   *
   * @param seed A custom number to randomize the resulting hash value
   */
  __host__ __device__ constexpr MurmurHash3_fmix32(uint32_t seed = 0) : seed_{seed} {}

  /**
   * @brief Returns a hash value for its argument, as a value of type `result_type`.
   *
   * @param key The input argument to hash
   * @return A resulting hash value for `key`
   */
  constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
  {
    // The seed is folded into the key before the xor-shift/multiply mixing rounds
    uint32_t h = static_cast<uint32_t>(key) ^ seed_;
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
  }

 private:
  uint32_t seed_;  ///< Seed XOR-ed into the key prior to mixing
};

/**
 * @brief `MurmurHash3`'s 64bit integer finalizer, usable as a stand-alone hash function.
 *
 * @note Only key types that are exactly 8 bytes wide are accepted (checked at compile time).
 *
 * @tparam Key The type of the values to hash
 */
template <typename Key>
struct MurmurHash3_fmix64 {
  static_assert(sizeof(Key) == 8, "Key type must be 8 bytes in size.");

  using argument_type = Key;       ///< Type accepted by the call operator
  using result_type   = uint64_t;  ///< Type of the produced hash value

  /**
   * @brief Creates the hasher and stores `seed` for later use.
   *
   * @param seed Value used to randomize the produced hash values
   */
  __host__ __device__ constexpr MurmurHash3_fmix64(uint64_t seed = 0) : seed_{seed} {}

  /**
   * @brief Computes the hash value of `key`.
   *
   * @param key The value to hash
   * @return The hash value of `key` as a `result_type`
   */
  constexpr result_type __host__ __device__ operator()(Key const& key) const noexcept
  {
    // Seed the input, then run two xor-shift/multiply rounds followed by a final xor-shift
    uint64_t mixed = static_cast<uint64_t>(key) ^ seed_;
    mixed          = (mixed ^ (mixed >> 33)) * 0xff51afd7ed558ccd;
    mixed          = (mixed ^ (mixed >> 33)) * 0xc4ceb9fe1a85ec53;
    return mixed ^ (mixed >> 33);
  }

 private:
  uint64_t seed_;  ///< Value XOR-ed into every key before mixing
};

/**
* @brief A `MurmurHash3_32` hash function to hash the given argument on host and device.
*
Expand All @@ -38,15 +124,12 @@ struct MurmurHash3_32 {
using argument_type = Key; ///< The type of the values taken as argument
using result_type = uint32_t; ///< The type of the hash values produced

/// Default constructor
__host__ __device__ constexpr MurmurHash3_32() : MurmurHash3_32{0} {}

/**
* @brief Constructs a MurmurHash3_32 hash function with the given `seed`.
*
* @param seed A custom number to randomize the resulting hash value
*/
__host__ __device__ constexpr MurmurHash3_32(uint32_t seed) : m_seed(seed) {}
__host__ __device__ constexpr MurmurHash3_32(uint32_t seed = 0) : fmix32_{0}, seed_{seed} {}

/**
* @brief Returns a hash value for its argument, as a value of type `result_type`.
Expand All @@ -60,7 +143,7 @@ struct MurmurHash3_32 {
const uint8_t* const data = (const uint8_t*)&key;
constexpr int nblocks = len / 4;

uint32_t h1 = m_seed;
uint32_t h1 = seed_;
constexpr uint32_t c1 = 0xcc9e2d51;
constexpr uint32_t c2 = 0x1b873593;
//----------
Expand Down Expand Up @@ -92,7 +175,7 @@ struct MurmurHash3_32 {
//----------
// finalization
h1 ^= len;
h1 = fmix32(h1);
h1 = fmix32_(h1);
return h1;
}

Expand All @@ -102,16 +185,8 @@ struct MurmurHash3_32 {
return (x << r) | (x >> (32 - r));
}

constexpr __host__ __device__ uint32_t fmix32(uint32_t h) const noexcept
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
uint32_t m_seed;
MurmurHash3_fmix32<uint32_t> fmix32_;
uint32_t seed_;
};

} // namespace cuco::detail
} // namespace cuco::detail
Loading