Skip to content

Commit

Permalink
Merge branch 'dev' into add-std-overloads
Browse files Browse the repository at this point in the history
  • Loading branch information
sleeepyjack authored Oct 3, 2024
2 parents bb4f5f6 + de9d8c8 commit 6798552
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 15 deletions.
17 changes: 10 additions & 7 deletions benchmarks/bloom_filter/add_bench.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ void bloom_filter_add(nvbench::state& state,

auto const num_keys = state.get_int64("NumInputs");
auto const filter_size_mb = state.get_int64("FilterSizeMB");
auto const pattern_bits = state.get_int64("PatternBits");
auto const pattern_bits = state.get_int64_or_default("PatternBits", WordsPerBlock);

try {
auto const policy = policy_type{static_cast<uint32_t>(pattern_bits)};
Expand All @@ -70,6 +70,12 @@ void bloom_filter_add(nvbench::state& state,

filter_type filter{num_sub_filters, {}, {static_cast<uint32_t>(pattern_bits)}};

state.collect_dram_throughput();
state.collect_l1_hit_rates();
state.collect_l2_hit_rates();
state.collect_loads_efficiency();
state.collect_stores_efficiency();

add_fpr_summary(state, filter);

state.exec([&](nvbench::launch& launch) {
Expand All @@ -87,8 +93,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_add,
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
.set_max_noise(defaults::MAX_NOISE)
.add_int64_axis("NumInputs", {defaults::BF_N})
.add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE)
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
.add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE);

NVBENCH_BENCH_TYPES(bloom_filter_add,
NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
Expand All @@ -100,8 +105,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_add,
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
.set_max_noise(defaults::MAX_NOISE)
.add_int64_axis("NumInputs", {defaults::BF_N})
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB})
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});

NVBENCH_BENCH_TYPES(bloom_filter_add,
NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
Expand All @@ -113,5 +117,4 @@ NVBENCH_BENCH_TYPES(bloom_filter_add,
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
.set_max_noise(defaults::MAX_NOISE)
.add_int64_axis("NumInputs", {defaults::BF_N})
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB})
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});
17 changes: 10 additions & 7 deletions benchmarks/bloom_filter/contains_bench.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void bloom_filter_contains(

auto const num_keys = state.get_int64("NumInputs");
auto const filter_size_mb = state.get_int64("FilterSizeMB");
auto const pattern_bits = state.get_int64("PatternBits");
auto const pattern_bits = state.get_int64_or_default("PatternBits", WordsPerBlock);

try {
auto const policy = policy_type{static_cast<uint32_t>(pattern_bits)};
Expand All @@ -73,6 +73,12 @@ void bloom_filter_contains(

filter_type filter{num_sub_filters, {}, {static_cast<uint32_t>(pattern_bits)}};

state.collect_dram_throughput();
state.collect_l1_hit_rates();
state.collect_l2_hit_rates();
state.collect_loads_efficiency();
state.collect_stores_efficiency();

add_fpr_summary(state, filter);

filter.add(keys.begin(), keys.end());
Expand All @@ -92,8 +98,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains,
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
.set_max_noise(defaults::MAX_NOISE)
.add_int64_axis("NumInputs", {defaults::BF_N})
.add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE)
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
.add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE);

NVBENCH_BENCH_TYPES(bloom_filter_contains,
NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
Expand All @@ -105,8 +110,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains,
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
.set_max_noise(defaults::MAX_NOISE)
.add_int64_axis("NumInputs", {defaults::BF_N})
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB})
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});

NVBENCH_BENCH_TYPES(bloom_filter_contains,
NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
Expand All @@ -118,5 +122,4 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains,
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
.set_max_noise(defaults::MAX_NOISE)
.add_int64_axis("NumInputs", {defaults::BF_N})
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB})
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});
1 change: 0 additions & 1 deletion benchmarks/bloom_filter/defaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ using BF_WORD = nvbench::uint32_t;
// Default number of input keys; supplied as the only value of the "NumInputs" axis in the
// bloom filter add/contains benchmarks.
static constexpr auto BF_N = 400'000'000;
// Default filter size; supplied as the "FilterSizeMB" axis value in the benchmarks that do not
// sweep over a range of sizes.
static constexpr auto BF_SIZE_MB = 2'000;
// Default number of words per filter block.
// NOTE(review): presumably feeds the "WordsPerBlock" type axis of the benchmarks — confirm at the
// use site, which is not visible here.
static constexpr auto BF_WORDS_PER_BLOCK = 8;
static constexpr auto BF_PATTERN_BITS = BF_WORDS_PER_BLOCK;

// Filter sizes swept by the "FilterSizeMB" axis in the size-range benchmarks: every power of two
// from 1 to 2048.
// NOTE(review): the `_CACHE` suffix suggests the range is meant to probe cache-size effects —
// confirm with the benchmark authors.
auto const BF_SIZE_MB_RANGE_CACHE =
std::vector<nvbench::int64_t>{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048};
Expand Down
14 changes: 14 additions & 0 deletions include/cuco/detail/static_set/static_set_ref.inl
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,20 @@ static_set_ref<Key, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::r
this->storage_ref()};
}

// Out-of-line definition of `static_set_ref::with_key_eq`.
// Forwards `key_equal` unchanged to `rebind_key_eq` and returns whatever that call returns, so the
// two member functions are interchangeable from the caller's point of view.
template <typename Key,
cuda::thread_scope Scope,
typename KeyEqual,
typename ProbingScheme,
typename StorageRef,
typename... Operators>
template <typename NewKeyEqual>
__host__ __device__ constexpr auto
static_set_ref<Key, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::with_key_eq(
NewKeyEqual const& key_equal) const noexcept
{
// Pure delegation: the return type is deduced from `rebind_key_eq`'s result.
return this->rebind_key_eq(key_equal);
}

template <typename Key,
cuda::thread_scope Scope,
typename KeyEqual,
Expand Down
13 changes: 13 additions & 0 deletions include/cuco/static_set_ref.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,19 @@ class static_set_ref
[[nodiscard]] __host__ __device__ constexpr auto rebind_key_eq(
NewKeyEqual const& key_equal) const noexcept;

/**
* @brief Makes a copy of the current device reference with the given key comparator
*
* @note This function forwards to `rebind_key_eq(key_equal)` and returns its result.
*
* @tparam NewKeyEqual The new key equal type
*
* @param key_equal New key comparator
*
* @return Copy of the current device ref that uses `key_equal` as its key comparator
*/
template <typename NewKeyEqual>
[[nodiscard]] __host__ __device__ constexpr auto with_key_eq(
NewKeyEqual const& key_equal) const noexcept;

/**
* @brief Makes a copy of the current device reference with the given hasher
*
Expand Down

0 comments on commit 6798552

Please sign in to comment.