NVIDIA · amukkara · May 11, 2023 · Aug 27, 2023 · Aug 27, 2023 · Aug 28, 2023
@@ -83,3 +83,18 @@ ConfigureBench(DYNAMIC_MAP_BENCH
 # - hash function benchmarks ----------------------------------------------------------------------
 ConfigureBench(HASH_BENCH
   hash_bench.cu)
+
+###################################################################################################
+# - dynamic_bitset benchmarks -------------------------------------------------------------------------
+# One source file per benchmarked dynamic_bitset operation: find_next, rank, select, size, test.
+ConfigureBench(DYNAMIC_BITSET_BENCH
+  trie/dynamic_bitset/find_next_bench.cu
+  trie/dynamic_bitset/rank_bench.cu
+  trie/dynamic_bitset/select_bench.cu
+  trie/dynamic_bitset/size_bench.cu
+  trie/dynamic_bitset/test_bench.cu)
+
+###################################################################################################
+# - trie benchmarks -------------------------------------------------------------------------
+# Sources for the trie benchmarks, one file each for insert and lookup.
+ConfigureBench(TRIE_BENCH
+  trie/insert_bench.cu
+  trie/lookup_bench.cu)
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <defaults.hpp>
+#include <utils.hpp>
+
+#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
+#include <cuco/utility/key_generator.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/host_vector.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::experimental::detail::dynamic_bitset::find_next` performance
+ *
+ * Fills a bitset of `NumInputs` bits (default `defaults::N`) from words generated with `Dist`,
+ * then measures one bulk `find_next` call over the query values `0, 1, ..., query_size - 1`,
+ * where `query_size` is `NumInputs / 10` capped at 1'000'000.
+ *
+ * @tparam Dist Distribution tag forwarded to `dist_from_state` when generating the bitset words
+ *
+ * @param state nvbench state that supplies `NumInputs` and runs the measurement
+ */
+template <typename Dist>
+void dynamic_bitset_find_next(nvbench::state& state, nvbench::type_list<Dist>)
+{
+  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
+  using word_type          = typename cuco::experimental::detail::dynamic_bitset<>::word_type;
+  auto const bits_per_word = cuco::experimental::detail::dynamic_bitset<>::bits_per_word;
+  // Ceiling division: enough words to hold `num_bits` bits
+  thrust::host_vector<word_type> keys((num_bits - 1) / bits_per_word + 1);
+
+  key_generator gen;
+  gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+  cuco::experimental::detail::dynamic_bitset bitset;
+  bitset.insert(keys.begin(), keys.end(), num_bits);
+
+  // Clamp explicitly in `size_t` instead of calling unqualified `min` on a mixed
+  // `unsigned long` / `int64_t` pair, which depends on CUDA's global host overloads.
+  size_t const max_queries = 1000 * 1000lu;
+  size_t const tenth       = static_cast<size_t>(num_bits / 10);
+  size_t const query_size  = tenth < max_queries ? tenth : max_queries;
+
+  thrust::device_vector<size_t> inputs(query_size);
+  thrust::sequence(inputs.begin(), inputs.end(), 0);
+  thrust::device_vector<size_t> outputs(query_size);
+
+  state.add_element_count(query_size);
+  // The launch object is not used by the call below, so the parameter is left unnamed.
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    bitset.find_next(inputs.begin(), inputs.end(), outputs.begin());
+  });
+}
+
+// Registers the benchmark with a single type axis named "Distribution" whose only entry is
+// `distribution::gaussian`. No "NumInputs" axis is added by this registration.
+NVBENCH_BENCH_TYPES(dynamic_bitset_find_next,
+                    NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
+  .set_name("dynamic_bitset_find_next")
+  .set_type_axes_names({"Distribution"})
+  .set_max_noise(defaults::MAX_NOISE);
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <defaults.hpp>
+#include <utils.hpp>
+
+#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
+#include <cuco/utility/key_generator.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/host_vector.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::experimental::detail::dynamic_bitset::rank` performance
+ *
+ * Fills a bitset of `NumInputs` bits (default `defaults::N`) from words generated with `Dist`,
+ * then measures one bulk `rank` call over the query values `0, 1, ..., query_size - 1`,
+ * where `query_size` is `NumInputs / 10` capped at 1'000'000.
+ *
+ * @tparam Dist Distribution tag forwarded to `dist_from_state` when generating the bitset words
+ *
+ * @param state nvbench state that supplies `NumInputs` and runs the measurement
+ */
+template <typename Dist>
+void dynamic_bitset_rank(nvbench::state& state, nvbench::type_list<Dist>)
+{
+  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
+  using word_type          = typename cuco::experimental::detail::dynamic_bitset<>::word_type;
+  auto const bits_per_word = cuco::experimental::detail::dynamic_bitset<>::bits_per_word;
+  // Ceiling division: enough words to hold `num_bits` bits
+  thrust::host_vector<word_type> keys((num_bits - 1) / bits_per_word + 1);
+
+  key_generator gen;
+  gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+  cuco::experimental::detail::dynamic_bitset bitset;
+  bitset.insert(keys.begin(), keys.end(), num_bits);
+
+  // Clamp explicitly in `size_t` instead of calling unqualified `min` on a mixed
+  // `unsigned long` / `int64_t` pair, which depends on CUDA's global host overloads.
+  size_t const max_queries = 1000 * 1000lu;
+  size_t const tenth       = static_cast<size_t>(num_bits / 10);
+  size_t const query_size  = tenth < max_queries ? tenth : max_queries;
+
+  thrust::device_vector<size_t> inputs(query_size);
+  thrust::sequence(inputs.begin(), inputs.end(), 0);
+  thrust::device_vector<size_t> outputs(query_size);
+
+  state.add_element_count(query_size);
+  // The launch object is not used by the call below, so the parameter is left unnamed.
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    bitset.rank(inputs.begin(), inputs.end(), outputs.begin());
+  });
+}
+
+// Registers the benchmark with a single type axis named "Distribution" whose only entry is
+// `distribution::gaussian`. No "NumInputs" axis is added by this registration.
+NVBENCH_BENCH_TYPES(dynamic_bitset_rank,
+                    NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
+  .set_name("dynamic_bitset_rank")
+  .set_type_axes_names({"Distribution"})
+  .set_max_noise(defaults::MAX_NOISE);
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <defaults.hpp>
+#include <utils.hpp>
+
+#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
+#include <cuco/utility/key_generator.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/host_vector.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::experimental::detail::dynamic_bitset::select` performance
+ *
+ * Fills a bitset of `NumInputs` bits (default `defaults::N`) from words generated with `Dist`,
+ * then measures one bulk `select` call over the query values `0, 1, ..., query_size - 1`,
+ * where `query_size` is `NumInputs / 10` capped at 1'000'000.
+ *
+ * @tparam Dist Distribution tag forwarded to `dist_from_state` when generating the bitset words
+ *
+ * @param state nvbench state that supplies `NumInputs` and runs the measurement
+ */
+template <typename Dist>
+void dynamic_bitset_select(nvbench::state& state, nvbench::type_list<Dist>)
+{
+  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
+  using word_type          = typename cuco::experimental::detail::dynamic_bitset<>::word_type;
+  auto const bits_per_word = cuco::experimental::detail::dynamic_bitset<>::bits_per_word;
+  // Ceiling division: enough words to hold `num_bits` bits
+  thrust::host_vector<word_type> keys((num_bits - 1) / bits_per_word + 1);
+
+  key_generator gen;
+  gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+  cuco::experimental::detail::dynamic_bitset bitset;
+  bitset.insert(keys.begin(), keys.end(), num_bits);
+
+  // Clamp explicitly in `size_t` instead of calling unqualified `min` on a mixed
+  // `unsigned long` / `int64_t` pair, which depends on CUDA's global host overloads.
+  size_t const max_queries = 1000 * 1000lu;
+  size_t const tenth       = static_cast<size_t>(num_bits / 10);
+  size_t const query_size  = tenth < max_queries ? tenth : max_queries;
+
+  thrust::device_vector<size_t> inputs(query_size);
+  thrust::sequence(inputs.begin(), inputs.end(), 0);
+  thrust::device_vector<size_t> outputs(query_size);
+
+  state.add_element_count(query_size);
+  // The launch object is not used by the call below, so the parameter is left unnamed.
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    bitset.select(inputs.begin(), inputs.end(), outputs.begin());
+  });
+}
+
+// Registers the benchmark with a single type axis named "Distribution" whose only entry is
+// `distribution::gaussian`. No "NumInputs" axis is added by this registration.
+NVBENCH_BENCH_TYPES(dynamic_bitset_select,
+                    NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
+  .set_name("dynamic_bitset_select")
+  .set_type_axes_names({"Distribution"})
+  .set_max_noise(defaults::MAX_NOISE);
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <defaults.hpp>
+#include <utils.hpp>
+
+#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
+#include <cuco/utility/key_generator.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/host_vector.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::experimental::detail::dynamic_bitset::size` performance
+ *
+ * Fills a bitset of `NumInputs` bits (default `defaults::N`) from words generated with `Dist`,
+ * then measures a single `size()` call per benchmark iteration.
+ *
+ * @tparam Dist Distribution tag forwarded to `dist_from_state` when generating the bitset words
+ *
+ * @param state nvbench state that supplies `NumInputs` and runs the measurement
+ */
+template <typename Dist>
+void dynamic_bitset_size(nvbench::state& state, nvbench::type_list<Dist>)
+{
+  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
+  using word_type          = typename cuco::experimental::detail::dynamic_bitset<>::word_type;
+  auto const bits_per_word = cuco::experimental::detail::dynamic_bitset<>::bits_per_word;
+  // Ceiling division: enough words to hold `num_bits` bits
+  thrust::host_vector<word_type> keys((num_bits - 1) / bits_per_word + 1);
+
+  key_generator gen;
+  gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+  cuco::experimental::detail::dynamic_bitset bitset;
+  bitset.insert(keys.begin(), keys.end(), num_bits);
+
+  state.add_element_count(1);
+  // The launch object is unused and the returned size is deliberately discarded; both are
+  // marked as such so the benchmark does not emit unused-parameter/unused-variable warnings.
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    [[maybe_unused]] auto const size = bitset.size();
+  });
+}
+
+// Registers the benchmark with a single type axis named "Distribution" whose only entry is
+// `distribution::gaussian`. No "NumInputs" axis is added by this registration.
+NVBENCH_BENCH_TYPES(dynamic_bitset_size,
+                    NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
+  .set_name("dynamic_bitset_size")
+  .set_type_axes_names({"Distribution"})
+  .set_max_noise(defaults::MAX_NOISE);
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <defaults.hpp>
+#include <utils.hpp>
+
+#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
+#include <cuco/utility/key_generator.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+#include <thrust/host_vector.h>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::experimental::detail::dynamic_bitset::test` performance
+ *
+ * Fills a bitset of `NumInputs` bits (default `defaults::N`) from words generated with `Dist`,
+ * then measures one bulk `test` call over the query values `0, 1, ..., query_size - 1`,
+ * where `query_size` is `NumInputs / 10` capped at 1'000'000.
+ *
+ * @tparam Dist Distribution tag forwarded to `dist_from_state` when generating the bitset words
+ *
+ * @param state nvbench state that supplies `NumInputs` and runs the measurement
+ */
+template <typename Dist>
+void dynamic_bitset_test(nvbench::state& state, nvbench::type_list<Dist>)
+{
+  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
+  using word_type          = typename cuco::experimental::detail::dynamic_bitset<>::word_type;
+  auto const bits_per_word = cuco::experimental::detail::dynamic_bitset<>::bits_per_word;
+  // Ceiling division: enough words to hold `num_bits` bits
+  thrust::host_vector<word_type> keys((num_bits - 1) / bits_per_word + 1);
+
+  key_generator gen;
+  gen.generate(dist_from_state<Dist>(state), keys.begin(), keys.end());
+
+  cuco::experimental::detail::dynamic_bitset bitset;
+  bitset.insert(keys.begin(), keys.end(), num_bits);
+
+  // Clamp explicitly in `size_t` instead of calling unqualified `min` on a mixed
+  // `unsigned long` / `int64_t` pair, which depends on CUDA's global host overloads.
+  size_t const max_queries = 1000 * 1000lu;
+  size_t const tenth       = static_cast<size_t>(num_bits / 10);
+  size_t const query_size  = tenth < max_queries ? tenth : max_queries;
+
+  thrust::device_vector<size_t> inputs(query_size);
+  thrust::sequence(inputs.begin(), inputs.end(), 0);
+  thrust::device_vector<size_t> outputs(query_size);
+
+  state.add_element_count(query_size);
+  // The launch object is not used by the call below, so the parameter is left unnamed.
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
+    bitset.test(inputs.begin(), inputs.end(), outputs.begin());
+  });
+}
+
+// Registers the benchmark with a single type axis named "Distribution" whose only entry is
+// `distribution::gaussian`. No "NumInputs" axis is added by this registration.
+NVBENCH_BENCH_TYPES(dynamic_bitset_test,
+                    NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
+  .set_name("dynamic_bitset_test")
+  .set_type_axes_names({"Distribution"})
+  .set_max_noise(defaults::MAX_NOISE);
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <defaults.hpp>
+#include <utils.hpp>
+
+#include "../../tests/trie/trie_utils.hpp"
+#include <cuco/trie.cuh>
+#include <cuco/utility/key_generator.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+using namespace cuco::benchmark;
+using namespace cuco::utility;
+
+/**
+ * @brief A benchmark evaluating `cuco::experimental::trie::insert` performance
+ *
+ * Reads `NumKeys` (default 100'000) and `MaxKeyLength` (default 10) from the state, builds the
+ * key set once via `generate_labels` and `sorted_keys`, and then measures inserting every key
+ * into an empty trie.
+ *
+ * @param state nvbench state that supplies the parameters and runs the measurement
+ */
+void trie_insert(nvbench::state& state)
+{
+  auto const num_keys       = state.get_int64_or_default("NumKeys", 100 * 1000);
+  auto const max_key_length = state.get_int64_or_default("MaxKeyLength", 10);
+
+  using LabelType = int;
+
+  thrust::host_vector<LabelType> labels;
+  thrust::host_vector<size_t> offsets;
+
+  distribution::unique lengths_dist;
+  distribution::gaussian labels_dist{0.5};
+  generate_labels(labels, offsets, num_keys, max_key_length, lengths_dist, labels_dist);
+  auto keys = sorted_keys(labels, offsets);
+
+  state.add_element_count(num_keys);
+  // nvbench invokes this lambda many times. A trie shared across invocations would already hold
+  // every key after the first run, so each run gets its own empty trie. Construction and
+  // destruction fall outside the explicit timer and only the insert loop is measured.
+  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
+             [&](nvbench::launch&, auto& timer) {
+               cuco::experimental::trie<LabelType> trie;
+
+               timer.start();
+               for (auto& key : keys) {
+                 trie.insert(key.begin(), key.end());
+               }
+               timer.stop();
+             });
+}
+
+// Registers the benchmark and sweeps two integer axes: "NumKeys" over {100'000, 1'000'000} and
+// "MaxKeyLength" over {4, 8, 16}.
+NVBENCH_BENCH(trie_insert)
+  .set_name("trie_insert")
+  .set_max_noise(defaults::MAX_NOISE)
+  .add_int64_axis("NumKeys", std::vector<nvbench::int64_t>{100 * 1000, 1000 * 1000})
+  .add_int64_axis("MaxKeyLength", std::vector<nvbench::int64_t>{4, 8, 16});