Skip to content
Draft

Trie #350

Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
85de9c2
Trie checkin
amukkara May 11, 2023
c724f61
Coding style
amukkara Aug 27, 2023
ac5af70
Change template parameter name
amukkara Aug 27, 2023
ffdde23
Includes
amukkara Aug 28, 2023
18e9501
Misc coding style changes
amukkara Aug 28, 2023
c5c1ec5
Remove bitvector template parameter
amukkara Aug 29, 2023
152f716
Remove some host-side structures
amukkara Aug 29, 2023
643ba31
bit_vector -> dynamic_bitset
amukkara Aug 30, 2023
53b2ba4
dynamic_bitset API change
amukkara Sep 7, 2023
fdca1c2
Template variable naming style
amukkara Sep 10, 2023
10f1b05
Minor
amukkara Sep 10, 2023
748c1ea
Add allocator template parameter
amukkara Sep 10, 2023
d2e339b
Misc coding style
amukkara Sep 10, 2023
74effe6
Comments
amukkara Sep 10, 2023
c6d300f
Run a dummy test() on bitsets
amukkara Sep 12, 2023
c57e491
Use allocator for member classes
amukkara Sep 12, 2023
4e4a08f
Buffer bitset updates on host
amukkara Sep 13, 2023
6a8af91
Minor changes in lookup test
amukkara Oct 2, 2023
248d71f
Use iterators as parameters to insert()
amukkara Oct 11, 2023
25eaa9d
Move key generation utilities into different file
amukkara Oct 11, 2023
2753a38
Limit grid size to 128
amukkara Oct 11, 2023
01c35df
Add performance test
amukkara Oct 11, 2023
7151733
Store labels in host vector during insertions
amukkara Oct 12, 2023
9f7d6d3
Parallelize input preprocessing
amukkara Oct 12, 2023
342bcee
Move dynamic_bitset tests into tests/trie
amukkara Oct 12, 2023
81f9d46
Consolidate trie utils in a single file
amukkara Oct 12, 2023
03821ef
find_next host-bulk API in dynamic_bitset
amukkara Oct 13, 2023
f70d528
Dynamic bitset benchmarks
amukkara Oct 13, 2023
bd4549e
Consistent use of LabelType
amukkara Oct 13, 2023
5f67117
Trie benchmarks
amukkara Oct 13, 2023
660bdc0
Reorganize trie utils
amukkara Oct 13, 2023
6f2b0b8
Use cuco key generators
amukkara Oct 13, 2023
799d968
Add key length and key count nvbench axes
amukkara Oct 13, 2023
025c59a
Add namespaces
amukkara Oct 18, 2023
6184e50
Remove custom key comparator
amukkara Oct 18, 2023
7f1d083
Include multiple label types
amukkara Oct 18, 2023
5ed0498
Change key distribution to uniform
amukkara Oct 18, 2023
6842144
Move file
amukkara Oct 18, 2023
4dcb7ce
Delete file read utilities
amukkara Oct 18, 2023
9892378
Change nvbench axes order
amukkara Oct 18, 2023
fc3fa7a
Trie examples
amukkara Oct 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,18 @@ ConfigureBench(DYNAMIC_MAP_BENCH
# - hash function benchmarks ----------------------------------------------------------------------
ConfigureBench(HASH_BENCH
hash_bench.cu)

###################################################################################################
# - dynamic_bitset benchmarks -------------------------------------------------------------------------
# Declares the DYNAMIC_BITSET_BENCH benchmark from one source file per benchmarked operation
# (find_next, rank, select, size, test). ConfigureBench is defined outside this excerpt.
ConfigureBench(DYNAMIC_BITSET_BENCH
trie/dynamic_bitset/find_next_bench.cu
trie/dynamic_bitset/rank_bench.cu
trie/dynamic_bitset/select_bench.cu
trie/dynamic_bitset/size_bench.cu
trie/dynamic_bitset/test_bench.cu)

###################################################################################################
# - trie benchmarks -------------------------------------------------------------------------
# Declares the TRIE_BENCH benchmark from the trie insert and lookup benchmark sources.
# ConfigureBench is defined outside this excerpt.
ConfigureBench(TRIE_BENCH
trie/insert_bench.cu
trie/lookup_bench.cu)
62 changes: 62 additions & 0 deletions benchmarks/trie/dynamic_bitset/find_next_bench.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <defaults.hpp>
#include <utils.hpp>

#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
#include <cuco/utility/key_generator.hpp>

#include <nvbench/nvbench.cuh>

#include <thrust/host_vector.h>

using namespace cuco::benchmark;
using namespace cuco::utility;

/**
 * @brief Benchmarks bulk `cuco::experimental::detail::dynamic_bitset::find_next` queries
 *
 * A bitset of `NumInputs` bits is filled from generated words, then a single bulk `find_next`
 * call over the positions `0, 1, ..., query_size - 1` is timed.
 *
 * @tparam Dist Distribution tag used to generate the bitset words
 */
template <typename Dist>
void dynamic_bitset_find_next(nvbench::state& state, nvbench::type_list<Dist>)
{
  using bitset_type = cuco::experimental::detail::dynamic_bitset<>;
  using word_type   = typename bitset_type::word_type;

  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
  auto const bits_per_word = bitset_type::bits_per_word;

  // Enough words to hold `num_bits` bits (ceiling division)
  auto const num_words = (num_bits - 1) / bits_per_word + 1;
  thrust::host_vector<word_type> words(num_words);

  key_generator generator;
  generator.generate(dist_from_state<Dist>(state), words.begin(), words.end());

  bitset_type bitset;
  bitset.insert(words.begin(), words.end(), num_bits);

  // Query a tenth of the bits, but never more than one million positions
  auto const max_queries  = 1000 * 1000lu;
  const size_t query_size = min(max_queries, num_bits / 10);

  thrust::device_vector<size_t> positions(query_size);
  thrust::sequence(positions.begin(), positions.end(), 0);
  thrust::device_vector<size_t> results(query_size);

  state.add_element_count(query_size);
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    bitset.find_next(positions.begin(), positions.end(), results.begin());
  });
}

// Registers `dynamic_bitset_find_next` with nvbench: one type axis named "Distribution"
// holding only `distribution::gaussian`, with the noise limit set to `defaults::MAX_NOISE`.
NVBENCH_BENCH_TYPES(dynamic_bitset_find_next,
NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
.set_name("dynamic_bitset_find_next")
.set_type_axes_names({"Distribution"})
.set_max_noise(defaults::MAX_NOISE);
62 changes: 62 additions & 0 deletions benchmarks/trie/dynamic_bitset/rank_bench.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <defaults.hpp>
#include <utils.hpp>

#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
#include <cuco/utility/key_generator.hpp>

#include <nvbench/nvbench.cuh>

#include <thrust/host_vector.h>

using namespace cuco::benchmark;
using namespace cuco::utility;

/**
 * @brief Benchmarks bulk `cuco::experimental::detail::dynamic_bitset::rank` queries
 *
 * A bitset of `NumInputs` bits is filled from generated words, then a single bulk `rank`
 * call over the positions `0, 1, ..., query_size - 1` is timed.
 *
 * @tparam Dist Distribution tag used to generate the bitset words
 */
template <typename Dist>
void dynamic_bitset_rank(nvbench::state& state, nvbench::type_list<Dist>)
{
  using bitset_type = cuco::experimental::detail::dynamic_bitset<>;
  using word_type   = typename bitset_type::word_type;

  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
  auto const bits_per_word = bitset_type::bits_per_word;

  // Enough words to hold `num_bits` bits (ceiling division)
  auto const num_words = (num_bits - 1) / bits_per_word + 1;
  thrust::host_vector<word_type> words(num_words);

  key_generator generator;
  generator.generate(dist_from_state<Dist>(state), words.begin(), words.end());

  bitset_type bitset;
  bitset.insert(words.begin(), words.end(), num_bits);

  // Query a tenth of the bits, but never more than one million positions
  auto const max_queries  = 1000 * 1000lu;
  const size_t query_size = min(max_queries, num_bits / 10);

  thrust::device_vector<size_t> positions(query_size);
  thrust::sequence(positions.begin(), positions.end(), 0);
  thrust::device_vector<size_t> results(query_size);

  state.add_element_count(query_size);
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    bitset.rank(positions.begin(), positions.end(), results.begin());
  });
}

// Registers `dynamic_bitset_rank` with nvbench: one type axis named "Distribution"
// holding only `distribution::gaussian`, with the noise limit set to `defaults::MAX_NOISE`.
NVBENCH_BENCH_TYPES(dynamic_bitset_rank,
NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
.set_name("dynamic_bitset_rank")
.set_type_axes_names({"Distribution"})
.set_max_noise(defaults::MAX_NOISE);
62 changes: 62 additions & 0 deletions benchmarks/trie/dynamic_bitset/select_bench.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <defaults.hpp>
#include <utils.hpp>

#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
#include <cuco/utility/key_generator.hpp>

#include <nvbench/nvbench.cuh>

#include <thrust/host_vector.h>

using namespace cuco::benchmark;
using namespace cuco::utility;

/**
 * @brief Benchmarks bulk `cuco::experimental::detail::dynamic_bitset::select` queries
 *
 * A bitset of `NumInputs` bits is filled from generated words, then a single bulk `select`
 * call over the inputs `0, 1, ..., query_size - 1` is timed.
 *
 * @tparam Dist Distribution tag used to generate the bitset words
 */
template <typename Dist>
void dynamic_bitset_select(nvbench::state& state, nvbench::type_list<Dist>)
{
  using bitset_type = cuco::experimental::detail::dynamic_bitset<>;
  using word_type   = typename bitset_type::word_type;

  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
  auto const bits_per_word = bitset_type::bits_per_word;

  // Enough words to hold `num_bits` bits (ceiling division)
  auto const num_words = (num_bits - 1) / bits_per_word + 1;
  thrust::host_vector<word_type> words(num_words);

  key_generator generator;
  generator.generate(dist_from_state<Dist>(state), words.begin(), words.end());

  bitset_type bitset;
  bitset.insert(words.begin(), words.end(), num_bits);

  // Issue a tenth as many queries as there are bits, capped at one million
  auto const max_queries  = 1000 * 1000lu;
  const size_t query_size = min(max_queries, num_bits / 10);

  thrust::device_vector<size_t> queries(query_size);
  thrust::sequence(queries.begin(), queries.end(), 0);
  thrust::device_vector<size_t> results(query_size);

  state.add_element_count(query_size);
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    bitset.select(queries.begin(), queries.end(), results.begin());
  });
}

// Registers `dynamic_bitset_select` with nvbench: one type axis named "Distribution"
// holding only `distribution::gaussian`, with the noise limit set to `defaults::MAX_NOISE`.
NVBENCH_BENCH_TYPES(dynamic_bitset_select,
NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
.set_name("dynamic_bitset_select")
.set_type_axes_names({"Distribution"})
.set_max_noise(defaults::MAX_NOISE);
56 changes: 56 additions & 0 deletions benchmarks/trie/dynamic_bitset/size_bench.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <defaults.hpp>
#include <utils.hpp>

#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
#include <cuco/utility/key_generator.hpp>

#include <nvbench/nvbench.cuh>

#include <thrust/host_vector.h>

using namespace cuco::benchmark;
using namespace cuco::utility;

/**
 * @brief Benchmarks `cuco::experimental::detail::dynamic_bitset::size`
 *
 * A bitset of `NumInputs` bits is filled from generated words, then a single `size()` call
 * is timed per measurement.
 *
 * @tparam Dist Distribution tag used to generate the bitset words
 */
template <typename Dist>
void dynamic_bitset_size(nvbench::state& state, nvbench::type_list<Dist>)
{
  using bitset_type = cuco::experimental::detail::dynamic_bitset<>;
  using word_type   = typename bitset_type::word_type;

  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
  auto const bits_per_word = bitset_type::bits_per_word;

  // Enough words to hold `num_bits` bits (ceiling division)
  auto const num_words = (num_bits - 1) / bits_per_word + 1;
  thrust::host_vector<word_type> words(num_words);

  key_generator generator;
  generator.generate(dist_from_state<Dist>(state), words.begin(), words.end());

  bitset_type bitset;
  bitset.insert(words.begin(), words.end(), num_bits);

  // Each measurement performs exactly one `size()` call
  state.add_element_count(1);
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    auto const reported_size = bitset.size();
  });
}

// Registers `dynamic_bitset_size` with nvbench: one type axis named "Distribution"
// holding only `distribution::gaussian`, with the noise limit set to `defaults::MAX_NOISE`.
NVBENCH_BENCH_TYPES(dynamic_bitset_size,
NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
.set_name("dynamic_bitset_size")
.set_type_axes_names({"Distribution"})
.set_max_noise(defaults::MAX_NOISE);
62 changes: 62 additions & 0 deletions benchmarks/trie/dynamic_bitset/test_bench.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <defaults.hpp>
#include <utils.hpp>

#include <cuco/detail/trie/dynamic_bitset/dynamic_bitset.cuh>
#include <cuco/utility/key_generator.hpp>

#include <nvbench/nvbench.cuh>

#include <thrust/host_vector.h>

using namespace cuco::benchmark;
using namespace cuco::utility;

/**
 * @brief Benchmarks bulk `cuco::experimental::detail::dynamic_bitset::test` queries
 *
 * A bitset of `NumInputs` bits is filled from generated words, then a single bulk `test`
 * call over the positions `0, 1, ..., query_size - 1` is timed.
 *
 * @tparam Dist Distribution tag used to generate the bitset words
 */
template <typename Dist>
void dynamic_bitset_test(nvbench::state& state, nvbench::type_list<Dist>)
{
  using bitset_type = cuco::experimental::detail::dynamic_bitset<>;
  using word_type   = typename bitset_type::word_type;

  auto const num_bits      = state.get_int64_or_default("NumInputs", defaults::N);
  auto const bits_per_word = bitset_type::bits_per_word;

  // Enough words to hold `num_bits` bits (ceiling division)
  auto const num_words = (num_bits - 1) / bits_per_word + 1;
  thrust::host_vector<word_type> words(num_words);

  key_generator generator;
  generator.generate(dist_from_state<Dist>(state), words.begin(), words.end());

  bitset_type bitset;
  bitset.insert(words.begin(), words.end(), num_bits);

  // Query a tenth of the bits, but never more than one million positions
  auto const max_queries  = 1000 * 1000lu;
  const size_t query_size = min(max_queries, num_bits / 10);

  thrust::device_vector<size_t> positions(query_size);
  thrust::sequence(positions.begin(), positions.end(), 0);
  thrust::device_vector<size_t> results(query_size);

  state.add_element_count(query_size);
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    bitset.test(positions.begin(), positions.end(), results.begin());
  });
}

// Registers `dynamic_bitset_test` with nvbench: one type axis named "Distribution"
// holding only `distribution::gaussian`, with the noise limit set to `defaults::MAX_NOISE`.
NVBENCH_BENCH_TYPES(dynamic_bitset_test,
NVBENCH_TYPE_AXES(nvbench::type_list<distribution::gaussian>))
.set_name("dynamic_bitset_test")
.set_type_axes_names({"Distribution"})
.set_max_noise(defaults::MAX_NOISE);
60 changes: 60 additions & 0 deletions benchmarks/trie/insert_bench.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <defaults.hpp>
#include <utils.hpp>

#include "../../tests/trie/trie_utils.hpp"
#include <cuco/trie.cuh>
#include <cuco/utility/key_generator.hpp>

#include <nvbench/nvbench.cuh>

using namespace cuco::benchmark;
using namespace cuco::utility;

/**
 * @brief A benchmark evaluating `cuco::experimental::trie::insert` performance
 *
 * Generates `NumKeys` keys of at most `MaxKeyLength` labels each, sorts them, and measures the
 * time needed to insert all of them into an empty trie.
 *
 * nvbench invokes the measured callable repeatedly. A fresh trie is therefore constructed for
 * every invocation and only the insertion loop is timed; otherwise every sample after the first
 * would re-insert the same keys into an already populated trie.
 */
void trie_insert(nvbench::state& state)
{
  auto const num_keys       = state.get_int64_or_default("NumKeys", 100 * 1000);
  auto const max_key_length = state.get_int64_or_default("MaxKeyLength", 10);

  // Review note carried over from the PR thread: a collaborator asked whether the label type
  // should be a configurable parameter; the author replied that commit 7f1d083 makes LabelType
  // configurable (testing char and int for now).
  using LabelType = int;

  thrust::host_vector<LabelType> labels;
  thrust::host_vector<size_t> offsets;

  distribution::unique lengths_dist;
  distribution::gaussian labels_dist{0.5};
  generate_labels(labels, offsets, num_keys, max_key_length, lengths_dist, labels_dist);
  auto keys = sorted_keys(labels, offsets);

  state.add_element_count(num_keys);
  state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
             [&](nvbench::launch&, auto& timer) {
               // Start every sample from an empty trie so all samples measure the same work
               cuco::experimental::trie<LabelType> trie;

               // Time only the insertions, not trie construction or destruction
               timer.start();
               for (auto& key : keys) {
                 trie.insert(key.begin(), key.end());
               }
               timer.stop();
             });
}

// Registers `trie_insert` with nvbench and sweeps two integer axes: the number of keys
// (100,000 and 1,000,000) and the maximum key length (4, 8 and 16 labels).
NVBENCH_BENCH(trie_insert)
.set_name("trie_insert")
.set_max_noise(defaults::MAX_NOISE)
.add_int64_axis("NumKeys", std::vector<nvbench::int64_t>{100 * 1000, 1000 * 1000})
.add_int64_axis("MaxKeyLength", std::vector<nvbench::int64_t>{4, 8, 16});
Loading