Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
119 commits
Select commit Hold shift + click to select a range
51fe975
rename generate_benchmark_input.cpp to .cu
karthikeyann Jan 21, 2022
57b0400
update generator lambdas to use thrust::random
karthikeyann Jan 21, 2022
df2b986
use thrust random generators: numeric, chrono, fixed_point, string
karthikeyann Jan 21, 2022
501bbca
rename copy_benchmark.cpp to cu (thrust code)
karthikeyann Jan 24, 2022
58a6d38
disable debug print, env iterations in gbench fixture
karthikeyann Jan 24, 2022
211778a
Merge branch 'branch-22.04' into fea-benchmark_speedup2
karthikeyann Jan 25, 2022
759f967
fix bug in bounds
karthikeyann Jan 25, 2022
c5f263a
fix static shared_ptr bug
karthikeyann Jan 25, 2022
1b36384
use generator in anyall_benchmark.cpp
karthikeyann Jan 25, 2022
114b9aa
use generator in minmax_benchmark.cpp
karthikeyann Jan 25, 2022
8c27985
use generator in reduce_benchmark.cpp
karthikeyann Jan 25, 2022
709bf0d
use thrust::shuffle in string/copy_benchmark.cu
karthikeyann Jan 25, 2022
cc1d7dc
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Jan 25, 2022
a96dd03
rename to copy.cu
karthikeyann Jan 25, 2022
dab9c59
remove copy_benchmark.cu
karthikeyann Jan 25, 2022
5957f58
revert generate_input.cpp
karthikeyann Jan 25, 2022
ca04d2e
rename to generate_input.cu
karthikeyann Jan 25, 2022
c31fcc2
recheckin generate_input.cu changes with old commits
karthikeyann Jan 25, 2022
33e93a3
rename to url_decode.cu
karthikeyann Jan 25, 2022
f14f476
update code in url_decode.cu
karthikeyann Jan 25, 2022
1db37b3
update cmake on filename change
karthikeyann Jan 25, 2022
21cdc4b
update renamed header path
karthikeyann Jan 25, 2022
6a6cd62
remove old file
karthikeyann Jan 25, 2022
587e725
rename to extract.cu
karthikeyann Jan 25, 2022
5974d16
Revert "rename to extract.cu"
karthikeyann Jan 25, 2022
60deac6
update code in extract.cu (STRINGS_BENCH 82s for 1 iteration from 366…
karthikeyann Jan 25, 2022
89ac0e2
style fix clang-format
karthikeyann Jan 25, 2022
e31612a
fix length bug in run length sample indices size
karthikeyann Jan 27, 2022
a17fd24
add fixed_point and decimal128 support
karthikeyann Jan 27, 2022
e9b0994
add list column generation support
karthikeyann Jan 31, 2022
00afd6f
use gather in string contiguous split bench
karthikeyann Jan 31, 2022
49baa47
null_frequency<0 means no null_mask
karthikeyann Jan 31, 2022
63d5d4c
use generate input in device in contiguous_split benchmark
karthikeyann Jan 31, 2022
57746c0
reduce join bench input generation time (-50s)
karthikeyann Feb 1, 2022
c5f5158
use generate input in device in apply_boolean_mask bench
karthikeyann Feb 2, 2022
6f2c229
fix engine seed for multiple columns
karthikeyann Feb 3, 2022
574ca28
use create_random_table in quantiles bench
karthikeyann Feb 3, 2022
730a71c
replace std::is_integral with cuda::std::is_integral
karthikeyann Feb 3, 2022
b743bf8
replace escaped string with raw string literal
karthikeyann Feb 4, 2022
22b6feb
add create_random_null_mask
karthikeyann Feb 7, 2022
79ce7bb
move input generation to device
karthikeyann Feb 7, 2022
eeca4f7
use cudf::sequence to generate input in device
karthikeyann Feb 7, 2022
ea8dabc
add create_sequence_table
karthikeyann Feb 7, 2022
25776d6
use create_sequence_table in scatter benchmark
karthikeyann Feb 7, 2022
d06f873
device input generation for search benchmark, replace float with int
karthikeyann Feb 7, 2022
66e5113
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Feb 8, 2022
6f5cb04
use device input gen in binaryop benchmarks
karthikeyann Feb 10, 2022
2fca85a
use cuda::std::is_integral_v
karthikeyann Feb 10, 2022
75099b4
optionally disable debug prints
karthikeyann Feb 10, 2022
ae03974
replace BENCHMARK_CAPTURE
karthikeyann Feb 10, 2022
5c3d9c5
replace template param with argument
karthikeyann Feb 10, 2022
562bc3e
cleanup comments
karthikeyann Feb 14, 2022
7f8c25c
rename json.cpp to json.cu
karthikeyann Feb 14, 2022
f11e08d
move input gen to device in json bench
karthikeyann Feb 14, 2022
916ce00
rename generate_input.cpp to generate_input.cu
karthikeyann Feb 14, 2022
d7f0f29
add create_sequence_table, create_random_null_mask
karthikeyann Feb 14, 2022
bb74cc7
fix includes, seed
karthikeyann Feb 15, 2022
0ea4f60
use cuda::std to include int128
karthikeyann Feb 15, 2022
a25241e
use -std=gnu++17 for generate_input.cu for int128 support
karthikeyann Feb 15, 2022
dfd33f2
go back to using BENCHMARK_TEMPLATE_DEFINE_F
karthikeyann Feb 15, 2022
f9f3eec
use create_sequence_table in ast bench
karthikeyann Feb 15, 2022
81ac53a
use create_sequence_table in binops bench
karthikeyann Feb 15, 2022
6c659d4
use create_sequence_table, thrust::shuffle in scatter bench
karthikeyann Feb 15, 2022
9f5c5ba
use cudf::sequence, create_random_null_mask in search bench
karthikeyann Feb 15, 2022
6758095
update copyright year
karthikeyann Feb 15, 2022
718e269
style fix clang format
karthikeyann Feb 15, 2022
704bb72
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Feb 15, 2022
6804412
Merge branch 'branch-22.04' into fea-benchmark_speedup2
karthikeyann Feb 16, 2022
771c915
update copyright year
karthikeyann Feb 16, 2022
72ac32c
merge fix duplicate entry
karthikeyann Feb 17, 2022
532e729
cleanup
karthikeyann Feb 17, 2022
eb3e6b9
cleanup
karthikeyann Feb 17, 2022
0ad778e
address review comments
karthikeyann Feb 17, 2022
9dd9244
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Feb 17, 2022
1894e44
Merge branch 'branch-22.04' into fea-benchmark_speedup2
karthikeyann Feb 19, 2022
bda1f6c
const auto to auto const
karthikeyann Feb 19, 2022
d568d09
address review comments
karthikeyann Feb 19, 2022
993c85d
reduce code duplication
karthikeyann Feb 21, 2022
bdbdf49
Merge branch 'branch-22.04' into fea-benchmark_speedup_2.6
karthikeyann Feb 22, 2022
02ef0d2
Revert "rename generate_input.cpp to generate_input.cu"
karthikeyann Feb 22, 2022
820b417
rename generator functor
karthikeyann Feb 24, 2022
9028a80
simplify create null mask
karthikeyann Feb 24, 2022
4f1f3e8
rename repeat_dtypes to cycle_dtypes
karthikeyann Feb 24, 2022
b31de3a
move cycle_dtypes out for create_sequence_table
karthikeyann Feb 24, 2022
1d4d57a
move cycle_dtypes out of create_random_table
karthikeyann Feb 24, 2022
581e4b8
fix null mask null_count
karthikeyann Feb 24, 2022
204147d
Merge branch 'fea-benchmark_speedup_2.6' of github.com:karthikeyann/c…
karthikeyann Feb 24, 2022
7ff955b
remove num_cols, rename parameter name
karthikeyann Feb 24, 2022
83fc374
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Feb 26, 2022
2b667e6
cleanup includes
karthikeyann Feb 26, 2022
7e1ada1
style fix clangformat
karthikeyann Feb 26, 2022
8bebee0
include fixes
karthikeyann Feb 26, 2022
e1d47f8
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Feb 26, 2022
1135ca3
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Mar 2, 2022
6fad37b
replace sequence with random table as data won't matter here
karthikeyann Mar 2, 2022
26a65ca
move common code to avg length sample indices function
karthikeyann Mar 2, 2022
dbe5916
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Mar 7, 2022
0ec63be
remove unused code
karthikeyann Mar 7, 2022
7b56bfa
address review comments (vuule)
karthikeyann Mar 9, 2022
cf5c9ef
address review comments (vyasr)
karthikeyann Mar 9, 2022
124f744
replace binomial with normal
karthikeyann Mar 9, 2022
a2bbe8a
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Mar 9, 2022
f57863c
replace TEMPLATED_BENCHMARK_F with BENCHMARK_DEFINE_F
karthikeyann Mar 11, 2022
d73b0f0
use more instances of create_sequence_table
karthikeyann Mar 11, 2022
a416cf9
add chrono params, use in drop_duplicates bench
karthikeyann Mar 11, 2022
d76b8c1
use create_random_table more
karthikeyann Mar 11, 2022
51ccd92
include cleanup
karthikeyann Mar 11, 2022
e13c1ac
null frequency cleanup
karthikeyann Mar 11, 2022
5ad2073
rename STREAM_COMPACTION_BENCH to STREAM_COMPACTION_NVBENCH
karthikeyann Mar 11, 2022
af625de
address review comments
karthikeyann Mar 15, 2022
38ee90b
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Mar 15, 2022
156ac3f
add std::clamp to value_generator
karthikeyann Mar 15, 2022
c5b3c47
static_cast fix
karthikeyann Mar 15, 2022
7d17ea5
update more benchmarks input gen to device
karthikeyann Mar 15, 2022
de3b6e8
std::optional for null_frequency
karthikeyann Mar 16, 2022
d02c2fb
null probability float to double
karthikeyann Mar 18, 2022
5b59642
add geometric distribution approximation
karthikeyann Mar 18, 2022
1e393a5
more comments addressed
karthikeyann Mar 18, 2022
0810425
addressing review comments (davidwendt)
karthikeyann Mar 22, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

find_package(Threads REQUIRED)

add_library(cudf_datagen STATIC common/generate_input.cpp common/generate_nullmask.cu)
add_library(cudf_datagen STATIC common/generate_input.cu)
target_compile_features(cudf_datagen PUBLIC cxx_std_17 cuda_std_17)

target_compile_options(
Expand Down Expand Up @@ -136,7 +136,7 @@ ConfigureBench(COPY_IF_ELSE_BENCH copying/copy_if_else.cpp)

# ##################################################################################################
# * transpose benchmark ---------------------------------------------------------------------------
ConfigureBench(TRANSPOSE_BENCH transpose/transpose.cu)
ConfigureBench(TRANSPOSE_BENCH transpose/transpose.cpp)

# ##################################################################################################
# * apply_boolean_mask benchmark ------------------------------------------------------------------
Expand All @@ -145,7 +145,7 @@ ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp
# ##################################################################################################
# * stream_compaction benchmark -------------------------------------------------------------------
ConfigureNVBench(
STREAM_COMPACTION_BENCH stream_compaction/distinct.cpp stream_compaction/unique.cpp
STREAM_COMPACTION_NVBENCH stream_compaction/distinct.cpp stream_compaction/unique.cpp
)

# ##################################################################################################
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <algorithm>
#include <list>
#include <memory>
#include <optional>
#include <vector>

enum class TreeType {
Expand All @@ -48,7 +49,7 @@ static void BM_ast_transform(benchmark::State& state)
auto const source_table =
create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
row_count{table_size},
Nullable ? 0.5 : -1.0);
Nullable ? std::optional<double>{0.5} : std::nullopt);
auto table = source_table->view();

// Create column references
Expand Down
126 changes: 48 additions & 78 deletions cpp/benchmarks/column/concatenate.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -13,15 +13,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cudf/concatenate.hpp>
#include <cudf/table/table.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/fixture/templated_benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <cudf/concatenate.hpp>
#include <cudf/table/table.hpp>

#include <thrust/iterator/constant_iterator.h>

#include <algorithm>
Expand All @@ -33,31 +34,14 @@ class Concatenate : public cudf::benchmark {
template <typename T, bool Nullable>
static void BM_concatenate(benchmark::State& state)
{
using column_wrapper = cudf::test::fixed_width_column_wrapper<T>;

auto const num_rows = state.range(0);
auto const num_cols = state.range(1);

// Create owning columns
std::vector<column_wrapper> columns;
columns.reserve(num_cols);
std::generate_n(std::back_inserter(columns), num_cols, [num_rows]() {
auto iter = thrust::make_counting_iterator(0);
if (Nullable) {
auto valid_iter = thrust::make_transform_iterator(iter, [](auto i) { return i % 3 == 0; });
return column_wrapper(iter, iter + num_rows, valid_iter);
} else {
return column_wrapper(iter, iter + num_rows);
}
});
cudf::size_type const num_rows = state.range(0);
cudf::size_type const num_cols = state.range(1);

// Generate column views
std::vector<cudf::column_view> column_views;
column_views.reserve(columns.size());
std::transform(
columns.begin(), columns.end(), std::back_inserter(column_views), [](auto const& col) {
return static_cast<cudf::column_view>(col);
});
auto input = create_sequence_table(cycle_dtypes({cudf::type_to_id<T>()}, num_cols),
row_count{num_rows},
Nullable ? std::optional<double>{2.0 / 3.0} : std::nullopt);
auto input_columns = input->view();
std::vector<cudf::column_view> column_views(input_columns.begin(), input_columns.end());

CHECK_CUDA(0);

Expand All @@ -69,11 +53,13 @@ static void BM_concatenate(benchmark::State& state)
state.SetBytesProcessed(state.iterations() * num_cols * num_rows * sizeof(T));
}

#define CONCAT_BENCHMARK_DEFINE(type, nullable) \
TEMPLATED_BENCHMARK_F(Concatenate, BM_concatenate, type, nullable) \
->RangeMultiplier(8) \
->Ranges({{1 << 6, 1 << 18}, {2, 1024}}) \
->Unit(benchmark::kMillisecond) \
#define CONCAT_BENCHMARK_DEFINE(type, nullable) \
BENCHMARK_DEFINE_F(Concatenate, BM_concatenate##_##nullable_##nullable) \
(::benchmark::State & st) { BM_concatenate<type, nullable>(st); } \
BENCHMARK_REGISTER_F(Concatenate, BM_concatenate##_##nullable_##nullable) \
->RangeMultiplier(8) \
->Ranges({{1 << 6, 1 << 18}, {2, 1024}}) \
->Unit(benchmark::kMillisecond) \
->UseManualTime();

CONCAT_BENCHMARK_DEFINE(int64_t, false)
Expand All @@ -82,42 +68,22 @@ CONCAT_BENCHMARK_DEFINE(int64_t, true)
template <typename T, bool Nullable>
static void BM_concatenate_tables(benchmark::State& state)
{
using column_wrapper = cudf::test::fixed_width_column_wrapper<T>;

auto const num_rows = state.range(0);
auto const num_cols = state.range(1);
auto const num_tables = state.range(2);

// Create owning columns
std::vector<column_wrapper> columns;
columns.reserve(num_cols);
std::generate_n(std::back_inserter(columns), num_cols * num_tables, [num_rows]() {
auto iter = thrust::make_counting_iterator(0);
if (Nullable) {
auto valid_iter = thrust::make_transform_iterator(iter, [](auto i) { return i % 3 == 0; });
return column_wrapper(iter, iter + num_rows, valid_iter);
} else {
return column_wrapper(iter, iter + num_rows);
}
cudf::size_type const num_rows = state.range(0);
cudf::size_type const num_cols = state.range(1);
cudf::size_type const num_tables = state.range(2);

std::vector<std::unique_ptr<cudf::table>> tables(num_tables);
std::generate_n(tables.begin(), num_tables, [&]() {
return create_sequence_table(cycle_dtypes({cudf::type_to_id<T>()}, num_cols),
row_count{num_rows},
Nullable ? std::optional<double>{2.0 / 3.0} : std::nullopt);
});

// Generate column views
std::vector<std::vector<cudf::column_view>> column_views(num_tables);
for (int i = 0; i < num_tables; ++i) {
column_views[i].reserve(num_cols);
auto it = columns.begin() + (i * num_cols);
std::transform(it, it + num_cols, std::back_inserter(column_views[i]), [](auto const& col) {
return static_cast<cudf::column_view>(col);
});
}

// Generate table views
std::vector<cudf::table_view> table_views;
table_views.reserve(num_tables);
std::transform(column_views.begin(),
column_views.end(),
std::back_inserter(table_views),
[](auto const& col_vec) { return cudf::table_view(col_vec); });
std::vector<cudf::table_view> table_views(num_tables);
std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) mutable {
return table->view();
});

CHECK_CUDA(0);

Expand All @@ -129,11 +95,13 @@ static void BM_concatenate_tables(benchmark::State& state)
state.SetBytesProcessed(state.iterations() * num_cols * num_rows * num_tables * sizeof(T));
}

#define CONCAT_TABLES_BENCHMARK_DEFINE(type, nullable) \
TEMPLATED_BENCHMARK_F(Concatenate, BM_concatenate_tables, type, nullable) \
->RangeMultiplier(8) \
->Ranges({{1 << 8, 1 << 12}, {2, 32}, {2, 128}}) \
->Unit(benchmark::kMillisecond) \
#define CONCAT_TABLES_BENCHMARK_DEFINE(type, nullable) \
BENCHMARK_DEFINE_F(Concatenate, BM_concatenate_tables##_##nullable_##nullable) \
(::benchmark::State & st) { BM_concatenate_tables<type, nullable>(st); } \
BENCHMARK_REGISTER_F(Concatenate, BM_concatenate_tables##_##nullable_##nullable) \
->RangeMultiplier(8) \
->Ranges({{1 << 8, 1 << 12}, {2, 32}, {2, 128}}) \
->Unit(benchmark::kMillisecond) \
->UseManualTime();

CONCAT_TABLES_BENCHMARK_DEFINE(int64_t, false)
Expand Down Expand Up @@ -187,11 +155,13 @@ static void BM_concatenate_strings(benchmark::State& state)
(sizeof(int32_t) + num_chars)); // offset + chars
}

#define CONCAT_STRINGS_BENCHMARK_DEFINE(nullable) \
TEMPLATED_BENCHMARK_F(ConcatenateStrings, BM_concatenate_strings, nullable) \
->RangeMultiplier(8) \
->Ranges({{1 << 8, 1 << 14}, {8, 128}, {2, 256}}) \
->Unit(benchmark::kMillisecond) \
#define CONCAT_STRINGS_BENCHMARK_DEFINE(nullable) \
BENCHMARK_DEFINE_F(Concatenate, BM_concatenate_strings##_##nullable_##nullable) \
(::benchmark::State & st) { BM_concatenate_strings<nullable>(st); } \
BENCHMARK_REGISTER_F(Concatenate, BM_concatenate_strings##_##nullable_##nullable) \
->RangeMultiplier(8) \
->Ranges({{1 << 8, 1 << 14}, {8, 128}, {2, 256}}) \
->Unit(benchmark::kMillisecond) \
->UseManualTime();

CONCAT_STRINGS_BENCHMARK_DEFINE(false)
Expand Down
Loading