diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 0704180bad0..3bc6dc10fdf 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -265,7 +265,7 @@ ConfigureBench( string/split.cpp string/substring.cpp string/translate.cpp - string/url_decode.cpp + string/url_decode.cu ) # ################################################################################################## diff --git a/cpp/benchmarks/string/url_decode.cpp b/cpp/benchmarks/string/url_decode.cu similarity index 53% rename from cpp/benchmarks/string/url_decode.cpp rename to cpp/benchmarks/string/url_decode.cu index 6dc79c44437..c460820d788 100644 --- a/cpp/benchmarks/string/url_decode.cpp +++ b/cpp/benchmarks/string/url_decode.cu @@ -16,11 +16,12 @@ #include #include -#include #include #include +#include #include +#include #include #include @@ -28,43 +29,55 @@ #include #include -#include -#include +#include +#include +#include +#include struct url_string_generator { - size_t num_chars; - std::bernoulli_distribution dist; - - url_string_generator(size_t num_chars, double esc_seq_chance) - : num_chars{num_chars}, dist{esc_seq_chance} + char* chars; + double esc_seq_chance; + thrust::minstd_rand engine; + thrust::uniform_real_distribution esc_seq_dist; + url_string_generator(char* c, double esc_seq_chance, thrust::minstd_rand& engine) + : chars(c), esc_seq_chance(esc_seq_chance), engine(engine), esc_seq_dist(0, 1) { } - std::string operator()(std::mt19937& engine) + __device__ void operator()(thrust::tuple str_begin_end) { - std::string str; - str.reserve(num_chars); - while (str.size() < num_chars) { - if (str.size() < num_chars - 3 && dist(engine)) { - str += "%20"; + auto begin = thrust::get<0>(str_begin_end); + auto end = thrust::get<1>(str_begin_end); + engine.discard(begin); + for (auto i = begin; i < end; ++i) { + if (esc_seq_dist(engine) < esc_seq_chance and i < end - 3) { + chars[i] = '%'; + chars[i + 1] = '2'; + chars[i + 2] = '0'; + i += 2; } else { - str.push_back('a'); + chars[i] = 'a'; } } - return str; } }; -cudf::test::strings_column_wrapper generate_column(cudf::size_type num_rows, - cudf::size_type chars_per_row, - double esc_seq_chance) +auto generate_column(cudf::size_type num_rows, cudf::size_type chars_per_row, double esc_seq_chance) { - std::mt19937 engine(1); - url_string_generator url_gen(chars_per_row, esc_seq_chance); - std::vector strings; - strings.reserve(num_rows); - std::generate_n(std::back_inserter(strings), num_rows, [&]() { return url_gen(engine); }); - return cudf::test::strings_column_wrapper(strings.begin(), strings.end()); + std::vector strings{std::string(chars_per_row, 'a')}; + auto col_1a = cudf::test::strings_column_wrapper(strings.begin(), strings.end()); + auto table_a = cudf::repeat(cudf::table_view{{col_1a}}, num_rows); + auto result_col = std::move(table_a->release()[0]); // string column with num_rows aaa... + auto chars_col = result_col->child(cudf::strings_column_view::chars_column_index).mutable_view(); + auto offset_col = result_col->child(cudf::strings_column_view::offsets_column_index).view(); + + auto engine = thrust::default_random_engine{}; + thrust::for_each_n(thrust::device, + thrust::make_zip_iterator(offset_col.begin(), + offset_col.begin() + 1), + num_rows, + url_string_generator{chars_col.begin(), esc_seq_chance, engine}); + return result_col; } class UrlDecode : public cudf::benchmark { @@ -76,7 +89,7 @@ void BM_url_decode(benchmark::State& state, int esc_seq_pct) cudf::size_type const chars_per_row = state.range(1); auto column = generate_column(num_rows, chars_per_row, esc_seq_pct / 100.0); - auto strings_view = cudf::strings_column_view(column); + auto strings_view = cudf::strings_column_view(column->view()); for (auto _ : state) { cuda_event_timer raii(state, true, rmm::cuda_stream_default);