Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated benchmarks output name field that was not in the format key:value #571

Merged
merged 20 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ Documentation for rocPRIM is available at
## Unreleased rocPRIM-3.3.0 for ROCm 6.3.0

### Fixes
* Fixed an issue where while running rtest.py on windows and passing in an absolute path to --install_dir
causes FileNotFound error


* Fixed an issue where running rtest.py on Windows with an absolute path passed to `--install_dir` caused a `FileNotFound` error.


## Unreleased rocPRIM-3.2.0 for ROCm 6.2.0
Expand Down
36 changes: 20 additions & 16 deletions benchmark/benchmark_block_run_length_decode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,36 +171,40 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N)
HIP_CHECK(hipFree(d_output));
}

// Registers one run_benchmark<IT, OT, MINRL, MAXRL, BS, RPT, DIPT> instance with
// Google Benchmark. The display name is assembled at preprocessing time by
// stringizing every macro argument into a "Label:value" list wrapped in
// "block_run_length_decode<...>".
// The expansion refers to `stream` and `size`, so both must be in scope where the
// macro is used.
// NOTE(review): the "BlockSize: " label carries a space after the colon, unlike
// the other labels in this name.
#define CREATE_BENCHMARK(IT, OT, MINRL, MAXRL, BS, RPT, DIPT) \
benchmark::RegisterBenchmark("block_run_length_decode<Item Type:" #IT ",Offset Type:" #OT \
",Min RunLength:" #MINRL ",Max RunLength:" #MAXRL \
",BlockSize: " #BS ",Runs Per Thread:" #RPT \
",Decoded Items Per Thread:" #DIPT ">", \
&run_benchmark<IT, OT, MINRL, MAXRL, BS, RPT, DIPT>, \
stream, \
size)
// Registers one run_benchmark<IT, OT, MINRL, MAXRL, BS, RPT, DIPT> instance with
// Google Benchmark.
//
// The benchmark name is a single key:value description string: every macro
// argument is stringized into it, and the block size / runs per thread /
// decoded items per thread are grouped under a nested `cfg:{...}` entry. The
// string is passed through bench_naming::format_name() before registration.
//
// The expansion refers to `stream` and `size`, so both must be in scope where the
// macro is used.
#define CREATE_BENCHMARK(IT, OT, MINRL, MAXRL, BS, RPT, DIPT)                       \
    benchmark::RegisterBenchmark(                                                   \
        bench_naming::format_name("{lvl:block,algo:run_length_decode"               \
                                  ",item_type:" #IT                                 \
                                  ",offset_type:" #OT                               \
                                  ",min_run_length:" #MINRL                         \
                                  ",max_run_length:" #MAXRL                         \
                                  ",cfg:{block_size:" #BS                           \
                                  ",run_per_thread:" #RPT                           \
                                  ",decoded_items_per_thread:" #DIPT                \
                                  "}}")                                             \
            .c_str(),                                                               \
        &run_benchmark<IT, OT, MINRL, MAXRL, BS, RPT, DIPT>,                        \
        stream,                                                                     \
        size)

int main(int argc, char* argv[])
{
cli::Parser parser(argc, argv);
parser.set_optional<size_t>("size", "size", DEFAULT_N, "number of values");
parser.set_optional<int>("trials", "trials", -1, "number of iterations");
parser.set_optional<std::string>("name_format",
"name_format",
"human",
"either: json,human,txt");
parser.run_and_exit_if_error();

// Parse argv
benchmark::Initialize(&argc, argv);
const size_t size = parser.get<size_t>("size");
const int trials = parser.get<int>("trials");
bench_naming::set_format(parser.get<std::string>("name_format"));

std::cout << "benchmark_block_run_length_decode" << std::endl;

// HIP
// // HIP
hipStream_t stream = 0; // default
hipDeviceProp_t devProp;
int device_id = 0;
HIP_CHECK(hipGetDevice(&device_id));
HIP_CHECK(hipGetDeviceProperties(&devProp, device_id));
std::cout << "[HIP] Device name: " << devProp.name << std::endl;

// Add benchmarks
std::vector<benchmark::internal::Benchmark*> benchmarks{
Expand Down
70 changes: 62 additions & 8 deletions benchmark/benchmark_config_dispatch.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

#include "benchmark_utils.hpp"

#include "cmdparser.hpp"
#include <rocprim/device/config_types.hpp>

#include <benchmark/benchmark.h>
Expand All @@ -9,6 +9,11 @@

#include <iostream>

// Fallback element count (32 Mi). The definition is skipped when DEFAULT_N is
// already provided as a preprocessor macro, e.g. from the build command line.
#ifndef DEFAULT_N
constexpr size_t DEFAULT_N = 32 * 1024 * 1024;
#endif


enum class stream_kind
{
default_stream,
Expand Down Expand Up @@ -61,15 +66,64 @@ static void BM_kernel_launch(benchmark::State& state)
hipStreamSynchronize(stream);
}

// Static Google Benchmark registrations: BM_host_target_arch is registered once
// per stream_kind value (the second macro argument becomes the name suffix, the
// third is forwarded to the benchmark function), followed by BM_kernel_launch.
BENCHMARK_CAPTURE(BM_host_target_arch, default_stream, stream_kind::default_stream);
BENCHMARK_CAPTURE(BM_host_target_arch, per_thread_stream, stream_kind::per_thread_stream);
BENCHMARK_CAPTURE(BM_host_target_arch, explicit_stream, stream_kind::explicit_stream);
BENCHMARK_CAPTURE(BM_host_target_arch, async_stream, stream_kind::async_stream);
BENCHMARK(BM_kernel_launch);
// Registers BM_host_target_arch with Google Benchmark.
//   ST - token stringized into the `algo:` field of the benchmark name.
//   SK - value forwarded to BM_host_target_arch as its extra argument.
// The name is a key:value description string passed through
// bench_naming::format_name() before registration.
//
// The definition deliberately ends without a trailing line continuation so that
// the line following the macro can never be absorbed into its body.
#define CREATE_BENCHMARK(ST, SK)                                  \
    benchmark::RegisterBenchmark(                                 \
        bench_naming::format_name("{lvl:na"                       \
                                  ",algo:" #ST                    \
                                  ",cfg:default_config}")         \
            .c_str(),                                             \
        &BM_host_target_arch,                                     \
        SK)


/// Entry point of the config-dispatch benchmark.
///
/// Parses the command line, initialises Google Benchmark, registers one
/// benchmark per stream kind through CREATE_BENCHMARK, applies the shared run
/// settings (manual timing, millisecond unit, optional fixed iteration count)
/// and runs the registered benchmarks.
///
/// @return 0 once the benchmarks have been run.
int main(int argc, char** argv)
{
    cli::Parser parser(argc, argv);
    // "size" stays registered as an option although nothing in this function
    // consumes its value -- presumably so the flag is still accepted here;
    // TODO confirm.
    parser.set_optional<size_t>("size", "size", DEFAULT_N, "number of values");
    parser.set_optional<int>("trials", "trials", -1, "number of iterations");
    parser.set_optional<std::string>("name_format",
                                     "name_format",
                                     "human",
                                     "either: json,human,txt");
    parser.run_and_exit_if_error();

    // Parse argv
    benchmark::Initialize(&argc, argv);
    add_common_benchmark_info();
    const int trials = parser.get<int>("trials");
    // The name format must be set before CREATE_BENCHMARK builds the names below.
    bench_naming::set_format(parser.get<std::string>("name_format"));

    // Add benchmarks
    std::vector<benchmark::internal::Benchmark*> benchmarks{
        CREATE_BENCHMARK(default_stream, stream_kind::default_stream),
        CREATE_BENCHMARK(per_thread_stream, stream_kind::per_thread_stream),
        CREATE_BENCHMARK(explicit_stream, stream_kind::explicit_stream),
        CREATE_BENCHMARK(async_stream, stream_kind::async_stream)};

    // Use manual timing
    // NOTE(review): manual timing expects the benchmark body to report each
    // iteration via state.SetIterationTime(); confirm BM_host_target_arch does.
    for(auto& b : benchmarks)
    {
        b->UseManualTime();
        b->Unit(benchmark::kMillisecond);
    }

    // Force number of iterations (a non-positive value leaves the choice to the library)
    if(trials > 0)
    {
        for(auto& b : benchmarks)
        {
            b->Iterations(trials);
        }
    }

    // Run benchmarks
    benchmark::RunSpecifiedBenchmarks();
    return 0;
}
1 change: 1 addition & 0 deletions benchmark/benchmark_device_scan_by_key.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ int main(int argc, char* argv[])
"name_format",
"human",
"either: json,human,txt");

#ifdef BENCHMARK_CONFIG_TUNING
// optionally run an evenly split subset of benchmarks, when making multiple program invocations
parser.set_optional<int>("parallel_instance",
Expand Down