Skip to content

Commit

Permalink
Merge 60e255e into e977ecc
Browse files Browse the repository at this point in the history
  • Loading branch information
hkaiser authored Apr 28, 2024
2 parents e977ecc + 60e255e commit 02ec247
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 94 deletions.
44 changes: 26 additions & 18 deletions libs/core/algorithms/tests/performance/foreach_scaling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ std::uint64_t averageout_plain_for(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
Expand All @@ -52,7 +52,7 @@ std::uint64_t averageout_plain_for_iter(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
Expand All @@ -72,7 +72,7 @@ std::uint64_t averageout_parallel_foreach(
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
Expand All @@ -92,7 +92,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)

if (num_overlapping_loops <= 0)
{
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
measure_task_foreach(data_representation, exec).wait();
Expand All @@ -103,7 +103,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)
std::vector<hpx::shared_future<void>> tests;
tests.resize(num_overlapping_loops);

std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
{
Expand All @@ -124,7 +124,7 @@ std::uint64_t averageout_sequential_foreach(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
Expand All @@ -142,7 +142,7 @@ std::uint64_t averageout_parallel_forloop(
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
Expand All @@ -167,7 +167,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)

if (num_overlapping_loops <= 0)
{
std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
measure_task_forloop(data_representation, exec).wait();
Expand All @@ -178,7 +178,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)
std::vector<hpx::shared_future<void>> tests;
tests.resize(num_overlapping_loops);

std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
{
Expand All @@ -199,7 +199,7 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

std::uint64_t start = hpx::chrono::high_resolution_clock::now();
std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
Expand All @@ -212,8 +212,8 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
int hpx_main(hpx::program_options::variables_map& vm)
{
// pull values from cmd
std::size_t vector_size = vm["vector_size"].as<std::size_t>();
bool csvoutput = vm.count("csv_output") != 0;
std::size_t const vector_size = vm["vector_size"].as<std::size_t>();
bool const csvoutput = vm.count("csv_output") != 0;
delay = vm["work_delay"].as<int>();
test_count = vm["test_count"].as<int>();
chunk_size = vm["chunk_size"].as<int>();
Expand Down Expand Up @@ -264,8 +264,8 @@ int hpx_main(hpx::program_options::variables_map& vm)
std::uint64_t task_time_forloop = 0;
std::uint64_t seq_time_forloop = 0;

std::uint64_t plain_time_for = averageout_plain_for(vector_size);
std::uint64_t plain_time_for_iter =
std::uint64_t const plain_time_for = averageout_plain_for(vector_size);
std::uint64_t const plain_time_for_iter =
averageout_plain_for_iter(vector_size);

if (vm["executor"].as<std::string>() == "forkjoin")
Expand Down Expand Up @@ -467,11 +467,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
<< std::left
<< "Parallel Scale : " << std::right
<< std::setw(8)
<< (double(seq_time_foreach) / par_time_foreach) << "\n"
<< (static_cast<double>(seq_time_foreach) /
par_time_foreach)
<< "\n"
<< std::left
<< "Task Scale : " << std::right
<< std::setw(8)
<< (double(seq_time_foreach) / task_time_foreach) << "\n"
<< (static_cast<double>(seq_time_foreach) /
task_time_foreach)
<< "\n"
<< std::flush;

std::cout << "-------------Average-(for_loop)----------------\n"
Expand All @@ -490,11 +494,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
<< std::left
<< "Parallel Scale : " << std::right
<< std::setw(8)
<< (double(seq_time_forloop) / par_time_forloop) << "\n"
<< (static_cast<double>(seq_time_forloop) /
par_time_forloop)
<< "\n"
<< std::left
<< "Task Scale : " << std::right
<< std::setw(8)
<< (double(seq_time_forloop) / task_time_forloop) << "\n";
<< (static_cast<double>(seq_time_forloop) /
task_time_forloop)
<< "\n";
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,7 @@ void test_sorted_until3_seq()
std::iota(std::begin(c1), std::end(c1), 0);
std::iota(std::begin(c2), std::end(c2), 0);

auto until1 =
auto const until1 =
hpx::ranges::is_sorted_until(c1, std::less<int>(), [&](int x) {
if (x == 0)
{
Expand All @@ -730,7 +730,7 @@ void test_sorted_until3_seq()
return x;
}
});
auto until2 =
auto const until2 =
hpx::ranges::is_sorted_until(c2, std::less<int>(), [&](int x) {
if (x == static_cast<int>(c2.size()) / 3 ||
x == 2 * static_cast<int>(c2.size()) / 3)
Expand All @@ -743,8 +743,8 @@ void test_sorted_until3_seq()
}
});

auto test_index1 = std::begin(c1) + 1;
auto test_index2 = std::begin(c2) + c2.size() / 3;
auto const test_index1 = std::begin(c1) + 1;
auto const test_index2 = std::begin(c2) + c2.size() / 3;

HPX_TEST(until1 == test_index1);
HPX_TEST(until2 == test_index2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace hpx::compute::host {
{
}

explicit native_handle_type(hpx::threads::mask_type mask)
explicit native_handle_type(hpx::threads::mask_type const& mask)
: mask_(mask)
{
}
Expand All @@ -56,7 +56,7 @@ namespace hpx::compute::host {
target() = default;

// Constructs target from a given mask of processing units
explicit target(hpx::threads::mask_type mask)
explicit target(hpx::threads::mask_type const& mask)
: handle_(mask)
{
}
Expand Down
12 changes: 11 additions & 1 deletion libs/core/compute_local/src/host_target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,26 @@ namespace hpx::compute::host {
hpx::threads::mask_type const mask = native_handle().get_device();
std::size_t const mask_size = hpx::threads::mask_size(mask);

bool found_one = false;

std::size_t num_thread = 0;
for (/**/; num_thread != num_os_threads; ++num_thread)
{
if (hpx::threads::bit_and(
mask, rp.get_pu_mask(num_thread), mask_size))
{
found_one = true;
break;
}
}
return std::make_pair(num_thread, hpx::threads::count(mask));

if (!found_one)
{
return std::make_pair(static_cast<std::size_t>(-1), 0);
}

return std::make_pair(
num_thread, (std::min)(num_os_threads, hpx::threads::count(mask)));
}

void target::serialize(serialization::input_archive& ar, unsigned int)
Expand Down
6 changes: 3 additions & 3 deletions libs/core/coroutines/include/hpx/coroutines/thread_enums.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,15 +206,15 @@ namespace hpx::threads {
/// local thread number associated with this hint. Local thread numbers
/// are indexed from zero. It is up to the scheduler to decide how to
/// interpret thread numbers that are larger than the number of threads
/// available to the scheduler. Typically thread numbers will wrap
/// available to the scheduler. Typically, thread numbers will wrap
/// around when too large.
thread = 1,

/// A hint that tells the scheduler to prefer scheduling a task on the
/// NUMA domain associated with this hint. NUMA domains are indexed from
/// zero. It is up to the scheduler to decide how to interpret NUMA
/// domain indices that are larger than the number of available NUMA
/// domains to the scheduler. Typically indices will wrap around when
/// domains to the scheduler. Typically, indices will wrap around when
/// too large.
numa = 2,
};
Expand Down Expand Up @@ -295,7 +295,7 @@ namespace hpx::threads {
}

///////////////////////////////////////////////////////////////////////////
/// \enum thread_placement_hint
/// \enum thread_execution_hint
///
/// The type of hint given to the scheduler related to running a thread as a
/// child directly in the context of the parent thread
Expand Down
Loading

0 comments on commit 02ec247

Please sign in to comment.