Skip to content

Commit

Permalink
Merge pull request #92 from brycelelbach/cartesian_product_internal_i…
Browse files Browse the repository at this point in the history
…teration

Implement internal iteration for `cartesian_product`
  • Loading branch information
brycelelbach authored Aug 2, 2023
2 parents caa30bd + abba492 commit d906839
Show file tree
Hide file tree
Showing 10 changed files with 1,132 additions and 49 deletions.
5 changes: 4 additions & 1 deletion benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ add_executable(internal-iteration-benchmark internal_iteration_benchmark.cpp)
target_link_libraries(internal-iteration-benchmark PUBLIC nanobench flux)

add_executable(sort-benchmark sort_benchmark.cpp)
target_link_libraries(sort-benchmark PUBLIC nanobench flux)
target_link_libraries(sort-benchmark PUBLIC nanobench flux)

# Multidimensional memset benchmark: built from two sources, the driver
# (multidimensional_memset_benchmark.cpp) and the kernels
# (multidimensional_memset_benchmark_kernels.cpp), and linked against the
# nanobench and flux targets like the other benchmarks in this file.
add_executable(multidimensional-memset-benchmark multidimensional_memset_benchmark.cpp multidimensional_memset_benchmark_kernels.cpp)
target_link_libraries(multidimensional-memset-benchmark PUBLIC nanobench flux)
93 changes: 93 additions & 0 deletions benchmark/multidimensional_memset_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@

// Copyright (c) 2021 Barry Revzin
// Copyright (c) 2023 NVIDIA Corporation (reply-to: [email protected])
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <nanobench.h>

#include <flux.hpp>

#include <numeric>
#include <ranges>
#include <iostream>

namespace an = ankerl::nanobench;

// Kernels are placed in a separate translation unit to prevent compilers from
// optimizing them based on the input that we'll be giving them and to make it
// easier to study their compiled assembly.
// All kernels share one signature: A points at the array storage, and N and M
// are the two extents used to form the flat index i * M + j.

// Full-array kernels: the driver expects every element of A to be 0.0 afterwards.
extern void memset_2d_reference(double* A, flux::distance_t N, flux::distance_t M);
extern void memset_2d_std_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M);
extern void memset_2d_flux_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M);
// Diagonal kernels: the driver expects only elements with i == j to be 0.0 and
// every other element to keep its original value.
extern void memset_diagonal_2d_reference(double* A, flux::distance_t N, flux::distance_t M);
extern void memset_diagonal_2d_std_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M);
extern void memset_diagonal_2d_flux_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M);

// Benchmark driver: times each memset kernel with nanobench and validates the
// contents of the array after every kernel has been benchmarked.
//
// Command line:
//   argv[1]  optional minimum number of iterations per epoch (default: 40).
//
// Returns 0 when every kernel leaves the array in the expected state, and 1
// when the iteration count is invalid or any kernel fails validation. Failures
// are reported on std::cerr with the name of the offending kernel instead of
// throwing an exception that nothing catches.
int main(int argc, char** argv)
{
  int const n_iters = argc > 1 ? std::atoi(argv[1]) : 40;
  if (n_iters < 0) {
    // The count is forwarded to nanobench's minEpochIterations(); a negative
    // int would presumably be converted to a huge unsigned iteration count
    // there, so reject it up front.
    std::cerr << "error: iteration count must be non-negative, got " << n_iters << '\n';
    return 1;
  }

  constexpr flux::distance_t N = 1024;
  constexpr flux::distance_t M = 2048;
  std::vector<double> A(N * M);

  // Cleared as soon as any kernel fails validation; decides the exit code.
  bool all_passed = true;

  // Refills A with 0, 1, 2, ... so that untouched elements are recognisable,
  // benchmarks `func` under `name`, then validates the result with `check`.
  const auto run_benchmark =
    [&all_passed] (auto& bench, auto& A, auto N, auto M, auto name, auto func, auto check) {
      std::iota(A.begin(), A.end(), 0);
      bench.run(name, [&] { func(A.data(), N, M); });
      if (!check(A, N, M)) {
        std::cerr << "FAILED: " << name << " left the array in an unexpected state\n";
        all_passed = false;
      }
    };

  {
    // A full memset must leave every element equal to 0.0.
    const auto check_2d = [] (auto& A, auto /*N*/, auto /*M*/) {
      const auto it = std::ranges::find_if_not(A, [] (auto e) { return e == 0.0; });
      return it == A.end();
    };

    auto bench = an::Bench()
      .minEpochIterations(n_iters)
      .relative(true)
      .performanceCounters(false);

    const auto run_2d_benchmark_impl = [&] (auto name, auto func) {
      run_benchmark(bench, A, N, M, name, func, check_2d);
    };

    // Stringifies the kernel name so it doubles as the benchmark label.
    #define run_2d_benchmark(func) run_2d_benchmark_impl(#func, func)

    run_2d_benchmark(memset_2d_reference);
    run_2d_benchmark(memset_2d_std_cartesian_product_iota);
    run_2d_benchmark(memset_2d_flux_cartesian_product_iota);
  }

  {
    // A diagonal memset must zero the elements with i == j and leave all
    // others holding the value written by std::iota (their own flat index).
    const auto check_diagonal_2d = [] (auto& A, auto N, auto M) {
      for (auto i : std::views::iota(0, N))
        for (auto j : std::views::iota(0, M)) {
          const auto idx = i * M + j;
          const double expected = (i == j) ? 0.0 : static_cast<double>(idx);
          if (A[idx] != expected) return false;
        }
      return true;
    };

    auto bench = an::Bench()
      .minEpochIterations(n_iters)
      .relative(true)
      .performanceCounters(false);

    const auto run_diagonal_2d_benchmark_impl = [&] (auto name, auto func) {
      run_benchmark(bench, A, N, M, name, func, check_diagonal_2d);
    };

    // Stringifies the kernel name so it doubles as the benchmark label.
    #define run_diagonal_2d_benchmark(func) run_diagonal_2d_benchmark_impl(#func, func)

    run_diagonal_2d_benchmark(memset_diagonal_2d_reference);
    run_diagonal_2d_benchmark(memset_diagonal_2d_std_cartesian_product_iota_filter);
    run_diagonal_2d_benchmark(memset_diagonal_2d_flux_cartesian_product_iota_filter);
  }

  return all_passed ? 0 : 1;
}
68 changes: 68 additions & 0 deletions benchmark/multidimensional_memset_benchmark_kernels.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@

// Copyright (c) 2023 NVIDIA Corporation (reply-to: [email protected])
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <flux/op/cartesian_product.hpp>
#include <flux/source/iota.hpp>
#include <flux/op/for_each.hpp>
#include <flux/op/filter.hpp>

#include "ranges_cartesian_product.hpp"

#include <ranges>
#include <algorithm>

void memset_2d_reference(double* A, flux::distance_t N, flux::distance_t M)
{
for (flux::distance_t i = 0; i != N; ++i)
for (flux::distance_t j = 0; j != M; ++j)
A[i * M + j] = 0.0;
}

// Kernel under test: stores 0.0 into A[i * M + j] for every (i, j) pair
// produced by std::views::cartesian_product over iota(0, N) and iota(0, M),
// driven by std::ranges::for_each.
// flux::unpack wraps the two-argument lambda; presumably it lets the lambda be
// called with the tuple-like element the product yields -- confirm against flux.
void memset_2d_std_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M)
{
std::ranges::for_each(
std::views::cartesian_product(std::views::iota(0, N), std::views::iota(0, M)),
flux::unpack([&] (auto i, auto j) {
A[i * M + j] = 0.0;
}));
}

// Kernel under test: stores 0.0 into A[i * M + j] for every (i, j) pair
// produced by flux::cartesian_product over flux::ints(0, N) and
// flux::ints(0, M), driven by flux::for_each.
// flux::unpack wraps the two-argument lambda; presumably it lets the lambda be
// called with the tuple-like element the product yields -- confirm against flux.
void memset_2d_flux_cartesian_product_iota(double* A, flux::distance_t N, flux::distance_t M)
{
flux::for_each(
flux::cartesian_product(flux::ints(0, N), flux::ints(0, M)),
flux::unpack([&] (auto i, auto j) {
A[i * M + j] = 0.0;
}));
}

void memset_diagonal_2d_reference(double* A, flux::distance_t N, flux::distance_t M)
{
for (flux::distance_t i = 0; i != N; ++i)
for (flux::distance_t j = 0; j != M; ++j)
if (i == j) A[i * M + j] = 0.0;
}

// Kernel under test: builds the (i, j) pairs with std::views::cartesian_product
// over iota(0, N) and iota(0, M), keeps only the pairs with i == j through
// std::views::filter, and stores 0.0 into A[i * M + j] for each surviving pair
// via std::ranges::for_each.
// Both the predicate and the store lambda are wrapped in flux::unpack;
// presumably this adapts them to the tuple-like element type -- confirm
// against flux.
void memset_diagonal_2d_std_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M)
{
std::ranges::for_each(
std::views::cartesian_product(std::views::iota(0, N), std::views::iota(0, M))
| std::views::filter(flux::unpack([] (auto i, auto j) { return i == j; })),
flux::unpack([&] (auto i, auto j) {
A[i * M + j] = 0.0;
}));
}

// Kernel under test: builds the (i, j) pairs with flux::cartesian_product over
// flux::ints(0, N) and flux::ints(0, M), keeps only the pairs with i == j
// through the sequence's .filter() adaptor, and stores 0.0 into A[i * M + j]
// for each surviving pair via flux::for_each.
// Both the predicate and the store lambda are wrapped in flux::unpack;
// presumably this adapts them to the tuple-like element type -- confirm
// against flux.
void memset_diagonal_2d_flux_cartesian_product_iota_filter(double* A, flux::distance_t N, flux::distance_t M)
{
flux::for_each(
flux::cartesian_product(flux::ints(0, N), flux::ints(0, M))
.filter(flux::unpack([] (auto i, auto j) { return i == j; })),
flux::unpack([&] (auto i, auto j) {
A[i * M + j] = 0.0;
}));
}

Loading

0 comments on commit d906839

Please sign in to comment.