Skip to content

Commit 98f4b33

Browse files
author
Raghuveer Devulapalli
authored
Merge pull request #23 from r-devulap/gbench
Use Google benchmark
2 parents 1735e86 + c5bb290 commit 98f4b33

File tree

7 files changed

+174
-320
lines changed

7 files changed

+174
-320
lines changed

Makefile

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,33 @@ BENCHDIR = ./benchmarks
55
UTILS = ./utils
66
SRCS = $(wildcard $(SRCDIR)/*.hpp)
77
TESTS = $(wildcard $(TESTDIR)/*.cpp)
8+
BENCHS = $(wildcard $(BENCHDIR)/*.cpp)
89
TESTOBJS = $(patsubst $(TESTDIR)/%.cpp,$(TESTDIR)/%.o,$(TESTS))
10+
BENCHOBJS = $(patsubst $(BENCHDIR)/%.cpp,$(BENCHDIR)/%.o,$(BENCHS))
11+
BENCHOBJS := $(filter-out $(BENCHDIR)/main.o ,$(BENCHOBJS))
912
CXXFLAGS += -I$(SRCDIR) -I$(UTILS)
1013
GTESTCFLAGS = `pkg-config --cflags gtest_main`
1114
GTESTLDFLAGS = `pkg-config --libs gtest_main`
15+
GBENCHCFLAGS = `pkg-config --cflags benchmark`
16+
GBENCHLDFLAGS = `pkg-config --libs benchmark`
1217
MARCHFLAG = -march=sapphirerapids -O3
1318

1419
all : test bench
1520

1621
$(UTILS)/cpuinfo.o : $(UTILS)/cpuinfo.cpp
17-
$(CXX) $(CXXFLAGS) -c $(UTILS)/cpuinfo.cpp -o $(UTILS)/cpuinfo.o
22+
$(CXX) $(CXXFLAGS) -c $(UTILS)/cpuinfo.cpp -o $(UTILS)/cpuinfo.o
1823

1924
$(TESTDIR)/%.o : $(TESTDIR)/%.cpp $(SRCS)
20-
$(CXX) $(CXXFLAGS) $(MARCHFLAG) $(GTESTCFLAGS) -c $< -o $@
25+
$(CXX) $(CXXFLAGS) $(MARCHFLAG) $(GTESTCFLAGS) -c $< -o $@
2126

2227
test: $(TESTOBJS) $(UTILS)/cpuinfo.o $(SRCS)
23-
$(CXX) $(TESTOBJS) $(UTILS)/cpuinfo.o $(MARCHFLAG) $(CXXFLAGS) -lgtest_main $(GTESTLDFLAGS) -o testexe
28+
$(CXX) $(TESTOBJS) $(UTILS)/cpuinfo.o $(MARCHFLAG) $(CXXFLAGS) -lgtest_main $(GTESTLDFLAGS) -o testexe
2429

25-
bench: $(BENCHDIR)/main.cpp $(SRCS) $(UTILS)/cpuinfo.o
26-
$(CXX) $(BENCHDIR)/main.cpp $(CXXFLAGS) $(UTILS)/cpuinfo.o $(MARCHFLAG) -o benchexe
30+
$(BENCHDIR)/%.o : $(BENCHDIR)/%.cpp $(SRCS)
31+
$(CXX) $(CXXFLAGS) $(MARCHFLAG) $(GBENCHCFLAGS) -c $< -o $@
32+
33+
bench: $(BENCHOBJS) $(UTILS)/cpuinfo.o
34+
$(CXX) $(BENCHDIR)/main.cpp $(BENCHOBJS) $(MARCHFLAG) $(CXXFLAGS) $(GBENCHLDFLAGS) $(UTILS)/cpuinfo.o -o benchexe
2735

2836
meson:
2937
meson setup --warnlevel 0 --buildtype plain builddir

benchmarks/bench.hpp

Lines changed: 0 additions & 128 deletions
This file was deleted.

benchmarks/bench_qsort.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#include <benchmark/benchmark.h>
2+
#include "rand_array.h"
3+
#include "cpuinfo.h"
4+
#include "avx512-16bit-qsort.hpp"
5+
#include "avx512-32bit-qsort.hpp"
6+
#include "avx512-64bit-qsort.hpp"
7+
8+
template <typename T>
9+
static void avx512_qsort(benchmark::State& state) {
10+
if (!cpu_has_avx512bw()) {
11+
state.SkipWithMessage("Requires AVX512 BW ISA");
12+
}
13+
if ((sizeof(T) == 2) && (!cpu_has_avx512_vbmi2())) {
14+
state.SkipWithMessage("Requires AVX512 VBMI2 ISA");
15+
}
16+
// Perform setup here
17+
size_t ARRSIZE = state.range(0);
18+
std::vector<T> arr;
19+
std::vector<T> arr_bkp;
20+
21+
/* Initialize elements is reverse order */
22+
arr = get_uniform_rand_array<T>(ARRSIZE);
23+
arr_bkp = arr;
24+
25+
/* call avx512 quicksort */
26+
for (auto _ : state) {
27+
avx512_qsort<T>(arr.data(), ARRSIZE);
28+
state.PauseTiming();
29+
arr = arr_bkp;
30+
state.ResumeTiming();
31+
}
32+
}
33+
34+
template <typename T>
35+
static void stdsort(benchmark::State& state) {
36+
// Perform setup here
37+
size_t ARRSIZE = state.range(0);
38+
std::vector<T> arr;
39+
std::vector<T> arr_bkp;
40+
41+
/* Initialize elements is reverse order */
42+
arr = get_uniform_rand_array<T>(ARRSIZE);
43+
arr_bkp = arr;
44+
45+
/* call avx512 quicksort */
46+
for (auto _ : state) {
47+
std::sort(arr.begin(), arr.end());
48+
state.PauseTiming();
49+
arr = arr_bkp;
50+
state.ResumeTiming();
51+
}
52+
}
53+
54+
// Register the function as a benchmark
55+
BENCHMARK(avx512_qsort<float>)->Arg(10000)->Arg(1000000);
56+
BENCHMARK(stdsort<float>)->Arg(10000)->Arg(1000000);
57+
BENCHMARK(avx512_qsort<uint32_t>)->Arg(10000)->Arg(1000000);
58+
BENCHMARK(stdsort<uint32_t>)->Arg(10000)->Arg(1000000);
59+
BENCHMARK(avx512_qsort<int32_t>)->Arg(10000)->Arg(1000000);
60+
BENCHMARK(stdsort<int32_t>)->Arg(10000)->Arg(1000000);
61+
62+
BENCHMARK(avx512_qsort<double>)->Arg(10000)->Arg(1000000);
63+
BENCHMARK(stdsort<double>)->Arg(10000)->Arg(1000000);
64+
BENCHMARK(avx512_qsort<uint64_t>)->Arg(10000)->Arg(1000000);
65+
BENCHMARK(stdsort<uint64_t>)->Arg(10000)->Arg(1000000);
66+
BENCHMARK(avx512_qsort<int64_t>)->Arg(10000)->Arg(1000000);
67+
BENCHMARK(stdsort<int64_t>)->Arg(10000)->Arg(10000000);
68+
69+
//BENCHMARK(avx512_qsort<float16>)->Arg(10000)->Arg(1000000);
70+
BENCHMARK(avx512_qsort<uint16_t>)->Arg(10000)->Arg(1000000);
71+
BENCHMARK(stdsort<uint16_t>)->Arg(10000)->Arg(1000000);
72+
BENCHMARK(avx512_qsort<int16_t>)->Arg(10000)->Arg(1000000);
73+
BENCHMARK(stdsort<int16_t>)->Arg(10000)->Arg(10000000);

benchmarks/bench_qsortfp16.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#include <benchmark/benchmark.h>
2+
#include "rand_array.h"
3+
#include "cpuinfo.h"
4+
#include "avx512fp16-16bit-qsort.hpp"
5+
6+
template <typename T>
7+
static void avx512_qsort(benchmark::State& state) {
8+
if (cpu_has_avx512fp16()) {
9+
// Perform setup here
10+
size_t ARRSIZE = state.range(0);
11+
std::vector<T> arr;
12+
std::vector<T> arr_bkp;
13+
14+
/* Initialize elements */
15+
for (size_t jj = 0; jj < ARRSIZE; ++jj) {
16+
_Float16 temp = (float) rand() / (float)(RAND_MAX);
17+
arr.push_back(temp);
18+
}
19+
arr_bkp = arr;
20+
21+
/* call avx512 quicksort */
22+
for (auto _ : state) {
23+
avx512_qsort<T>(arr.data(), ARRSIZE);
24+
state.PauseTiming();
25+
arr = arr_bkp;
26+
state.ResumeTiming();
27+
}
28+
}
29+
else {
30+
state.SkipWithMessage("Requires AVX512-FP16 ISA");
31+
}
32+
}
33+
34+
template <typename T>
35+
static void stdsort(benchmark::State& state) {
36+
if (cpu_has_avx512fp16()) {
37+
// Perform setup here
38+
size_t ARRSIZE = state.range(0);
39+
std::vector<T> arr;
40+
std::vector<T> arr_bkp;
41+
42+
for (size_t jj = 0; jj < ARRSIZE; ++jj) {
43+
_Float16 temp = (float) rand() / (float)(RAND_MAX);
44+
arr.push_back(temp);
45+
}
46+
arr_bkp = arr;
47+
48+
/* call avx512 quicksort */
49+
for (auto _ : state) {
50+
std::sort(arr.begin(), arr.end());
51+
state.PauseTiming();
52+
arr = arr_bkp;
53+
state.ResumeTiming();
54+
}
55+
}
56+
else {
57+
state.SkipWithMessage("Requires AVX512-FP16 ISA");
58+
}
59+
}
60+
61+
// Register the function as a benchmark
62+
BENCHMARK(avx512_qsort<_Float16>)->Arg(10000)->Arg(1000000);
63+
BENCHMARK(stdsort<_Float16>)->Arg(10000)->Arg(1000000);

0 commit comments

Comments
 (0)