Skip to content

Commit b8a0432

Browse files
Xu Kuohai authored and Kernel Patches Daemon committed
selftests/bpf/benchs: Add overwrite mode benchmark for BPF ring buffer
Add --rb-overwrite option to benchmark BPF ring buffer in overwrite mode.
Since overwrite mode is not yet supported by libbpf for consumer, also add
--rb-bench-producer option to benchmark producer directly without a consumer.

Benchmarks on an x86_64 and an arm64 CPU are shown below for reference.

- AMD EPYC 9654 (x86_64)

Ringbuf, multi-producer contention in overwrite mode, no consumer
=================================================================
rb-prod nr_prod 1  32.180 ± 0.033M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 2  9.617 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 3  8.810 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 4  9.272 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 8  9.173 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 12 3.086 ± 0.032M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 16 2.945 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 20 2.519 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 24 2.545 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 28 2.363 ± 0.024M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 32 2.357 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 36 2.267 ± 0.011M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 40 2.284 ± 0.020M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 44 2.215 ± 0.025M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 48 2.193 ± 0.023M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 52 2.208 ± 0.024M/s (drops 0.000 ± 0.000M/s)

- HiSilicon Kunpeng 920 (arm64)

Ringbuf, multi-producer contention in overwrite mode, no consumer
=================================================================
rb-prod nr_prod 1  14.478 ± 0.006M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 2  21.787 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 3  6.045 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 4  5.352 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 8  4.850 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 12 3.542 ± 0.016M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 16 3.509 ± 0.021M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 20 3.171 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 24 3.154 ± 0.014M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 28 2.974 ± 0.015M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 32 3.167 ± 0.014M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 36 2.903 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 40 2.866 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 44 2.914 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 48 2.806 ± 0.012M/s (drops 0.000 ± 0.000M/s)
rb-prod nr_prod 52 2.840 ± 0.012M/s (drops 0.000 ± 0.000M/s)

Signed-off-by: Xu Kuohai <[email protected]>
1 parent 06800b3 commit b8a0432

File tree

3 files changed

+75
-6
lines changed

3 files changed

+75
-6
lines changed

tools/testing/selftests/bpf/benchs/bench_ringbufs.c

Lines changed: 60 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ static struct {
1919
int ringbuf_sz; /* per-ringbuf, in bytes */
2020
bool ringbuf_use_output; /* use slower output API */
2121
int perfbuf_sz; /* per-CPU size, in pages */
22+
bool overwrite;
23+
bool bench_producer;
2224
} args = {
2325
.back2back = false,
2426
.batch_cnt = 500,
@@ -27,6 +29,8 @@ static struct {
2729
.ringbuf_sz = 512 * 1024,
2830
.ringbuf_use_output = false,
2931
.perfbuf_sz = 128,
32+
.overwrite = false,
33+
.bench_producer = false,
3034
};
3135

3236
enum {
@@ -35,6 +39,8 @@ enum {
3539
ARG_RB_BATCH_CNT = 2002,
3640
ARG_RB_SAMPLED = 2003,
3741
ARG_RB_SAMPLE_RATE = 2004,
42+
ARG_RB_OVERWRITE = 2005,
43+
ARG_RB_BENCH_PRODUCER = 2006,
3844
};
3945

4046
static const struct argp_option opts[] = {
@@ -43,6 +49,8 @@ static const struct argp_option opts[] = {
4349
{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
4450
{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
4551
{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
52+
{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
53+
{ "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
4654
{},
4755
};
4856

@@ -72,6 +80,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
7280
argp_usage(state);
7381
}
7482
break;
83+
case ARG_RB_OVERWRITE:
84+
args.overwrite = true;
85+
break;
86+
case ARG_RB_BENCH_PRODUCER:
87+
args.bench_producer = true;
88+
break;
7589
default:
7690
return ARGP_ERR_UNKNOWN;
7791
}
@@ -95,8 +109,33 @@ static inline void bufs_trigger_batch(void)
95109

96110
static void bufs_validate(void)
97111
{
98-
if (env.consumer_cnt != 1) {
99-
fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
112+
if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
113+
fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
114+
exit(1);
115+
}
116+
117+
if (args.overwrite && !args.bench_producer) {
118+
fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
119+
exit(1);
120+
}
121+
122+
if (args.bench_producer && env.consumer_cnt != 0) {
123+
fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
124+
exit(1);
125+
}
126+
127+
if (args.bench_producer && args.back2back) {
128+
fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
129+
exit(1);
130+
}
131+
132+
if (args.bench_producer && args.sampled) {
133+
fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
134+
exit(1);
135+
}
136+
137+
if (!args.bench_producer && env.consumer_cnt != 1) {
138+
fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
100139
exit(1);
101140
}
102141

@@ -128,12 +167,17 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
128167
{
129168
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
130169

131-
res->hits = atomic_swap(&buf_hits.value, 0);
170+
if (args.bench_producer)
171+
res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
172+
else
173+
res->hits = atomic_swap(&buf_hits.value, 0);
132174
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
133175
}
134176

135177
static struct ringbuf_bench *ringbuf_setup_skeleton(void)
136178
{
179+
__u32 flags;
180+
struct bpf_map *ringbuf;
137181
struct ringbuf_bench *skel;
138182

139183
setup_libbpf();
@@ -146,12 +190,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
146190

147191
skel->rodata->batch_cnt = args.batch_cnt;
148192
skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
193+
skel->rodata->bench_producer = args.bench_producer;
149194

150195
if (args.sampled)
151196
/* record data + header take 16 bytes */
152197
skel->rodata->wakeup_data_size = args.sample_rate * 16;
153198

154-
bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
199+
ringbuf = skel->maps.ringbuf;
200+
if (args.overwrite) {
201+
flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
202+
bpf_map__set_map_flags(ringbuf, flags);
203+
}
204+
205+
bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
155206

156207
if (ringbuf_bench__load(skel)) {
157208
fprintf(stderr, "failed to load skeleton\n");
@@ -171,10 +222,13 @@ static void ringbuf_libbpf_setup(void)
171222
{
172223
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
173224
struct bpf_link *link;
225+
int map_fd;
174226

175227
ctx->skel = ringbuf_setup_skeleton();
176-
ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
177-
buf_process_sample, NULL, NULL);
228+
229+
map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
230+
ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample,
231+
NULL, NULL);
178232
if (!ctx->ringbuf) {
179233
fprintf(stderr, "failed to create ringbuf\n");
180234
exit(1);

tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
4949
summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
5050
done
5151

52+
header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
53+
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
54+
summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
55+
done

tools/testing/selftests/bpf/progs/ringbuf_bench.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0
22
// Copyright (c) 2020 Facebook
33

4+
#include <stdbool.h>
45
#include <linux/bpf.h>
56
#include <stdint.h>
67
#include <bpf/bpf_helpers.h>
@@ -14,16 +15,21 @@ struct {
1415

1516
const volatile int batch_cnt = 0;
1617
const volatile long use_output = 0;
18+
const volatile bool bench_producer = false;
1719

1820
long sample_val = 42;
1921
long dropped __attribute__((aligned(128))) = 0;
22+
long hits __attribute__((aligned(128))) = 0;
2023

2124
const volatile long wakeup_data_size = 0;
2225

2326
static __always_inline long get_flags()
2427
{
2528
long sz;
2629

30+
if (bench_producer)
31+
return BPF_RB_NO_WAKEUP;
32+
2733
if (!wakeup_data_size)
2834
return 0;
2935

@@ -47,6 +53,8 @@ int bench_ringbuf(void *ctx)
4753
*sample = sample_val;
4854
flags = get_flags();
4955
bpf_ringbuf_submit(sample, flags);
56+
if (bench_producer)
57+
__sync_add_and_fetch(&hits, 1);
5058
}
5159
}
5260
} else {
@@ -55,6 +63,9 @@ int bench_ringbuf(void *ctx)
5563
if (bpf_ringbuf_output(&ringbuf, &sample_val,
5664
sizeof(sample_val), flags))
5765
__sync_add_and_fetch(&dropped, 1);
66+
else if (bench_producer)
67+
__sync_add_and_fetch(&hits, 1);
68+
5869
}
5970
}
6071
return 0;

0 commit comments

Comments
 (0)