Skip to content

Commit 4d8a2df

Browse files
committed
pping: WIP - add RTT-based sampling
To allow the rtt-rate to be passed as a float (so you can, for example, take 1 RTT sample every 0.1 RTT periods), I've used fixed-point arithmetic, as floats are not supported in BPF (or the kernel in general). I've never used fixed-point arithmetic before, so hopefully I haven't screwed it up too badly. I have limited the RTT-based sampling to a maximum of 10k*RTT so that overflow in the fixed-point multiplication is unlikely. The RTT-interval is based on a smoothed RTT (moving average) calculated the same way as in the TCP stack (srtt = 7/8*prev_srtt + 1/8*rtt). Signed-off-by: Simon Sundberg <[email protected]>
1 parent a30443c commit 4d8a2df

File tree

3 files changed

+56
-6
lines changed

3 files changed

+56
-6
lines changed

pping/pping.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ static const struct option long_options[] = {
9393
{ "help", no_argument, NULL, 'h' },
9494
{ "interface", required_argument, NULL, 'i' }, // Name of interface to run on
9595
{ "rate-limit", required_argument, NULL, 'r' }, // Sampling rate-limit in ms
96+
{ "rtt-rate", required_argument, NULL, 'R' }, // Sampling rate in terms of flow-RTT (ex 1 sample per RTT-interval)
9697
{ "force", no_argument, NULL, 'f' }, // Overwrite any existing XDP program on interface, remove qdisc on cleanup
9798
{ "cleanup-interval", required_argument, NULL, 'c' }, // Map cleaning interval in s
9899
{ "format", required_argument, NULL, 'F' }, // Which format to output in (standard/json/ppviz)
@@ -158,14 +159,14 @@ static int parse_bounded_double(double *res, const char *str, double low,
158159
static int parse_arguments(int argc, char *argv[], struct pping_config *config)
159160
{
160161
int err, opt;
161-
double rate_limit_ms, cleanup_interval_s;
162+
double rate_limit_ms, cleanup_interval_s, rtt_rate;
162163

163164
config->ifindex = 0;
164165
config->force = false;
165166
config->json_format = false;
166167
config->ppviz_format = false;
167168

168-
while ((opt = getopt_long(argc, argv, "hfi:r:c:F:I:", long_options,
169+
while ((opt = getopt_long(argc, argv, "hfi:r:R:c:F:I:", long_options,
169170
NULL)) != -1) {
170171
switch (opt) {
171172
case 'i':
@@ -193,6 +194,14 @@ static int parse_arguments(int argc, char *argv[], struct pping_config *config)
193194
config->bpf_config.rate_limit =
194195
rate_limit_ms * NS_PER_MS;
195196
break;
197+
case 'R':
198+
err = parse_bounded_double(&rtt_rate, optarg, 0, 10000,
199+
"rate-limit");
200+
if (err)
201+
return -EINVAL;
202+
config->bpf_config.rtt_rate =
203+
DOUBLE_TO_FIXPOINT(rtt_rate);
204+
196205
case 'c':
197206
err = parse_bounded_double(&cleanup_interval_s, optarg,
198207
0, 1000000000,
@@ -443,6 +452,8 @@ static bool flow_timeout(void *key_ptr, void *val_ptr, __u64 now)
443452
struct flow_event fe;
444453
__u64 ts = ((struct flow_state *)val_ptr)->last_timestamp;
445454

455+
printf(">>> now: %llu, ts: %llu, diff: %llu\n", now, ts, now - ts);
456+
446457
if (now > ts && now - ts > FLOW_LIFETIME) {
447458
if (print_event_func) {
448459
fe.event_type = EVENT_TYPE_FLOW;
@@ -917,7 +928,7 @@ int main(int argc, char *argv[])
917928
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_egress_opts);
918929

919930
struct pping_config config = {
920-
.bpf_config = { .rate_limit = 100 * NS_PER_MS },
931+
.bpf_config = { .rate_limit = 100 * NS_PER_MS, .rtt_rate = 0 },
921932
.cleanup_interval = 1 * NS_PER_SECOND,
922933
.object_path = "pping_kern.o",
923934
.ingress_sec = SEC_INGRESS_XDP,

pping/pping.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
#define SEC_INGRESS_TC "classifier/ingress"
1111
#define SEC_EGRESS_TC "classifier/egress"
1212

13+
/*
 * Unsigned 64-bit fixed-point type with FIXPOINT_SHIFT fractional bits.
 * DOUBLE_TO_FIXPOINT(X) scales X by 2^FIXPOINT_SHIFT and converts the result
 * to fixpoint64; FIXPOINT_TO_UINT(X) shifts the fractional bits out again,
 * leaving the integer part.
 */
typedef __u64 fixpoint64;
14+
#define FIXPOINT_SHIFT 16
15+
#define DOUBLE_TO_FIXPOINT(X) ((fixpoint64)((X) * (1UL << FIXPOINT_SHIFT)))
16+
#define FIXPOINT_TO_UINT(X) ((X) >> FIXPOINT_SHIFT)
17+
1318
/* For the event_type members of rtt_event and flow_event */
1419
#define EVENT_TYPE_FLOW 1
1520
#define EVENT_TYPE_RTT 2
@@ -38,6 +43,7 @@ enum __attribute__((__packed__)) flow_event_source {
3843

3944
/* Sampling settings; pping.c fills these in from the command-line options */
struct bpf_config {
4045
__u64 rate_limit; // static sampling interval; pping.c stores it as ms * NS_PER_MS
46+
fixpoint64 rtt_rate; // sampling interval as a fixed-point multiple of the flow RTT; pping.c defaults it to 0
4147
};
4248

4349
/*
@@ -68,6 +74,7 @@ struct network_tuple {
6874

6975
struct flow_state {
7076
__u64 min_rtt;
77+
__u64 srtt;
7178
__u64 last_timestamp;
7279
__u64 sent_pkts;
7380
__u64 sent_bytes;

pping/pping_kern.c

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,33 @@ static __u32 remaining_pkt_payload(struct parsing_context *ctx)
323323
return parsed_bytes < ctx->pkt_len ? ctx->pkt_len - parsed_bytes : 0;
324324
}
325325

326+
/*
327+
* Calculate a smooted rtt similar to how TCP stack does it in
328+
* net/ipv4/tcp_input.c/tcp_rtt_estimator().
329+
*
330+
* NOTE: Will cause roundoff errors, but if RTTs > 1000ns errors should be small
331+
*/
332+
static __u64 calculate_srtt(__u64 prev_srtt, __u64 rtt)
333+
{
334+
if (!prev_srtt)
335+
return rtt;
336+
// srtt = 7/8*prev_srtt + 1/8*rtt
337+
return prev_srtt - (prev_srtt >> 3) + (rtt >> 3);
338+
}
339+
340+
/*
 * Decide whether a new sample should be suppressed by the rate limit.
 *
 * now:     current timestamp
 * last_ts: timestamp of the previous sample
 * rtt:     RTT estimate used for RTT-based limiting (pping_egress passes the
 *          flow's smoothed RTT); 0 means no estimate is available
 *
 * Returns true if the caller should skip sampling: either now lies before
 * last_ts, or less than the required interval has passed since last_ts.
 * The interval is config.rtt_rate * rtt when both are non-zero (rtt_rate
 * being a fixed-point value with FIXPOINT_SHIFT fractional bits), otherwise
 * the static config.rate_limit.
 */
static bool wait_for_rate_limit(__u64 now, __u64 last_ts, __u64 rtt)
{
	__u64 rate_int, rate_frac, interval;

	if (now < last_ts)
		return true;

	// RTT-based rate limit
	if (config.rtt_rate && rtt) {
		/*
		 * Multiply the integer and fractional parts of rtt_rate
		 * separately. This gives exactly
		 * FIXPOINT_TO_UINT(config.rtt_rate * rtt) whenever that
		 * product fits in 64 bits, but the intermediate values need
		 * FIXPOINT_SHIFT fewer bits, so a large rtt no longer wraps
		 * the product and yields a far too short interval.
		 */
		rate_int = FIXPOINT_TO_UINT(config.rtt_rate);
		rate_frac = config.rtt_rate & ((1ULL << FIXPOINT_SHIFT) - 1);
		interval = rate_int * rtt + FIXPOINT_TO_UINT(rate_frac * rtt);
		return now - last_ts < interval;
	}

	// Static rate limit
	return now - last_ts < config.rate_limit;
}
352+
326353
/*
327354
* Fills in event_type, timestamp, flow, source and reserved.
328355
* Does not fill in the flow_info.
@@ -402,8 +429,8 @@ static void pping_egress(void *ctx, struct parsing_context *pctx)
402429
f_state->last_id = p_id.identifier;
403430

404431
// Check rate-limit
405-
if (!new_flow && (now < f_state->last_timestamp ||
406-
now - f_state->last_timestamp < config.rate_limit))
432+
if (!new_flow &&
433+
wait_for_rate_limit(now, f_state->last_timestamp, f_state->srtt))
407434
return;
408435

409436
/*
@@ -448,12 +475,12 @@ static void pping_ingress(void *ctx, struct parsing_context *pctx)
448475
goto validflow_out;
449476

450477
re.rtt = now - *p_ts;
451-
452478
// Delete timestamp entry as soon as RTT is calculated
453479
bpf_map_delete_elem(&packet_ts, &p_id);
454480

455481
if (f_state->min_rtt == 0 || re.rtt < f_state->min_rtt)
456482
f_state->min_rtt = re.rtt;
483+
f_state->srtt = calculate_srtt(f_state->srtt, re.rtt);
457484

458485
// Fill event and push to perf-buffer
459486
re.event_type = EVENT_TYPE_RTT;
@@ -526,6 +553,11 @@ int pping_xdp_ingress(struct xdp_md *ctx)
526553
.is_egress = false,
527554
};
528555

556+
bpf_printk("%llu - %llu\n", config.rate_limit, config.rtt_rate);
557+
__u64 rtt = 1000000000;
558+
bpf_printk("rtt_rate * %llu = %llu\n", rtt,
559+
FIXPOINT_TO_UINT(config.rtt_rate * rtt));
560+
529561
pping_ingress(ctx, &pctx);
530562

531563
return XDP_PASS;

0 commit comments

Comments
 (0)