bpf: Add generic attach/detach/query API for multi-progs
[ commit tbd ]

Co-developed-by: Nikolay Aleksandrov <[email protected]>
Signed-off-by: Nikolay Aleksandrov <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
borkmann committed May 17, 2023
1 parent a206329 commit ed9901f
Showing 19 changed files with 1,588 additions and 162 deletions.
5 changes: 4 additions & 1 deletion MAINTAINERS
@@ -3776,6 +3776,7 @@ F: kernel/bpf/core.c
 F: kernel/bpf/syscall.c
 F: kernel/bpf/dispatcher.c
 F: kernel/bpf/trampoline.c
+F: kernel/bpf/mprog.c
 F: include/linux/bpf*
 F: include/linux/filter.h
 F: include/linux/tnum.h
@@ -3795,13 +3796,15 @@ S: Maintained
 F: kernel/trace/bpf_trace.c
 F: kernel/bpf/stackmap.c
 
-BPF [NETWORKING] (tc BPF, sock_addr)
+BPF [NETWORKING] (tcx & tc BPF, sock_addr)
 M: Martin KaFai Lau <[email protected]>
 M: Daniel Borkmann <[email protected]>
 R: John Fastabend <[email protected]>
 L: [email protected]
 L: [email protected]
 S: Maintained
+F: include/net/tcx.h
+F: kernel/bpf/tcx.c
 F: net/core/filter.c
 F: net/sched/act_bpf.c
 F: net/sched/cls_bpf.c
1 change: 1 addition & 0 deletions include/linux/bpf.h
@@ -1752,6 +1752,7 @@ struct bpf_prog_array_item {
 	union {
 		struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 		u64 bpf_cookie;
+		u32 flags;
 	};
 };

291 changes: 291 additions & 0 deletions include/linux/bpf_mprog.h
@@ -0,0 +1,291 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2023 Isovalent */
#ifndef __BPF_MPROG_H
#define __BPF_MPROG_H

#include <linux/bpf.h>

#define BPF_MPROG_MAX 64
#define BPF_MPROG_SWAP 1
#define BPF_MPROG_FREE 2

struct bpf_prog_fp {
	struct bpf_prog *prog;
};

struct bpf_prog_cp {
	struct bpf_link *link;
	u32 flags;
};

struct bpf_mprog_entry {
	struct bpf_prog_fp fp_items[BPF_MPROG_MAX] ____cacheline_aligned;
	struct bpf_prog_cp cp_items[BPF_MPROG_MAX] ____cacheline_aligned;
	struct bpf_mprog_entry_pair *parent;
};

struct bpf_mprog_entry_pair {
	struct bpf_mprog_entry a;
	struct bpf_mprog_entry b;
	struct rcu_head rcu;
	struct bpf_prog *ref;
	atomic_t revision;
};
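
/* Each entry is one half of a pair: updates are staged into the inactive
 * peer (see bpf_mprog_peer()) and then published, so RCU readers always
 * observe a consistent snapshot while the pair-wide revision counter
 * tracks every committed change.
 */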

struct bpf_tuple {
	struct bpf_prog *prog;
	struct bpf_link *link;
};

static inline struct bpf_mprog_entry *
bpf_mprog_peer(const struct bpf_mprog_entry *entry)
{
	if (entry == &entry->parent->a)
		return &entry->parent->b;
	else
		return &entry->parent->a;
}

static inline struct bpf_mprog_entry *bpf_mprog_create(size_t extra_size)
{
	struct bpf_mprog_entry_pair *pair;

	BUILD_BUG_ON(ARRAY_SIZE(pair->a.fp_items) != ARRAY_SIZE(pair->a.cp_items));
	/* Fast-path items are not extensible, must only contain prog pointer! */
	BUILD_BUG_ON(sizeof(pair->a.fp_items[0]) > sizeof(u64));

	pair = kzalloc(sizeof(*pair) + extra_size, GFP_KERNEL);
	if (pair) {
		atomic_set(&pair->revision, 1);
		pair->a.parent = pair;
		pair->b.parent = pair;
		return &pair->a;
	}
	return NULL;
}

static inline void bpf_mprog_free(struct bpf_mprog_entry *entry)
{
	kfree_rcu(entry->parent, rcu);
}

static inline void bpf_mprog_mark_ref(struct bpf_mprog_entry *entry,
				      struct bpf_prog *prog)
{
	WARN_ON_ONCE(entry->parent->ref);
	entry->parent->ref = prog;
}

static inline bool bpf_mprog_flags_ok(u32 flags, bool attach)
{
	if ((flags & BPF_F_REPLACE) && !attach)
		return false;
	if ((flags & BPF_F_REPLACE) && (flags & (BPF_F_BEFORE | BPF_F_AFTER | BPF_F_LINK)))
		return false;
	if ((flags & BPF_F_LINK) && !(flags & (BPF_F_BEFORE | BPF_F_AFTER)))
		return false;
	if ((flags & BPF_F_FIRST) && (flags & BPF_F_AFTER))
		return false;
	if ((flags & BPF_F_LAST) && (flags & BPF_F_BEFORE))
		return false;
	if ((flags & (BPF_F_BEFORE | BPF_F_AFTER)) == (BPF_F_BEFORE | BPF_F_AFTER))
		return false;
	if ((flags & (BPF_F_FIRST | BPF_F_LAST)) == (BPF_F_FIRST | BPF_F_LAST) &&
	    (flags & (BPF_F_BEFORE | BPF_F_AFTER)))
		return false;
	return true;
}
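
/* Examples: BPF_F_BEFORE plus a relative fd/id requests insertion in front
 * of that program, whereas BPF_F_BEFORE | BPF_F_AFTER, BPF_F_FIRST |
 * BPF_F_AFTER, or BPF_F_REPLACE on detach are all rejected.
 */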

static inline bool bpf_mprog_rprog_ok(u32 flags, bool relative_prog)
{
	if (!relative_prog &&
	    (flags & (BPF_F_REPLACE | BPF_F_BEFORE | BPF_F_AFTER)))
		return false;
	if (relative_prog &&
	    !(flags & (BPF_F_REPLACE | BPF_F_BEFORE | BPF_F_AFTER)))
		return false;
	return true;
}

static inline u32 bpf_mprog_flags(u32 cur_flags, u32 req_flags, u32 flag)
{
	if (req_flags & flag)
		cur_flags |= flag;
	else
		cur_flags &= ~flag;
	return cur_flags;
}

static inline u32 bpf_mprog_max(void)
{
	return ARRAY_SIZE(((struct bpf_mprog_entry *)NULL)->fp_items) - 1;
}

static inline struct bpf_prog *bpf_mprog_first(struct bpf_mprog_entry *entry)
{
	return READ_ONCE(entry->fp_items[0].prog);
}

static inline struct bpf_prog *bpf_mprog_last(struct bpf_mprog_entry *entry)
{
	struct bpf_prog *prog = NULL, *tmp;
	struct bpf_prog_fp *item;
	int i;

	for (i = 0; i < bpf_mprog_max(); i++) {
		item = &entry->fp_items[i];
		tmp = READ_ONCE(item->prog);
		if (!tmp)
			break;
		prog = tmp;
	}
	return prog;
}

static inline void bpf_mprog_commit(struct bpf_mprog_entry *entry)
{
	do {
		atomic_inc(&entry->parent->revision);
	} while (atomic_read(&entry->parent->revision) == 0);
	synchronize_rcu();
	if (entry->parent->ref) {
		bpf_prog_put(entry->parent->ref);
		entry->parent->ref = NULL;
	}
}
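
/* bpf_mprog_commit() runs after the updated peer entry has been swapped
 * in: it bumps the pair's revision (skipping the value 0 on wraparound),
 * waits out an RCU grace period, and releases a replaced program that was
 * previously stashed via bpf_mprog_mark_ref().
 */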

static inline void bpf_mprog_entry_clear(struct bpf_mprog_entry *entry)
{
	memset(entry->fp_items, 0, sizeof(entry->fp_items));
	memset(entry->cp_items, 0, sizeof(entry->cp_items));
}

static inline u64 bpf_mprog_revision(struct bpf_mprog_entry *entry)
{
	return atomic_read(&entry->parent->revision);
}

static inline void bpf_mprog_read(struct bpf_mprog_entry *entry, u32 which,
				  struct bpf_prog_fp **fp_dst,
				  struct bpf_prog_cp **cp_dst)
{
	*fp_dst = &entry->fp_items[which];
	*cp_dst = &entry->cp_items[which];
}

static inline void bpf_mprog_write(struct bpf_prog_fp *fp_dst,
				   struct bpf_prog_cp *cp_dst,
				   struct bpf_tuple *tuple, u32 flags)
{
	WRITE_ONCE(fp_dst->prog, tuple->prog);
	cp_dst->link = tuple->link;
	cp_dst->flags = flags;
}

static inline void bpf_mprog_copy(struct bpf_prog_fp *fp_dst,
				  struct bpf_prog_cp *cp_dst,
				  struct bpf_prog_fp *fp_src,
				  struct bpf_prog_cp *cp_src)
{
	WRITE_ONCE(fp_dst->prog, READ_ONCE(fp_src->prog));
	memcpy(cp_dst, cp_src, sizeof(*cp_src));
}

static inline void bpf_mprog_copy_range(struct bpf_mprog_entry *peer,
					struct bpf_mprog_entry *entry,
					u32 idx_peer, u32 idx_entry, u32 num)
{
	memcpy(&peer->fp_items[idx_peer], &entry->fp_items[idx_entry],
	       num * sizeof(peer->fp_items[0]));
	memcpy(&peer->cp_items[idx_peer], &entry->cp_items[idx_entry],
	       num * sizeof(peer->cp_items[0]));
}

#define bpf_mprog_foreach(entry, fp, cp, tuple)				\
	for (fp = &entry->fp_items[0], cp = &entry->cp_items[0];	\
	     ({								\
		tuple->prog = READ_ONCE(fp->prog);			\
		tuple->link = cp->link;					\
		tuple->prog;						\
	     });							\
	     fp++, cp++)
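
/* Usage sketch for the iterator (run_prog() is illustrative only):
 *
 *	struct bpf_prog_fp *fp;
 *	struct bpf_prog_cp *cp;
 *	struct bpf_tuple tuple, *t = &tuple;
 *
 *	bpf_mprog_foreach(entry, fp, cp, t)
 *		run_prog(t->prog);
 */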

static inline u32 bpf_mprog_total(struct bpf_mprog_entry *entry)
{
	const struct bpf_prog_fp *item;
	const struct bpf_prog *prog;
	u32 num = 0;

	item = &entry->fp_items[0];
	while ((prog = READ_ONCE(item->prog))) {
		num++;
		item++;
	}
	return num;
}

static inline int
bpf_mprog_tuple_relative(struct bpf_tuple *tuple,
			 u32 relobj, u32 flags,
			 enum bpf_prog_type type)
{
	struct bpf_prog *ptmp;
	struct bpf_link *ltmp;

	memset(tuple, 0, sizeof(*tuple));
	if (flags & BPF_F_LINK) {
		if (flags & BPF_F_ID) {
			ltmp = bpf_link_by_id(relobj);
		} else {
			if (!relobj)
				return -EINVAL;
			ltmp = bpf_link_get_from_fd(relobj);
		}
		if (IS_ERR(ltmp))
			return PTR_ERR(ltmp);
		if (ltmp->prog->type != type) {
			bpf_link_put(ltmp);
			return -EINVAL;
		}
		tuple->link = ltmp;
		tuple->prog = ltmp->prog;
	} else {
		if (flags & BPF_F_ID) {
			ptmp = bpf_prog_by_id(relobj);
		} else {
			if (!relobj)
				return 0;
			ptmp = bpf_prog_get(relobj);
		}
		if (IS_ERR(ptmp))
			return PTR_ERR(ptmp);
		if (ptmp->type != type) {
			bpf_prog_put(ptmp);
			return -EINVAL;
		}
		tuple->link = NULL;
		tuple->prog = ptmp;
	}
	return 0;
}
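
/* On success with a non-zero relobj, the tuple holds a reference on the
 * link or program which the caller must drop via bpf_mprog_tuple_put();
 * relobj == 0 without BPF_F_LINK/BPF_F_ID yields an empty tuple.
 */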

static inline void
bpf_mprog_tuple_put(struct bpf_tuple *tuple)
{
	if (tuple->link)
		bpf_link_put(tuple->link);
	else if (tuple->prog)
		bpf_prog_put(tuple->prog);
}

int bpf_mprog_attach(struct bpf_mprog_entry *entry, struct bpf_prog *nprog,
		     struct bpf_link *nlink, u32 aflags, u32 relobj,
		     u32 expected_revision);
int bpf_mprog_detach(struct bpf_mprog_entry *entry, struct bpf_prog *dprog,
		     struct bpf_link *dlink, u32 dflags, u32 relobj,
		     u32 expected_revision);

int bpf_mprog_query(const union bpf_attr *attr, union bpf_attr __user *uattr,
		    struct bpf_mprog_entry *entry);

#endif /* __BPF_MPROG_H */
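
For orientation, here is a minimal sketch of how a subsystem's attach path might drive this API. It assumes that bpf_mprog_attach() stages the update into the inactive peer and returns BPF_MPROG_SWAP when that peer must be published — suggested by the constants above, but the kernel/bpf/mprog.c side is not part of this excerpt. The holder pointer, example_lock, and function names below are purely illustrative:

#include <linux/bpf_mprog.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>

static DEFINE_MUTEX(example_lock);	/* serializes updates (illustrative) */

static int example_mprog_attach(struct bpf_mprog_entry __rcu **loc,
				struct bpf_prog *prog, u32 flags,
				u32 relobj, u32 expected_revision)
{
	struct bpf_mprog_entry *entry;
	int ret;

	mutex_lock(&example_lock);
	entry = rcu_dereference_protected(*loc, lockdep_is_held(&example_lock));
	if (!entry) {
		/* First attachment: allocate a pair with no extra state. */
		entry = bpf_mprog_create(0);
		if (!entry) {
			ret = -ENOMEM;
			goto out;
		}
		rcu_assign_pointer(*loc, entry);
	}
	ret = bpf_mprog_attach(entry, prog, NULL, flags, relobj,
			       expected_revision);
	if (ret == BPF_MPROG_SWAP) {
		/* Publish the updated peer, then retire the old array. */
		rcu_assign_pointer(*loc, bpf_mprog_peer(entry));
		ret = 0;
	}
	if (!ret)
		bpf_mprog_commit(entry);
out:
	mutex_unlock(&example_lock);
	return ret;
}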
15 changes: 6 additions & 9 deletions include/linux/netdevice.h
@@ -1927,8 +1927,7 @@ enum netdev_ml_priv_type {
  *
  *	@rx_handler:		handler for received packets
  *	@rx_handler_data:	XXX: need comments on this one
- *	@miniq_ingress:		ingress/clsact qdisc specific data for
- *				ingress processing
+ *	@tcx_ingress:		BPF & clsact qdisc specific data for ingress processing
  *	@ingress_queue:		XXX: need comments on this one
  *	@nf_hooks_ingress:	netfilter hooks executed for ingress packets
  *	@broadcast:		hw bcast address
@@ -1949,8 +1948,7 @@ enum netdev_ml_priv_type {
  *	@xps_maps:	all CPUs/RXQs maps for XPS device
  *
  *	@xps_maps:	XXX: need comments on this one
- *	@miniq_egress:	clsact qdisc specific data for
- *			egress processing
+ *	@tcx_egress:	BPF & clsact qdisc specific data for egress processing
  *	@nf_hooks_egress:	netfilter hooks executed for egress packets
  *	@qdisc_hash:	qdisc hash table
  *	@watchdog_timeo:	Represents the timeout that is used by
@@ -2249,9 +2247,8 @@ struct net_device {
 	unsigned int		gro_ipv4_max_size;
 	rx_handler_func_t __rcu	*rx_handler;
 	void __rcu		*rx_handler_data;
-
-#ifdef CONFIG_NET_CLS_ACT
-	struct mini_Qdisc __rcu	*miniq_ingress;
+#ifdef CONFIG_NET_XGRESS
+	struct bpf_mprog_entry __rcu *tcx_ingress;
 #endif
 	struct netdev_queue __rcu *ingress_queue;
 #ifdef CONFIG_NETFILTER_INGRESS
@@ -2279,8 +2276,8 @@ struct net_device {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX];
 #endif
-#ifdef CONFIG_NET_CLS_ACT
-	struct mini_Qdisc __rcu	*miniq_egress;
+#ifdef CONFIG_NET_XGRESS
+	struct bpf_mprog_entry __rcu *tcx_egress;
 #endif
 #ifdef CONFIG_NETFILTER_EGRESS
 	struct nf_hook_entries __rcu *nf_hooks_egress;
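For context on the read side, a sketch of how a datapath consumer might walk the new tcx_ingress entry under RCU. The actual tcx hook is among the files not shown in this excerpt; the function name and the TCX_NEXT "continue" verdict are assumptions borrowed from the tcx UAPI, not definitions from this diff:

#include <linux/bpf_mprog.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static __always_inline int example_run_tcx_ingress(struct sk_buff *skb)
{
	struct bpf_mprog_entry *entry;
	struct bpf_prog_fp *fp;
	struct bpf_prog_cp *cp;
	struct bpf_tuple tuple, *t = &tuple;
	int ret = TCX_NEXT;	/* assumed default: continue processing */

	entry = rcu_dereference_bh(skb->dev->tcx_ingress);
	if (!entry)
		return ret;
	bpf_mprog_foreach(entry, fp, cp, t) {
		ret = bpf_prog_run(t->prog, skb);
		if (ret != TCX_NEXT)
			break;	/* first terminal verdict wins */
	}
	return ret;
}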
4 changes: 2 additions & 2 deletions include/linux/skbuff.h
@@ -943,7 +943,7 @@ struct sk_buff {
 	__u8			__mono_tc_offset[0];
 	/* public: */
 	__u8			mono_delivery_time:1;	/* See SKB_MONO_DELIVERY_TIME_MASK */
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
 	__u8			tc_at_ingress:1;	/* See TC_AT_INGRESS_MASK */
 	__u8			tc_skip_classify:1;
 #endif
@@ -992,7 +992,7 @@ struct sk_buff {
 	__u8			csum_not_inet:1;
 #endif
 
-#ifdef CONFIG_NET_SCHED
+#if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS)
 	__u16			tc_index;	/* traffic control index */
 #endif

2 changes: 1 addition & 1 deletion include/net/sch_generic.h
@@ -695,7 +695,7 @@ int skb_do_redirect(struct sk_buff *);
 
 static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_XGRESS
 	return skb->tc_at_ingress;
 #else
 	return false;
[ 13 more changed files not shown ]
